1 package org.apache.maven.archetype.common.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.codehaus.plexus.logging.AbstractLogEnabled;
23 import org.mozilla.intl.chardet.nsDetector;
24 import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25 import org.mozilla.intl.chardet.nsPSMDetector;
26
27 import java.io.BufferedInputStream;
28 import java.io.File;
29 import java.io.FileInputStream;
30 import java.io.FileNotFoundException;
31 import java.io.IOException;
32 import java.io.InputStream;
33
34
35
36
37 public class FileCharsetDetector
38 extends AbstractLogEnabled
39 {
40 private String charset = null;
41
42 private boolean found = false;
43
44 public FileCharsetDetector( File detectedFile )
45 throws FileNotFoundException, IOException
46 {
47 nsDetector det = new nsDetector( nsPSMDetector.ALL );
48
49 det.Init( new nsICharsetDetectionObserver()
50 {
51 public void Notify( String charset )
52 {
53 FileCharsetDetector.this.charset = charset;
54 FileCharsetDetector.this.found = true;
55 }
56 } );
57
58 FileInputStream fileInputStream = new FileInputStream( detectedFile );
59 BufferedInputStream imp = new BufferedInputStream( fileInputStream );
60 try
61 {
62
63 byte[] buf = new byte[1024];
64 int len;
65 boolean done = false;
66 boolean isAscii = true;
67
68 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
69 {
70
71 if ( isAscii )
72 {
73 isAscii = det.isAscii( buf, len );
74 }
75
76
77 if ( !isAscii && !done )
78 {
79 done = det.DoIt( buf, len, false );
80 found = done;
81 }
82 }
83 det.DataEnd();
84
85 if ( !isFound() )
86 {
87 String[] prob = det.getProbableCharsets();
88
89 if ( prob.length > 0 )
90 {
91 charset = prob[0];
92 }
93 }
94
95 if ( isAscii )
96 {
97 charset = "ASCII";
98 }
99 }
100 finally
101 {
102 imp.close();
103 fileInputStream.close();
104 }
105 }
106
107
108
109 public FileCharsetDetector( InputStream detectedStream )
110 throws FileNotFoundException, IOException
111 {
112 nsDetector det = new nsDetector( nsPSMDetector.ALL );
113
114 det.Init( new nsICharsetDetectionObserver()
115 {
116 public void Notify( String charset )
117 {
118 FileCharsetDetector.this.charset = charset;
119 FileCharsetDetector.this.found = true;
120 }
121 } );
122
123 BufferedInputStream imp = new BufferedInputStream( detectedStream );
124
125 byte[] buf = new byte[1024];
126 int len;
127 boolean done = false;
128 boolean isAscii = true;
129
130 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
131 {
132
133 if ( isAscii )
134 {
135 isAscii = det.isAscii( buf, len );
136 }
137
138
139 if ( !isAscii && !done )
140 {
141 done = det.DoIt( buf, len, false );
142 found = done;
143 }
144 }
145 det.DataEnd();
146
147 if ( !isFound() )
148 {
149 String[] prob = det.getProbableCharsets();
150
151 if ( prob.length > 0 )
152 {
153 charset = prob[0];
154 }
155 }
156
157 if ( isAscii )
158 {
159 charset = "ASCII";
160 }
161 }
162
163 public String getCharset()
164 {
165 return charset;
166 }
167
168 public boolean isFound()
169 {
170 return found;
171 }
172 }