1 package org.apache.maven.archetype.common.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.codehaus.plexus.logging.AbstractLogEnabled;
23 import org.mozilla.intl.chardet.nsDetector;
24 import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25 import org.mozilla.intl.chardet.nsPSMDetector;
26
27 import java.io.BufferedInputStream;
28 import java.io.File;
29 import java.io.FileInputStream;
30 import java.io.FileNotFoundException;
31 import java.io.IOException;
32 import java.io.InputStream;
33
34
35
36
37 public class FileCharsetDetector
38 extends AbstractLogEnabled
39 {
40 private String charset = null;
41
42 private boolean found = false;
43
44 public FileCharsetDetector( File detectedFile )
45 throws FileNotFoundException, IOException
46 {
47 nsDetector det = new nsDetector( nsPSMDetector.ALL );
48
49 det.Init( new nsICharsetDetectionObserver()
50 {
51 @SuppressWarnings( "checkstyle:methodname" )
52 public void Notify( String charset )
53 {
54 FileCharsetDetector.this.charset = charset;
55 FileCharsetDetector.this.found = true;
56 }
57 } );
58
59 FileInputStream fileInputStream = new FileInputStream( detectedFile );
60 BufferedInputStream imp = new BufferedInputStream( fileInputStream );
61 try
62 {
63
64 byte[] buf = new byte[1024];
65 int len;
66 boolean done = false;
67 boolean isAscii = true;
68
69 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
70 {
71
72 if ( isAscii )
73 {
74 isAscii = det.isAscii( buf, len );
75 }
76
77
78 if ( !isAscii && !done )
79 {
80 done = det.DoIt( buf, len, false );
81 found = done;
82 }
83 }
84 det.DataEnd();
85
86 if ( !isFound() )
87 {
88 String[] prob = det.getProbableCharsets();
89
90 if ( prob.length > 0 )
91 {
92 charset = prob[0];
93 }
94 }
95
96 if ( isAscii )
97 {
98 charset = "ASCII";
99 }
100 }
101 finally
102 {
103 imp.close();
104 fileInputStream.close();
105 }
106 }
107
108
109
110 public FileCharsetDetector( InputStream detectedStream )
111 throws FileNotFoundException, IOException
112 {
113 nsDetector det = new nsDetector( nsPSMDetector.ALL );
114
115 det.Init( new nsICharsetDetectionObserver()
116 {
117 @SuppressWarnings( "checkstyle:methodname" )
118 public void Notify( String charset )
119 {
120 FileCharsetDetector.this.charset = charset;
121 FileCharsetDetector.this.found = true;
122 }
123 } );
124
125 BufferedInputStream imp = new BufferedInputStream( detectedStream );
126
127 byte[] buf = new byte[1024];
128 int len;
129 boolean done = false;
130 boolean isAscii = true;
131
132 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
133 {
134
135 if ( isAscii )
136 {
137 isAscii = det.isAscii( buf, len );
138 }
139
140
141 if ( !isAscii && !done )
142 {
143 done = det.DoIt( buf, len, false );
144 found = done;
145 }
146 }
147 det.DataEnd();
148
149 if ( !isFound() )
150 {
151 String[] prob = det.getProbableCharsets();
152
153 if ( prob.length > 0 )
154 {
155 charset = prob[0];
156 }
157 }
158
159 if ( isAscii )
160 {
161 charset = "ASCII";
162 }
163 }
164
165 public String getCharset()
166 {
167 return charset;
168 }
169
170 public boolean isFound()
171 {
172 return found;
173 }
174 }