1 package org.apache.maven.archetype.common.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.codehaus.plexus.logging.AbstractLogEnabled;
23 import org.mozilla.intl.chardet.nsDetector;
24 import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25 import org.mozilla.intl.chardet.nsPSMDetector;
26
27 import java.io.BufferedInputStream;
28 import java.io.File;
29 import java.io.FileInputStream;
30 import java.io.FileNotFoundException;
31 import java.io.IOException;
32 import java.io.InputStream;
33
34
35
36
37 public class FileCharsetDetector
38 extends AbstractLogEnabled
39 {
40 private String charset = null;
41
42 private boolean found = false;
43
44 public FileCharsetDetector( File detectedFile )
45 throws FileNotFoundException, IOException
46 {
47 nsDetector det = new nsDetector( nsPSMDetector.ALL );
48
49 det.Init( new nsICharsetDetectionObserver()
50 {
51 @Override
52 @SuppressWarnings( "checkstyle:methodname" )
53 public void Notify( String charset )
54 {
55 FileCharsetDetector.this.charset = charset;
56 FileCharsetDetector.this.found = true;
57 }
58 } );
59
60 try ( FileInputStream fileInputStream = new FileInputStream( detectedFile );
61 BufferedInputStream imp = new BufferedInputStream( fileInputStream ) )
62 {
63 byte[] buf = new byte[1024];
64 int len;
65 boolean done = false;
66 boolean isAscii = true;
67
68 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
69 {
70
71 if ( isAscii )
72 {
73 isAscii = det.isAscii( buf, len );
74 }
75
76
77 if ( !isAscii && !done )
78 {
79 done = det.DoIt( buf, len, false );
80 found = done;
81 }
82 }
83 det.DataEnd();
84
85 if ( !isFound() )
86 {
87 String[] prob = det.getProbableCharsets();
88
89 if ( prob.length > 0 )
90 {
91 charset = prob[0];
92 }
93 }
94
95 if ( isAscii )
96 {
97 charset = "ASCII";
98 }
99 }
100 }
101
102
103
104 public FileCharsetDetector( InputStream detectedStream )
105 throws FileNotFoundException, IOException
106 {
107 nsDetector det = new nsDetector( nsPSMDetector.ALL );
108
109 det.Init( new nsICharsetDetectionObserver()
110 {
111 @Override
112 @SuppressWarnings( "checkstyle:methodname" )
113 public void Notify( String charset )
114 {
115 FileCharsetDetector.this.charset = charset;
116 FileCharsetDetector.this.found = true;
117 }
118 } );
119
120 BufferedInputStream imp = new BufferedInputStream( detectedStream );
121
122 byte[] buf = new byte[1024];
123 int len;
124 boolean done = false;
125 boolean isAscii = true;
126
127 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
128 {
129
130 if ( isAscii )
131 {
132 isAscii = det.isAscii( buf, len );
133 }
134
135
136 if ( !isAscii && !done )
137 {
138 done = det.DoIt( buf, len, false );
139 found = done;
140 }
141 }
142 det.DataEnd();
143
144 if ( !isFound() )
145 {
146 String[] prob = det.getProbableCharsets();
147
148 if ( prob.length > 0 )
149 {
150 charset = prob[0];
151 }
152 }
153
154 if ( isAscii )
155 {
156 charset = "ASCII";
157 }
158 }
159
160 public String getCharset()
161 {
162 return charset;
163 }
164
165 public boolean isFound()
166 {
167 return found;
168 }
169 }