1 package org.apache.maven.archetype.common.util;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import org.codehaus.plexus.logging.AbstractLogEnabled;
23 import org.mozilla.intl.chardet.nsDetector;
24 import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25 import org.mozilla.intl.chardet.nsPSMDetector;
26
27 import java.io.BufferedInputStream;
28 import java.io.File;
29 import java.io.FileInputStream;
30 import java.io.FileNotFoundException;
31 import java.io.IOException;
32 import java.io.InputStream;
33
34
35 public class FileCharsetDetector
36 extends AbstractLogEnabled
37 {
38 private String charset = null;
39
40 private boolean found = false;
41
42 public FileCharsetDetector( File detectedFile )
43 throws FileNotFoundException, IOException
44 {
45 nsDetector det = new nsDetector( nsPSMDetector.ALL );
46
47 det.Init(
48 new nsICharsetDetectionObserver()
49 {
50 public void Notify( String charset )
51 {
52 FileCharsetDetector.this.charset = charset;
53 FileCharsetDetector.this.found = true;
54 }
55 }
56 );
57
58 BufferedInputStream imp = new BufferedInputStream( new FileInputStream( detectedFile ) );
59
60 byte[] buf = new byte[1024];
61 int len;
62 boolean done = false;
63 boolean isAscii = true;
64
65 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
66 {
67
68 if ( isAscii )
69 {
70 isAscii = det.isAscii( buf, len );
71 }
72
73
74 if ( !isAscii && !done )
75 {
76 done = det.DoIt( buf, len, false );
77 found = done;
78 }
79 }
80 det.DataEnd();
81
82 if ( !isFound() )
83 {
84 String[] prob = det.getProbableCharsets();
85
86 if ( prob.length > 0 )
87 {
88 charset = prob[0];
89 }
90 }
91
92 if ( isAscii )
93 {
94 charset = "ASCII";
95 }
96 }
97
98 public FileCharsetDetector( InputStream detectedStream )
99 throws FileNotFoundException, IOException
100 {
101 nsDetector det = new nsDetector( nsPSMDetector.ALL );
102
103 det.Init(
104 new nsICharsetDetectionObserver()
105 {
106 public void Notify( String charset )
107 {
108 FileCharsetDetector.this.charset = charset;
109 FileCharsetDetector.this.found = true;
110 }
111 }
112 );
113
114 BufferedInputStream imp = new BufferedInputStream( detectedStream );
115
116 byte[] buf = new byte[1024];
117 int len;
118 boolean done = false;
119 boolean isAscii = true;
120
121 while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
122 {
123
124 if ( isAscii )
125 {
126 isAscii = det.isAscii( buf, len );
127 }
128
129
130 if ( !isAscii && !done )
131 {
132 done = det.DoIt( buf, len, false );
133 found = done;
134 }
135 }
136 det.DataEnd();
137
138 if ( !isFound() )
139 {
140 String[] prob = det.getProbableCharsets();
141
142 if ( prob.length > 0 )
143 {
144 charset = prob[0];
145 }
146 }
147
148 if ( isAscii )
149 {
150 charset = "ASCII";
151 }
152 }
153
154 public String getCharset()
155 {
156 return charset;
157 }
158
159 public boolean isFound()
160 {
161 return found;
162 }
163 }