View Javadoc
1   package org.apache.maven.archetype.common.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.codehaus.plexus.logging.AbstractLogEnabled;
23  import org.mozilla.intl.chardet.nsDetector;
24  import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25  import org.mozilla.intl.chardet.nsPSMDetector;
26  
27  import java.io.BufferedInputStream;
28  import java.io.File;
29  import java.io.FileInputStream;
30  import java.io.FileNotFoundException;
31  import java.io.IOException;
32  import java.io.InputStream;
33  
34  /**
35   * @author rafale
36   */
37  public class FileCharsetDetector
38      extends AbstractLogEnabled
39  {
40      private String charset = null;
41  
42      private boolean found = false;
43  
44      public FileCharsetDetector( File detectedFile )
45          throws FileNotFoundException, IOException
46      {
47          nsDetector det = new nsDetector( nsPSMDetector.ALL );
48  
49          det.Init( new nsICharsetDetectionObserver()
50          {
51              @SuppressWarnings( "checkstyle:methodname" )
52              public void Notify( String charset )
53              {
54                  FileCharsetDetector.this.charset = charset;
55                  FileCharsetDetector.this.found = true;
56              }
57          } );
58  
59          FileInputStream fileInputStream = new FileInputStream( detectedFile );
60          BufferedInputStream imp = new BufferedInputStream( fileInputStream );
61          try
62          {
63  
64              byte[] buf = new byte[1024];
65              int len;
66              boolean done = false;
67              boolean isAscii = true;
68  
69              while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
70              {
71                  // Check if the stream is only ascii.
72                  if ( isAscii )
73                  {
74                      isAscii = det.isAscii( buf, len );
75                  }
76  
77                  // DoIt if non-ascii and not done yet.
78                  if ( !isAscii && !done )
79                  {
80                      done = det.DoIt( buf, len, false );
81                      found = done;
82                  }
83              }
84              det.DataEnd();
85  
86              if ( !isFound() )
87              {
88                  String[] prob = det.getProbableCharsets();
89  
90                  if ( prob.length > 0 )
91                  {
92                      charset = prob[0];
93                  }
94              }
95  
96              if ( isAscii )
97              {
98                  charset = "ASCII";
99              }
100         }
101         finally
102         {
103             imp.close();
104             fileInputStream.close();
105         }
106     }
107 
108 
109 
110     public FileCharsetDetector( InputStream detectedStream )
111         throws FileNotFoundException, IOException
112     {
113         nsDetector det = new nsDetector( nsPSMDetector.ALL );
114 
115         det.Init( new nsICharsetDetectionObserver()
116         {
117             @SuppressWarnings( "checkstyle:methodname" )
118             public void Notify( String charset )
119             {
120                 FileCharsetDetector.this.charset = charset;
121                 FileCharsetDetector.this.found = true;
122             }
123         } );
124 
125         BufferedInputStream imp = new BufferedInputStream( detectedStream );
126 
127         byte[] buf = new byte[1024];
128         int len;
129         boolean done = false;
130         boolean isAscii = true;
131 
132         while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
133         {
134             // Check if the stream is only ascii.
135             if ( isAscii )
136             {
137                 isAscii = det.isAscii( buf, len );
138             }
139 
140             // DoIt if non-ascii and not done yet.
141             if ( !isAscii && !done )
142             {
143                 done = det.DoIt( buf, len, false );
144                 found = done;
145             }
146         }
147         det.DataEnd();
148 
149         if ( !isFound() )
150         {
151             String[] prob = det.getProbableCharsets();
152 
153             if ( prob.length > 0 )
154             {
155                 charset = prob[0];
156             }
157         }
158 
159         if ( isAscii )
160         {
161             charset = "ASCII";
162         }
163     }
164 
165     public String getCharset()
166     {
167         return charset;
168     }
169 
170     public boolean isFound()
171     {
172         return found;
173     }
174 }