View Javadoc
1   package org.apache.maven.archetype.common.util;
2   
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
21  
22  import org.codehaus.plexus.logging.AbstractLogEnabled;
23  import org.mozilla.intl.chardet.nsDetector;
24  import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25  import org.mozilla.intl.chardet.nsPSMDetector;
26  
27  import java.io.BufferedInputStream;
28  import java.io.File;
29  import java.io.FileInputStream;
30  import java.io.FileNotFoundException;
31  import java.io.IOException;
32  import java.io.InputStream;
33  
34  /**
35   * @author rafale
36   */
37  public class FileCharsetDetector
38      extends AbstractLogEnabled
39  {
40      private String charset = null;
41  
42      private boolean found = false;
43  
44      public FileCharsetDetector( File detectedFile )
45          throws FileNotFoundException, IOException
46      {
47          nsDetector det = new nsDetector( nsPSMDetector.ALL );
48  
49          det.Init( new nsICharsetDetectionObserver()
50          {
51              @Override
52              @SuppressWarnings( "checkstyle:methodname" )
53              public void Notify( String charset )
54              {
55                  FileCharsetDetector.this.charset = charset;
56                  FileCharsetDetector.this.found = true;
57              }
58          } );
59  
60          try ( FileInputStream fileInputStream = new FileInputStream( detectedFile );
61                BufferedInputStream imp = new BufferedInputStream( fileInputStream ) )
62          {
63              byte[] buf = new byte[1024];
64              int len;
65              boolean done = false;
66              boolean isAscii = true;
67  
68              while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
69              {
70                  // Check if the stream is only ascii.
71                  if ( isAscii )
72                  {
73                      isAscii = det.isAscii( buf, len );
74                  }
75  
76                  // DoIt if non-ascii and not done yet.
77                  if ( !isAscii && !done )
78                  {
79                      done = det.DoIt( buf, len, false );
80                      found = done;
81                  }
82              }
83              det.DataEnd();
84  
85              if ( !isFound() )
86              {
87                  String[] prob = det.getProbableCharsets();
88  
89                  if ( prob.length > 0 )
90                  {
91                      charset = prob[0];
92                  }
93              }
94  
95              if ( isAscii )
96              {
97                  charset = "ASCII";
98              }
99          }
100     }
101 
102 
103 
104     public FileCharsetDetector( InputStream detectedStream )
105         throws FileNotFoundException, IOException
106     {
107         nsDetector det = new nsDetector( nsPSMDetector.ALL );
108 
109         det.Init( new nsICharsetDetectionObserver()
110         {
111             @Override
112             @SuppressWarnings( "checkstyle:methodname" )
113             public void Notify( String charset )
114             {
115                 FileCharsetDetector.this.charset = charset;
116                 FileCharsetDetector.this.found = true;
117             }
118         } );
119 
120         BufferedInputStream imp = new BufferedInputStream( detectedStream );
121 
122         byte[] buf = new byte[1024];
123         int len;
124         boolean done = false;
125         boolean isAscii = true;
126 
127         while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
128         {
129             // Check if the stream is only ascii.
130             if ( isAscii )
131             {
132                 isAscii = det.isAscii( buf, len );
133             }
134 
135             // DoIt if non-ascii and not done yet.
136             if ( !isAscii && !done )
137             {
138                 done = det.DoIt( buf, len, false );
139                 found = done;
140             }
141         }
142         det.DataEnd();
143 
144         if ( !isFound() )
145         {
146             String[] prob = det.getProbableCharsets();
147 
148             if ( prob.length > 0 )
149             {
150                 charset = prob[0];
151             }
152         }
153 
154         if ( isAscii )
155         {
156             charset = "ASCII";
157         }
158     }
159 
160     public String getCharset()
161     {
162         return charset;
163     }
164 
165     public boolean isFound()
166     {
167         return found;
168     }
169 }