View Javadoc

1   package org.apache.maven.archetype.common.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.codehaus.plexus.logging.AbstractLogEnabled;
23  import org.mozilla.intl.chardet.nsDetector;
24  import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25  import org.mozilla.intl.chardet.nsPSMDetector;
26  
27  import java.io.BufferedInputStream;
28  import java.io.File;
29  import java.io.FileInputStream;
30  import java.io.FileNotFoundException;
31  import java.io.IOException;
32  import java.io.InputStream;
33  
34  /**
35   * @author rafale
36   */
37  public class FileCharsetDetector
38      extends AbstractLogEnabled
39  {
40      private String charset = null;
41  
42      private boolean found = false;
43  
44      public FileCharsetDetector( File detectedFile )
45          throws FileNotFoundException, IOException
46      {
47          nsDetector det = new nsDetector( nsPSMDetector.ALL );
48  
49          det.Init( new nsICharsetDetectionObserver()
50          {
51              public void Notify( String charset )
52              {
53                  FileCharsetDetector.this.charset = charset;
54                  FileCharsetDetector.this.found = true;
55              }
56          } );
57  
58          FileInputStream fileInputStream = new FileInputStream( detectedFile );
59          BufferedInputStream imp = new BufferedInputStream( fileInputStream );
60          try
61          {
62  
63              byte[] buf = new byte[1024];
64              int len;
65              boolean done = false;
66              boolean isAscii = true;
67  
68              while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
69              {
70                  // Check if the stream is only ascii.
71                  if ( isAscii )
72                  {
73                      isAscii = det.isAscii( buf, len );
74                  }
75  
76                  // DoIt if non-ascii and not done yet.
77                  if ( !isAscii && !done )
78                  {
79                      done = det.DoIt( buf, len, false );
80                      found = done;
81                  }
82              }
83              det.DataEnd();
84  
85              if ( !isFound() )
86              {
87                  String[] prob = det.getProbableCharsets();
88  
89                  if ( prob.length > 0 )
90                  {
91                      charset = prob[0];
92                  }
93              }
94  
95              if ( isAscii )
96              {
97                  charset = "ASCII";
98              }
99          }
100         finally
101         {
102             imp.close();
103             fileInputStream.close();
104         }
105     }
106 
107 
108 
109     public FileCharsetDetector( InputStream detectedStream )
110         throws FileNotFoundException, IOException
111     {
112         nsDetector det = new nsDetector( nsPSMDetector.ALL );
113 
114         det.Init( new nsICharsetDetectionObserver()
115         {
116             public void Notify( String charset )
117             {
118                 FileCharsetDetector.this.charset = charset;
119                 FileCharsetDetector.this.found = true;
120             }
121         } );
122 
123         BufferedInputStream imp = new BufferedInputStream( detectedStream );
124 
125         byte[] buf = new byte[1024];
126         int len;
127         boolean done = false;
128         boolean isAscii = true;
129 
130         while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
131         {
132             // Check if the stream is only ascii.
133             if ( isAscii )
134             {
135                 isAscii = det.isAscii( buf, len );
136             }
137 
138             // DoIt if non-ascii and not done yet.
139             if ( !isAscii && !done )
140             {
141                 done = det.DoIt( buf, len, false );
142                 found = done;
143             }
144         }
145         det.DataEnd();
146 
147         if ( !isFound() )
148         {
149             String[] prob = det.getProbableCharsets();
150 
151             if ( prob.length > 0 )
152             {
153                 charset = prob[0];
154             }
155         }
156 
157         if ( isAscii )
158         {
159             charset = "ASCII";
160         }
161     }
162 
163     public String getCharset()
164     {
165         return charset;
166     }
167 
168     public boolean isFound()
169     {
170         return found;
171     }
172 }