View Javadoc

1   package org.apache.maven.archetype.common.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.codehaus.plexus.logging.AbstractLogEnabled;
23  import org.mozilla.intl.chardet.nsDetector;
24  import org.mozilla.intl.chardet.nsICharsetDetectionObserver;
25  import org.mozilla.intl.chardet.nsPSMDetector;
26  
27  import java.io.BufferedInputStream;
28  import java.io.File;
29  import java.io.FileInputStream;
30  import java.io.FileNotFoundException;
31  import java.io.IOException;
32  import java.io.InputStream;
33  
34  /** @author rafale */
35  public class FileCharsetDetector
36      extends AbstractLogEnabled
37  {
38      private String charset = null;
39  
40      private boolean found = false;
41  
42      public FileCharsetDetector( File detectedFile )
43          throws FileNotFoundException, IOException
44      {
45          nsDetector det = new nsDetector( nsPSMDetector.ALL );
46  
47          det.Init(
48              new nsICharsetDetectionObserver()
49              {
50                  public void Notify( String charset )
51                  {
52                      FileCharsetDetector.this.charset = charset;
53                      FileCharsetDetector.this.found = true;
54                  }
55              }
56          );
57  
58          BufferedInputStream imp = new BufferedInputStream( new FileInputStream( detectedFile ) );
59  
60          byte[] buf = new byte[1024];
61          int len;
62          boolean done = false;
63          boolean isAscii = true;
64  
65          while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
66          {
67              // Check if the stream is only ascii.
68              if ( isAscii )
69              {
70                  isAscii = det.isAscii( buf, len );
71              }
72  
73              // DoIt if non-ascii and not done yet.
74              if ( !isAscii && !done )
75              {
76                  done = det.DoIt( buf, len, false );
77                  found = done;
78              }
79          }
80          det.DataEnd();
81  
82          if ( !isFound() )
83          {
84              String[] prob = det.getProbableCharsets();
85  
86              if ( prob.length > 0 )
87              {
88                  charset = prob[0];
89              }
90          }
91  
92          if ( isAscii )
93          {
94              charset = "ASCII";
95          }
96      }
97  
98      public FileCharsetDetector( InputStream detectedStream )
99          throws FileNotFoundException, IOException
100     {
101         nsDetector det = new nsDetector( nsPSMDetector.ALL );
102 
103         det.Init(
104             new nsICharsetDetectionObserver()
105             {
106                 public void Notify( String charset )
107                 {
108                     FileCharsetDetector.this.charset = charset;
109                     FileCharsetDetector.this.found = true;
110                 }
111             }
112         );
113 
114         BufferedInputStream imp = new BufferedInputStream( detectedStream );
115 
116         byte[] buf = new byte[1024];
117         int len;
118         boolean done = false;
119         boolean isAscii = true;
120 
121         while ( ( len = imp.read( buf, 0, buf.length ) ) != -1 )
122         {
123             // Check if the stream is only ascii.
124             if ( isAscii )
125             {
126                 isAscii = det.isAscii( buf, len );
127             }
128 
129             // DoIt if non-ascii and not done yet.
130             if ( !isAscii && !done )
131             {
132                 done = det.DoIt( buf, len, false );
133                 found = done;
134             }
135         }
136         det.DataEnd();
137 
138         if ( !isFound() )
139         {
140             String[] prob = det.getProbableCharsets();
141 
142             if ( prob.length > 0 )
143             {
144                 charset = prob[0];
145             }
146         }
147 
148         if ( isAscii )
149         {
150             charset = "ASCII";
151         }
152     }
153 
154     public String getCharset()
155     {
156         return charset;
157     }
158 
159     public boolean isFound()
160     {
161         return found;
162     }
163 }