1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedInputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.EOFException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.UTFDataFormatException;
29 import java.util.Date;
30 import java.util.zip.GZIPInputStream;
31
32 import org.apache.lucene.document.Document;
33 import org.apache.lucene.document.Field;
34 import org.apache.lucene.document.Field.Index;
35 import org.apache.lucene.document.Field.Store;
36 import org.apache.lucene.index.IndexWriter;
37 import org.apache.maven.index.context.IndexUtils;
38 import org.apache.maven.index.context.IndexingContext;
39
40
41
42
43
44
45 public class IndexDataReader
46 {
47 private final DataInputStream dis;
48
49 public IndexDataReader( InputStream is )
50 throws IOException
51 {
52 BufferedInputStream bis = new BufferedInputStream( is, 1024 * 8 );
53
54
55
56
57 bis.mark( 2 );
58 InputStream data;
59 if ( bis.read() == 0x1f && bis.read() == 0x8b )
60 {
61 bis.reset();
62 data = new GZIPInputStream( bis, 2 * 1024 );
63 }
64 else
65 {
66 bis.reset();
67 data = bis;
68 }
69
70 this.dis = new DataInputStream( data );
71 }
72
73 public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
74 throws IOException
75 {
76 long timestamp = readHeader();
77
78 Date date = null;
79
80 if ( timestamp != -1 )
81 {
82 date = new Date( timestamp );
83
84 IndexUtils.updateTimestamp( w.getDirectory(), date );
85 }
86
87 int n = 0;
88
89 Document doc;
90 while ( ( doc = readDocument() ) != null )
91 {
92 w.addDocument( IndexUtils.updateDocument( doc, context, false ) );
93
94 n++;
95 }
96
97 w.commit();
98 w.optimize();
99
100 IndexDataReadResult result = new IndexDataReadResult();
101 result.setDocumentCount( n );
102 result.setTimestamp( date );
103 return result;
104 }
105
106 public long readHeader()
107 throws IOException
108 {
109 final byte HDRBYTE = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
110
111 if ( HDRBYTE != dis.readByte() )
112 {
113
114 throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
115 }
116
117 return dis.readLong();
118 }
119
120 public Document readDocument()
121 throws IOException
122 {
123 int fieldCount;
124 try
125 {
126 fieldCount = dis.readInt();
127 }
128 catch ( EOFException ex )
129 {
130 return null;
131 }
132
133 Document doc = new Document();
134
135 for ( int i = 0; i < fieldCount; i++ )
136 {
137 doc.add( readField() );
138 }
139
140 return doc;
141 }
142
143 private Field readField()
144 throws IOException
145 {
146 int flags = dis.read();
147
148 Index index = Index.NO;
149 if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
150 {
151 boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
152 index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED;
153 }
154
155 Store store = Store.NO;
156 if ( ( flags & IndexDataWriter.F_STORED ) > 0 )
157 {
158 store = Store.YES;
159 }
160
161 String name = dis.readUTF();
162 String value = readUTF( dis );
163
164 return new Field( name, value, store, index );
165 }
166
167 private static String readUTF( DataInput in )
168 throws IOException
169 {
170 int utflen = in.readInt();
171
172 byte[] bytearr;
173 char[] chararr;
174
175 try
176 {
177 bytearr = new byte[utflen];
178 chararr = new char[utflen];
179 }
180 catch ( OutOfMemoryError e )
181 {
182 final IOException ex =
183 new IOException(
184 "Index data content is inappropriate (is junk?), leads to OutOfMemoryError! See MINDEXER-28 for more information!" );
185 ex.initCause( e );
186 throw ex;
187 }
188
189 int c, char2, char3;
190 int count = 0;
191 int chararr_count = 0;
192
193 in.readFully( bytearr, 0, utflen );
194
195 while ( count < utflen )
196 {
197 c = bytearr[count] & 0xff;
198 if ( c > 127 )
199 {
200 break;
201 }
202 count++;
203 chararr[chararr_count++] = (char) c;
204 }
205
206 while ( count < utflen )
207 {
208 c = bytearr[count] & 0xff;
209 switch ( c >> 4 )
210 {
211 case 0:
212 case 1:
213 case 2:
214 case 3:
215 case 4:
216 case 5:
217 case 6:
218 case 7:
219
220 count++;
221 chararr[chararr_count++] = (char) c;
222 break;
223
224 case 12:
225 case 13:
226
227 count += 2;
228 if ( count > utflen )
229 {
230 throw new UTFDataFormatException( "malformed input: partial character at end" );
231 }
232 char2 = bytearr[count - 1];
233 if ( ( char2 & 0xC0 ) != 0x80 )
234 {
235 throw new UTFDataFormatException( "malformed input around byte " + count );
236 }
237 chararr[chararr_count++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
238 break;
239
240 case 14:
241
242 count += 3;
243 if ( count > utflen )
244 {
245 throw new UTFDataFormatException( "malformed input: partial character at end" );
246 }
247 char2 = bytearr[count - 2];
248 char3 = bytearr[count - 1];
249 if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
250 {
251 throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
252 }
253 chararr[chararr_count++] =
254 (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) << 0 ) );
255 break;
256
257 default:
258
259 throw new UTFDataFormatException( "malformed input around byte " + count );
260 }
261 }
262
263
264 return new String( chararr, 0, chararr_count );
265 }
266
267
268
269
270 public static class IndexDataReadResult
271 {
272 private Date timestamp;
273
274 private int documentCount;
275
276 public void setDocumentCount( int documentCount )
277 {
278 this.documentCount = documentCount;
279 }
280
281 public int getDocumentCount()
282 {
283 return documentCount;
284 }
285
286 public void setTimestamp( Date timestamp )
287 {
288 this.timestamp = timestamp;
289 }
290
291 public Date getTimestamp()
292 {
293 return timestamp;
294 }
295
296 }
297
298
299
300
301
302
303
304
305
306
307 public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
308 throws IOException
309 {
310 dis.readByte();
311
312 long timestamp = dis.readLong();
313
314 Date date = null;
315
316 if ( timestamp != -1 )
317 {
318 date = new Date( timestamp );
319 }
320
321 int n = 0;
322
323 Document doc;
324 while ( ( doc = readDocument() ) != null )
325 {
326 visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
327
328 n++;
329 }
330
331 IndexDataReadResult result = new IndexDataReadResult();
332 result.setDocumentCount( n );
333 result.setTimestamp( date );
334 return result;
335 }
336
337
338
339
340 public static interface IndexDataReadVisitor
341 {
342
343
344
345
346
347
348 void visitDocument( Document document );
349
350 }
351
352 }