1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedInputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.EOFException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.UTFDataFormatException;
29 import java.util.Date;
30 import java.util.zip.GZIPInputStream;
31
32 import com.google.common.base.Strings;
33 import java.util.LinkedHashSet;
34 import java.util.Set;
35 import org.apache.lucene.document.Document;
36 import org.apache.lucene.document.Field;
37 import org.apache.lucene.document.Field.Index;
38 import org.apache.lucene.document.Field.Store;
39 import org.apache.lucene.index.IndexWriter;
40 import org.apache.maven.index.ArtifactInfo;
41 import org.apache.maven.index.context.IndexUtils;
42 import org.apache.maven.index.context.IndexingContext;
43
44
45
46
47
48
49 public class IndexDataReader
50 {
51 private final DataInputStream dis;
52
53 public IndexDataReader( final InputStream is )
54 throws IOException
55 {
56
57
58
59 is.mark( 2 );
60 InputStream data;
61 if ( is.read() == 0x1f && is.read() == 0x8b )
62 {
63 is.reset();
64 data = new BufferedInputStream( new GZIPInputStream( is, 1024 * 8 ), 1024 * 8 );
65 }
66 else
67 {
68 BufferedInputStream bis = new BufferedInputStream( is, 1024 * 8 );
69 bis.reset();
70 data = bis;
71 }
72
73 this.dis = new DataInputStream( data );
74 }
75
76 public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
77 throws IOException
78 {
79 long timestamp = readHeader();
80
81 Date date = null;
82
83 if ( timestamp != -1 )
84 {
85 date = new Date( timestamp );
86
87 IndexUtils.updateTimestamp( w.getDirectory(), date );
88 }
89
90 int n = 0;
91
92 Document doc;
93 Set<String> rootGroups = new LinkedHashSet<>();
94 Set<String> allGroups = new LinkedHashSet<>();
95
96 while ( ( doc = readDocument() ) != null )
97 {
98 ArtifactInfo ai = IndexUtils.constructArtifactInfo( doc, context );
99 if ( ai != null )
100 {
101 w.addDocument( IndexUtils.updateDocument( doc, context, false, ai ) );
102
103 rootGroups.add( ai.getRootGroup() );
104 allGroups.add( ai.getGroupId() );
105
106 }
107 else
108 {
109 w.addDocument( doc );
110 }
111 n++;
112 }
113
114 w.commit();
115
116 IndexDataReadResult result = new IndexDataReadResult();
117 result.setDocumentCount( n );
118 result.setTimestamp( date );
119 result.setRootGroups( rootGroups );
120 result.setAllGroups( allGroups );
121
122 return result;
123 }
124
125 public long readHeader()
126 throws IOException
127 {
128 final byte hdrbyte = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
129
130 if ( hdrbyte != dis.readByte() )
131 {
132
133 throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
134 }
135
136 return dis.readLong();
137 }
138
139 public Document readDocument()
140 throws IOException
141 {
142 int fieldCount;
143 try
144 {
145 fieldCount = dis.readInt();
146 }
147 catch ( EOFException ex )
148 {
149 return null;
150 }
151
152 Document doc = new Document();
153
154 for ( int i = 0; i < fieldCount; i++ )
155 {
156 doc.add( readField() );
157 }
158
159
160 final Field uinfoField = (Field) doc.getField( ArtifactInfo.UINFO );
161 final String info = doc.get( ArtifactInfo.INFO );
162 if ( uinfoField != null && !Strings.isNullOrEmpty( info ) )
163 {
164 final String[] splitInfo = ArtifactInfo.FS_PATTERN.split( info );
165 if ( splitInfo.length > 6 )
166 {
167 final String extension = splitInfo[6];
168 final String uinfoString = uinfoField.stringValue();
169 if ( uinfoString.endsWith( ArtifactInfo.FS + ArtifactInfo.NA ) )
170 {
171 uinfoField.setStringValue( uinfoString + ArtifactInfo.FS + ArtifactInfo.nvl( extension ) );
172 }
173 }
174 }
175
176 return doc;
177 }
178
179 private Field readField()
180 throws IOException
181 {
182 int flags = dis.read();
183
184 Index index = Index.NO;
185 if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
186 {
187 boolean isTokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
188 index = isTokenized ? Index.ANALYZED : Index.NOT_ANALYZED;
189 }
190
191 Store store = Store.NO;
192 if ( ( flags & IndexDataWriter.F_STORED ) > 0 )
193 {
194 store = Store.YES;
195 }
196
197 String name = dis.readUTF();
198 String value = readUTF( dis );
199
200 return new Field( name, value, store, index );
201 }
202
203 private static String readUTF( DataInput in )
204 throws IOException
205 {
206 int utflen = in.readInt();
207
208 byte[] bytearr;
209 char[] chararr;
210
211 try
212 {
213 bytearr = new byte[utflen];
214 chararr = new char[utflen];
215 }
216 catch ( OutOfMemoryError e )
217 {
218 final IOException ex =
219 new IOException( "Index data content is inappropriate (is junk?), leads to OutOfMemoryError!"
220 + " See MINDEXER-28 for more information!" );
221 ex.initCause( e );
222 throw ex;
223 }
224
225 int c, char2, char3;
226 int count = 0;
227 int chararrCount = 0;
228
229 in.readFully( bytearr, 0, utflen );
230
231 while ( count < utflen )
232 {
233 c = bytearr[count] & 0xff;
234 if ( c > 127 )
235 {
236 break;
237 }
238 count++;
239 chararr[chararrCount++] = (char) c;
240 }
241
242 while ( count < utflen )
243 {
244 c = bytearr[count] & 0xff;
245 switch ( c >> 4 )
246 {
247 case 0:
248 case 1:
249 case 2:
250 case 3:
251 case 4:
252 case 5:
253 case 6:
254 case 7:
255
256 count++;
257 chararr[chararrCount++] = (char) c;
258 break;
259
260 case 12:
261 case 13:
262
263 count += 2;
264 if ( count > utflen )
265 {
266 throw new UTFDataFormatException( "malformed input: partial character at end" );
267 }
268 char2 = bytearr[count - 1];
269 if ( ( char2 & 0xC0 ) != 0x80 )
270 {
271 throw new UTFDataFormatException( "malformed input around byte " + count );
272 }
273 chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
274 break;
275
276 case 14:
277
278 count += 3;
279 if ( count > utflen )
280 {
281 throw new UTFDataFormatException( "malformed input: partial character at end" );
282 }
283 char2 = bytearr[count - 2];
284 char3 = bytearr[count - 1];
285 if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
286 {
287 throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
288 }
289 chararr[chararrCount++] =
290 (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) << 0 ) );
291 break;
292
293 default:
294
295 throw new UTFDataFormatException( "malformed input around byte " + count );
296 }
297 }
298
299
300 return new String( chararr, 0, chararrCount );
301 }
302
303
304
305
306 public static class IndexDataReadResult
307 {
308 private Date timestamp;
309
310 private int documentCount;
311
312 private Set<String> rootGroups;
313
314 private Set<String> allGroups;
315
316 public void setDocumentCount( int documentCount )
317 {
318 this.documentCount = documentCount;
319 }
320
321 public int getDocumentCount()
322 {
323 return documentCount;
324 }
325
326 public void setTimestamp( Date timestamp )
327 {
328 this.timestamp = timestamp;
329 }
330
331 public Date getTimestamp()
332 {
333 return timestamp;
334 }
335
336 public void setRootGroups( Set<String> rootGroups )
337 {
338 this.rootGroups = rootGroups;
339 }
340
341 public Set<String> getRootGroups()
342 {
343 return rootGroups;
344 }
345
346 public void setAllGroups( Set<String> allGroups )
347 {
348 this.allGroups = allGroups;
349 }
350
351 public Set<String> getAllGroups()
352 {
353 return allGroups;
354 }
355
356 }
357
358
359
360
361
362
363
364
365
366
367 public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
368 throws IOException
369 {
370 dis.readByte();
371
372 long timestamp = dis.readLong();
373
374 Date date = null;
375
376 if ( timestamp != -1 )
377 {
378 date = new Date( timestamp );
379 }
380
381 int n = 0;
382
383 Document doc;
384 while ( ( doc = readDocument() ) != null )
385 {
386 visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
387
388 n++;
389 }
390
391 IndexDataReadResult result = new IndexDataReadResult();
392 result.setDocumentCount( n );
393 result.setTimestamp( date );
394 return result;
395 }
396
397
398
399
400 public interface IndexDataReadVisitor
401 {
402
403
404
405
406
407
408 void visitDocument( Document document );
409
410 }
411
412 }