1 package org.apache.maven.index.updater;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.BufferedInputStream;
23 import java.io.DataInput;
24 import java.io.DataInputStream;
25 import java.io.EOFException;
26 import java.io.IOException;
27 import java.io.InputStream;
28 import java.io.UTFDataFormatException;
29 import java.util.Date;
30 import java.util.LinkedHashSet;
31 import java.util.Set;
32 import java.util.zip.GZIPInputStream;
33
34 import org.apache.lucene.document.Document;
35 import org.apache.lucene.document.Field;
36 import org.apache.lucene.document.FieldType;
37 import org.apache.lucene.index.IndexOptions;
38 import org.apache.lucene.index.IndexWriter;
39 import org.apache.maven.index.ArtifactInfo;
40 import org.apache.maven.index.context.IndexUtils;
41 import org.apache.maven.index.context.IndexingContext;
42
43
44
45
46
47
48 public class IndexDataReader
49 {
50 private final DataInputStream dis;
51
52 public IndexDataReader( final InputStream is )
53 throws IOException
54 {
55
56
57
58 is.mark( 2 );
59 InputStream data;
60 if ( is.read() == 0x1f && is.read() == 0x8b )
61 {
62 is.reset();
63 data = new BufferedInputStream( new GZIPInputStream( is, 1024 * 8 ), 1024 * 8 );
64 }
65 else
66 {
67 is.reset();
68 data = new BufferedInputStream( is, 1024 * 8 );
69 }
70
71 this.dis = new DataInputStream( data );
72 }
73
74 public IndexDataReadResult readIndex( IndexWriter w, IndexingContext context )
75 throws IOException
76 {
77 long timestamp = readHeader();
78
79 Date date = null;
80
81 if ( timestamp != -1 )
82 {
83 date = new Date( timestamp );
84
85 IndexUtils.updateTimestamp( w.getDirectory(), date );
86 }
87
88 int n = 0;
89
90 Document doc;
91 Set<String> rootGroups = new LinkedHashSet<>();
92 Set<String> allGroups = new LinkedHashSet<>();
93
94 while ( ( doc = readDocument() ) != null )
95 {
96 ArtifactInfo ai = IndexUtils.constructArtifactInfo( doc, context );
97 if ( ai != null )
98 {
99 w.addDocument( IndexUtils.updateDocument( doc, context, false, ai ) );
100
101 rootGroups.add( ai.getRootGroup() );
102 allGroups.add( ai.getGroupId() );
103
104 }
105 else
106 {
107 w.addDocument( doc );
108 }
109 n++;
110 }
111
112 w.commit();
113
114 IndexDataReadResult result = new IndexDataReadResult();
115 result.setDocumentCount( n );
116 result.setTimestamp( date );
117 result.setRootGroups( rootGroups );
118 result.setAllGroups( allGroups );
119
120 return result;
121 }
122
123 public long readHeader()
124 throws IOException
125 {
126 final byte hdrbyte = (byte) ( ( IndexDataWriter.VERSION << 24 ) >> 24 );
127
128 if ( hdrbyte != dis.readByte() )
129 {
130
131 throw new IOException( "Provided input contains unexpected data (0x01 expected as 1st byte)!" );
132 }
133
134 return dis.readLong();
135 }
136
137 public Document readDocument()
138 throws IOException
139 {
140 int fieldCount;
141 try
142 {
143 fieldCount = dis.readInt();
144 }
145 catch ( EOFException ex )
146 {
147 return null;
148 }
149
150 Document doc = new Document();
151
152 for ( int i = 0; i < fieldCount; i++ )
153 {
154 doc.add( readField() );
155 }
156
157
158 final Field uinfoField = (Field) doc.getField( ArtifactInfo.UINFO );
159 final String info = doc.get( ArtifactInfo.INFO );
160 if ( uinfoField != null && info != null && !info.isEmpty() )
161 {
162 final String[] splitInfo = ArtifactInfo.FS_PATTERN.split( info );
163 if ( splitInfo.length > 6 )
164 {
165 final String extension = splitInfo[6];
166 final String uinfoString = uinfoField.stringValue();
167 if ( uinfoString.endsWith( ArtifactInfo.FS + ArtifactInfo.NA ) )
168 {
169 uinfoField.setStringValue( uinfoString + ArtifactInfo.FS + ArtifactInfo.nvl( extension ) );
170 }
171 }
172 }
173
174 return doc;
175 }
176
177 private Field readField()
178 throws IOException
179 {
180 int flags = dis.read();
181
182 FieldType fieldType = new FieldType();
183 if ( ( flags & IndexDataWriter.F_INDEXED ) > 0 )
184 {
185 boolean tokenized = ( flags & IndexDataWriter.F_TOKENIZED ) > 0;
186 fieldType.setTokenized( tokenized );
187 fieldType.setOmitNorms( !tokenized );
188 fieldType.setIndexOptions( IndexOptions.DOCS_AND_FREQS_AND_POSITIONS );
189 }
190 fieldType.setStored( ( flags & IndexDataWriter.F_STORED ) > 0 );
191
192 String name = dis.readUTF();
193 String value = readUTF( dis );
194
195 return new Field( name, value, fieldType );
196 }
197
198 private static String readUTF( DataInput in )
199 throws IOException
200 {
201 int utflen = in.readInt();
202
203 byte[] bytearr;
204 char[] chararr;
205
206 try
207 {
208 bytearr = new byte[utflen];
209 chararr = new char[utflen];
210 }
211 catch ( OutOfMemoryError e )
212 {
213 throw new IOException( "Index data content is inappropriate (is junk?), leads to OutOfMemoryError!"
214 + " See MINDEXER-28 for more information!", e );
215 }
216
217 int c, char2, char3;
218 int count = 0;
219 int chararrCount = 0;
220
221 in.readFully( bytearr, 0, utflen );
222
223 while ( count < utflen )
224 {
225 c = bytearr[count] & 0xff;
226 if ( c > 127 )
227 {
228 break;
229 }
230 count++;
231 chararr[chararrCount++] = (char) c;
232 }
233
234 while ( count < utflen )
235 {
236 c = bytearr[count] & 0xff;
237 switch ( c >> 4 )
238 {
239 case 0:
240 case 1:
241 case 2:
242 case 3:
243 case 4:
244 case 5:
245 case 6:
246 case 7:
247
248 count++;
249 chararr[chararrCount++] = (char) c;
250 break;
251
252 case 12:
253 case 13:
254
255 count += 2;
256 if ( count > utflen )
257 {
258 throw new UTFDataFormatException( "malformed input: partial character at end" );
259 }
260 char2 = bytearr[count - 1];
261 if ( ( char2 & 0xC0 ) != 0x80 )
262 {
263 throw new UTFDataFormatException( "malformed input around byte " + count );
264 }
265 chararr[chararrCount++] = (char) ( ( ( c & 0x1F ) << 6 ) | ( char2 & 0x3F ) );
266 break;
267
268 case 14:
269
270 count += 3;
271 if ( count > utflen )
272 {
273 throw new UTFDataFormatException( "malformed input: partial character at end" );
274 }
275 char2 = bytearr[count - 2];
276 char3 = bytearr[count - 1];
277 if ( ( ( char2 & 0xC0 ) != 0x80 ) || ( ( char3 & 0xC0 ) != 0x80 ) )
278 {
279 throw new UTFDataFormatException( "malformed input around byte " + ( count - 1 ) );
280 }
281 chararr[chararrCount++] =
282 (char) ( ( ( c & 0x0F ) << 12 ) | ( ( char2 & 0x3F ) << 6 ) | ( ( char3 & 0x3F ) ) );
283 break;
284
285 default:
286
287 throw new UTFDataFormatException( "malformed input around byte " + count );
288 }
289 }
290
291
292 return new String( chararr, 0, chararrCount );
293 }
294
295
296
297
298 public static class IndexDataReadResult
299 {
300 private Date timestamp;
301
302 private int documentCount;
303
304 private Set<String> rootGroups;
305
306 private Set<String> allGroups;
307
308 public void setDocumentCount( int documentCount )
309 {
310 this.documentCount = documentCount;
311 }
312
313 public int getDocumentCount()
314 {
315 return documentCount;
316 }
317
318 public void setTimestamp( Date timestamp )
319 {
320 this.timestamp = timestamp;
321 }
322
323 public Date getTimestamp()
324 {
325 return timestamp;
326 }
327
328 public void setRootGroups( Set<String> rootGroups )
329 {
330 this.rootGroups = rootGroups;
331 }
332
333 public Set<String> getRootGroups()
334 {
335 return rootGroups;
336 }
337
338 public void setAllGroups( Set<String> allGroups )
339 {
340 this.allGroups = allGroups;
341 }
342
343 public Set<String> getAllGroups()
344 {
345 return allGroups;
346 }
347
348 }
349
350
351
352
353
354
355
356
357
358
359 public IndexDataReadResult readIndex( final IndexDataReadVisitor visitor, final IndexingContext context )
360 throws IOException
361 {
362 dis.readByte();
363
364 long timestamp = dis.readLong();
365
366 Date date = null;
367
368 if ( timestamp != -1 )
369 {
370 date = new Date( timestamp );
371 }
372
373 int n = 0;
374
375 Document doc;
376 while ( ( doc = readDocument() ) != null )
377 {
378 visitor.visitDocument( IndexUtils.updateDocument( doc, context, false ) );
379
380 n++;
381 }
382
383 IndexDataReadResult result = new IndexDataReadResult();
384 result.setDocumentCount( n );
385 result.setTimestamp( date );
386 return result;
387 }
388
389
390
391
/**
 * Callback used by {@code readIndex(IndexDataReadVisitor, IndexingContext)} to hand over the documents
 * read from the index data one at a time.
 */
public interface IndexDataReadVisitor
{

    /**
     * Called once for each document read from the index data.
     *
     * @param document the document read, as returned by {@code IndexUtils.updateDocument}
     */
    void visitDocument( Document document );

}
403
404 }