1 package org.apache.maven.index;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 import java.io.IOException;
23 import java.io.StringReader;
24 import java.util.ArrayList;
25 import java.util.Iterator;
26 import java.util.List;
27 import org.apache.lucene.analysis.Analyzer;
28
29 import org.apache.lucene.analysis.CachingTokenFilter;
30 import org.apache.lucene.analysis.TokenStream;
31 import org.apache.lucene.document.Document;
32 import org.apache.lucene.search.Explanation;
33 import org.apache.lucene.search.IndexSearcher;
34 import org.apache.lucene.search.Query;
35 import org.apache.lucene.search.TopDocs;
36 import org.apache.lucene.search.highlight.Formatter;
37 import org.apache.lucene.search.highlight.Highlighter;
38 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
39 import org.apache.lucene.search.highlight.QueryScorer;
40 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
41 import org.apache.lucene.search.highlight.TextFragment;
42 import org.apache.maven.index.context.IndexUtils;
43 import org.apache.maven.index.context.IndexingContext;
44 import org.apache.maven.index.context.NexusIndexMultiSearcher;
45 import org.apache.maven.index.creator.JarFileContentsIndexCreator;
46
47
48
49
50
51
52 public class DefaultIteratorResultSet
53 implements IteratorResultSet
54 {
55 private final IteratorSearchRequest searchRequest;
56
57 private final NexusIndexMultiSearcher indexSearcher;
58
59 private final List<IndexingContext> contexts;
60
61 private final int[] starts;
62
63 private final ArtifactInfoFilter filter;
64
65 private final ArtifactInfoPostprocessor postprocessor;
66
67 private final List<MatchHighlightRequest> matchHighlightRequests;
68
69 private final TopDocs hits;
70
71 private final int from;
72
73 private final int count;
74
75 private final int maxRecPointer;
76
77 private int pointer;
78
79 private int processedArtifactInfoCount;
80
81 private ArtifactInfo ai;
82
83 protected DefaultIteratorResultSet( final IteratorSearchRequest request,
84 final NexusIndexMultiSearcher indexSearcher,
85 final List<IndexingContext> contexts, final TopDocs hits )
86 throws IOException
87 {
88 this.searchRequest = request;
89
90 this.indexSearcher = indexSearcher;
91
92 this.contexts = contexts;
93
94 {
95 int maxDoc = 0;
96 this.starts = new int[contexts.size() + 1];
97
98
99 final List<IndexSearcher> acquiredSearchers =
100 indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
101 for ( int i = 0; i < contexts.size(); i++ )
102 {
103 starts[i] = maxDoc;
104 maxDoc += acquiredSearchers.get( i ).getIndexReader().maxDoc();
105 }
106 starts[contexts.size()] = maxDoc;
107 }
108
109 this.filter = request.getArtifactInfoFilter();
110
111 this.postprocessor = request.getArtifactInfoPostprocessor();
112
113 this.matchHighlightRequests = request.getMatchHighlightRequests();
114
115 List<MatchHighlightRequest> matchHighlightRequests = new ArrayList<>();
116 for ( MatchHighlightRequest hr : request.getMatchHighlightRequests() )
117 {
118 Query rewrittenQuery = hr.getQuery().rewrite( indexSearcher.getIndexReader() );
119 matchHighlightRequests.add( new MatchHighlightRequest( hr.getField(), rewrittenQuery,
120 hr.getHighlightMode() ) );
121 }
122
123 this.hits = hits;
124
125 this.from = request.getStart();
126
127 this.count =
128 ( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min(
129 request.getCount(), hits.scoreDocs.length ) );
130
131 this.pointer = from;
132
133 this.processedArtifactInfoCount = 0;
134
135 this.maxRecPointer = from + count;
136
137 ai = createNextAi();
138
139 if ( ai == null )
140 {
141 cleanUp();
142 }
143 }
144
145 public boolean hasNext()
146 {
147 return ai != null;
148 }
149
150 public ArtifactInfo next()
151 {
152 ArtifactInfo result = ai;
153
154 try
155 {
156 ai = createNextAi();
157 }
158 catch ( IOException e )
159 {
160 ai = null;
161
162 throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
163 }
164 finally
165 {
166 if ( ai == null )
167 {
168 cleanUp();
169 }
170 }
171
172 return result;
173 }
174
175 public void remove()
176 {
177 throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
178 }
179
180 public Iterator<ArtifactInfo> iterator()
181 {
182 return this;
183 }
184
185 public void close()
186 {
187 cleanUp();
188 }
189
190 public int getTotalProcessedArtifactInfoCount()
191 {
192 return processedArtifactInfoCount;
193 }
194
195 @Override
196 public void finalize()
197 throws Throwable
198 {
199 super.finalize();
200
201 if ( !cleanedUp )
202 {
203 System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
204 + searchRequest.getQuery().toString() );
205
206 cleanUp();
207 }
208 }
209
210
211
212 protected ArtifactInfo createNextAi()
213 throws IOException
214 {
215 ArtifactInfo result = null;
216
217
218
219
220
221
222 while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
223 {
224 Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );
225
226 IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );
227
228 result = IndexUtils.constructArtifactInfo( doc, context );
229
230 if ( result != null )
231 {
232
233
234 if ( searchRequest.isLuceneExplain() )
235 {
236 result.getAttributes().put( Explanation.class.getName(),
237 indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
238 }
239
240 result.setLuceneScore( hits.scoreDocs[pointer].score );
241
242 result.setRepository( context.getRepositoryId() );
243
244 result.setContext( context.getId() );
245
246 if ( filter != null )
247 {
248 if ( !filter.accepts( context, result ) )
249 {
250 result = null;
251 }
252 }
253
254 if ( result != null && postprocessor != null )
255 {
256 postprocessor.postprocess( context, result );
257 }
258
259 if ( result != null && matchHighlightRequests.size() > 0 )
260 {
261 calculateHighlights( context, doc, result );
262 }
263 }
264
265 pointer++;
266 processedArtifactInfoCount++;
267 }
268
269 return result;
270 }
271
272 private volatile boolean cleanedUp = false;
273
274 protected synchronized void cleanUp()
275 {
276 if ( cleanedUp )
277 {
278 return;
279 }
280
281 try
282 {
283 indexSearcher.release();
284 }
285 catch ( IOException e )
286 {
287 throw new IllegalStateException( e );
288 }
289
290 this.cleanedUp = true;
291 }
292
293
294
295
296
297
298
299
300 protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
301 throws IOException
302 {
303 IndexerField field;
304
305 String text;
306
307 List<String> highlightFragment;
308
309 for ( MatchHighlightRequest hr : matchHighlightRequests )
310 {
311 field = selectStoredIndexerField( hr.getField() );
312
313 if ( field != null )
314 {
315 text = ai.getFieldValue( field.getOntology() );
316
317 if ( text != null )
318 {
319 highlightFragment = highlightField( context, hr, field, text );
320
321 if ( highlightFragment != null && highlightFragment.size() > 0 )
322 {
323 MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );
324
325 ai.getMatchHighlights().add( matchHighlight );
326 }
327 }
328 }
329 }
330 }
331
332
333
334
335
336
337
338 protected IndexerField selectStoredIndexerField( Field field )
339 {
340
341 if ( MAVEN.CLASSNAMES.equals( field ) )
342 {
343 return JarFileContentsIndexCreator.FLD_CLASSNAMES;
344 }
345 else
346 {
347 return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
348 }
349 }
350
351
352
353
354
355
356
357
358
359
360
361 protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
362 String text )
363 throws IOException
364 {
365
366 if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
367 {
368 text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
369 }
370
371 Analyzer analyzer = context.getAnalyzer();
372 TokenStream baseTokenStream = analyzer.tokenStream( field.getKey(), new StringReader( text ) );
373
374 CachingTokenFilter tokenStream = new CachingTokenFilter( baseTokenStream );
375
376 Formatter formatter;
377
378 if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
379 {
380 formatter = new SimpleHTMLFormatter();
381 }
382 else
383 {
384 tokenStream.reset();
385 tokenStream.end();
386 tokenStream.close();
387 throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString()
388 + "\" is not supported!" );
389 }
390
391 List<String> bestFragments = getBestFragments( hr.getQuery(), formatter, tokenStream, text, 3 );
392
393 return bestFragments;
394 }
395
396 protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
397 String text, int maxNumFragments )
398 throws IOException
399 {
400 Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );
401
402 highlighter.setTextFragmenter( new OneLineFragmenter() );
403
404 maxNumFragments = Math.max( 1, maxNumFragments );
405
406 TextFragment[] frag;
407
408 ArrayList<String> fragTexts = new ArrayList<>( maxNumFragments );
409
410 try
411 {
412 frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );
413
414 for ( TextFragment textFragment : frag )
415 {
416 if ( ( textFragment != null ) && ( textFragment.getScore() > 0 ) )
417 {
418 fragTexts.add( textFragment.toString() );
419 }
420 }
421 }
422 catch ( InvalidTokenOffsetsException e )
423 {
424
425 }
426
427 return fragTexts;
428 }
429
430 protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
431 {
432 return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
433 }
434
435 private static int readerIndex( int n, int[] starts, int numSubReaders )
436 {
437 int lo = 0;
438 int hi = numSubReaders - 1;
439
440 while ( hi >= lo )
441 {
442 int mid = ( lo + hi ) >>> 1;
443 int midValue = starts[mid];
444 if ( n < midValue )
445 {
446 hi = mid - 1;
447 }
448 else if ( n > midValue )
449 {
450 lo = mid + 1;
451 }
452 else
453 {
454 while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
455 {
456 mid++;
457 }
458 return mid;
459 }
460 }
461 return hi;
462 }
463 }