View Javadoc
1   package org.apache.maven.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.io.StringReader;
24  import java.util.ArrayList;
25  import java.util.Iterator;
26  import java.util.List;
27  import org.apache.lucene.analysis.Analyzer;
28  
29  import org.apache.lucene.analysis.CachingTokenFilter;
30  import org.apache.lucene.analysis.TokenStream;
31  import org.apache.lucene.document.Document;
32  import org.apache.lucene.search.Explanation;
33  import org.apache.lucene.search.IndexSearcher;
34  import org.apache.lucene.search.Query;
35  import org.apache.lucene.search.TopDocs;
36  import org.apache.lucene.search.highlight.Formatter;
37  import org.apache.lucene.search.highlight.Highlighter;
38  import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
39  import org.apache.lucene.search.highlight.QueryScorer;
40  import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
41  import org.apache.lucene.search.highlight.TextFragment;
42  import org.apache.maven.index.context.IndexUtils;
43  import org.apache.maven.index.context.IndexingContext;
44  import org.apache.maven.index.context.NexusIndexMultiSearcher;
45  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
46  
47  /**
48   * Default implementation of IteratorResultSet. TODO: there is too much of logic, refactor this!
49   * 
50   * @author cstamas
51   */
52  public class DefaultIteratorResultSet
53      implements IteratorResultSet
54  {
55      private final IteratorSearchRequest searchRequest;
56  
57      private final NexusIndexMultiSearcher indexSearcher;
58  
59      private final List<IndexingContext> contexts;
60  
61      private final int[] starts;
62  
63      private final ArtifactInfoFilter filter;
64  
65      private final ArtifactInfoPostprocessor postprocessor;
66  
67      private final List<MatchHighlightRequest> matchHighlightRequests;
68  
69      private final TopDocs hits;
70  
71      private final int from;
72  
73      private final int count;
74  
75      private final int maxRecPointer;
76  
77      private int pointer;
78  
79      private int processedArtifactInfoCount;
80  
81      private ArtifactInfo ai;
82  
83      protected DefaultIteratorResultSet( final IteratorSearchRequest request,
84                                          final NexusIndexMultiSearcher indexSearcher,
85                                          final List<IndexingContext> contexts, final TopDocs hits )
86          throws IOException
87      {
88          this.searchRequest = request;
89  
90          this.indexSearcher = indexSearcher;
91  
92          this.contexts = contexts;
93  
94          {
95              int maxDoc = 0;
96              this.starts = new int[contexts.size() + 1]; // build starts array
97              // this is good to do as we have NexusIndexMultiSearcher passed in contructor, so it is already open, hence
98              // #acquire() already invoked on underlying NexusIndexMultiReader
99              final List<IndexSearcher> acquiredSearchers =
100                 indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
101             for ( int i = 0; i < contexts.size(); i++ )
102             {
103                 starts[i] = maxDoc;
104                 maxDoc += acquiredSearchers.get( i ).getIndexReader().maxDoc(); // compute maxDocs
105             }
106             starts[contexts.size()] = maxDoc;
107         }
108 
109         this.filter = request.getArtifactInfoFilter();
110 
111         this.postprocessor = request.getArtifactInfoPostprocessor();
112 
113         this.matchHighlightRequests = request.getMatchHighlightRequests();
114 
115         List<MatchHighlightRequest> matchHighlightRequests = new ArrayList<>();
116         for ( MatchHighlightRequest hr : request.getMatchHighlightRequests() )
117         {
118             Query rewrittenQuery = hr.getQuery().rewrite( indexSearcher.getIndexReader() );
119             matchHighlightRequests.add( new MatchHighlightRequest( hr.getField(), rewrittenQuery,
120                                                                    hr.getHighlightMode() ) );
121         }
122 
123         this.hits = hits;
124 
125         this.from = request.getStart();
126 
127         this.count =
128             ( request.getCount() == AbstractSearchRequest.UNDEFINED ? hits.scoreDocs.length : Math.min(
129                 request.getCount(), hits.scoreDocs.length ) );
130 
131         this.pointer = from;
132 
133         this.processedArtifactInfoCount = 0;
134 
135         this.maxRecPointer = from + count;
136 
137         ai = createNextAi();
138 
139         if ( ai == null )
140         {
141             cleanUp();
142         }
143     }
144 
145     public boolean hasNext()
146     {
147         return ai != null;
148     }
149 
150     public ArtifactInfo next()
151     {
152         ArtifactInfo result = ai;
153 
154         try
155         {
156             ai = createNextAi();
157         }
158         catch ( IOException e )
159         {
160             ai = null;
161 
162             throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
163         }
164         finally
165         {
166             if ( ai == null )
167             {
168                 cleanUp();
169             }
170         }
171 
172         return result;
173     }
174 
175     public void remove()
176     {
177         throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
178     }
179 
180     public Iterator<ArtifactInfo> iterator()
181     {
182         return this;
183     }
184 
185     public void close()
186     {
187         cleanUp();
188     }
189 
190     public int getTotalProcessedArtifactInfoCount()
191     {
192         return processedArtifactInfoCount;
193     }
194 
195     @Override
196     public void finalize()
197         throws Throwable
198     {
199         super.finalize();
200 
201         if ( !cleanedUp )
202         {
203             System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
204                 + searchRequest.getQuery().toString() );
205 
206             cleanUp();
207         }
208     }
209 
210     // ==
211 
212     protected ArtifactInfo createNextAi()
213         throws IOException
214     {
215         ArtifactInfo result = null;
216 
217         // we should stop if:
218         // a) we found what we want
219         // b) pointer advanced over more documents that user requested
220         // c) pointer advanced over more documents that hits has
221         // or we found what we need
222         while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
223         {
224             Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );
225 
226             IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );
227 
228             result = IndexUtils.constructArtifactInfo( doc, context );
229 
230             if ( result != null )
231             {
232                 // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
233                 // For debugging only!!!
234                 if ( searchRequest.isLuceneExplain() )
235                 {
236                     result.getAttributes().put( Explanation.class.getName(),
237                         indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
238                 }
239 
240                 result.setLuceneScore( hits.scoreDocs[pointer].score );
241 
242                 result.setRepository( context.getRepositoryId() );
243 
244                 result.setContext( context.getId() );
245 
246                 if ( filter != null )
247                 {
248                     if ( !filter.accepts( context, result ) )
249                     {
250                         result = null;
251                     }
252                 }
253 
254                 if ( result != null && postprocessor != null )
255                 {
256                     postprocessor.postprocess( context, result );
257                 }
258 
259                 if ( result != null && matchHighlightRequests.size() > 0 )
260                 {
261                     calculateHighlights( context, doc, result );
262                 }
263             }
264 
265             pointer++;
266             processedArtifactInfoCount++;
267         }
268 
269         return result;
270     }
271 
272     private volatile boolean cleanedUp = false;
273 
274     protected synchronized void cleanUp()
275     {
276         if ( cleanedUp )
277         {
278             return;
279         }
280 
281         try
282         {
283             indexSearcher.release();
284         }
285         catch ( IOException e )
286         {
287             throw new IllegalStateException( e );
288         }
289 
290         this.cleanedUp = true;
291     }
292 
293     /**
294      * Creates the MatchHighlights and adds them to ArtifactInfo if found/can.
295      * 
296      * @param context
297      * @param d
298      * @param ai
299      */
300     protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
301         throws IOException
302     {
303         IndexerField field;
304 
305         String text;
306 
307         List<String> highlightFragment;
308 
309         for ( MatchHighlightRequest hr : matchHighlightRequests )
310         {
311             field = selectStoredIndexerField( hr.getField() );
312 
313             if ( field != null )
314             {
315                 text = ai.getFieldValue( field.getOntology() );
316 
317                 if ( text != null )
318                 {
319                     highlightFragment = highlightField( context, hr, field, text );
320 
321                     if ( highlightFragment != null && highlightFragment.size() > 0 )
322                     {
323                         MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );
324 
325                         ai.getMatchHighlights().add( matchHighlight );
326                     }
327                 }
328             }
329         }
330     }
331 
332     /**
333      * Select a STORED IndexerField assigned to passed in Field.
334      * 
335      * @param field
336      * @return
337      */
338     protected IndexerField selectStoredIndexerField( Field field )
339     {
340         // hack here
341         if ( MAVEN.CLASSNAMES.equals( field ) )
342         {
343             return JarFileContentsIndexCreator.FLD_CLASSNAMES;
344         }
345         else
346         {
347             return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
348         }
349     }
350 
351     /**
352      * Returns a string that contains match fragment highlighted in style as user requested.
353      * 
354      * @param context
355      * @param hr
356      * @param field
357      * @param text
358      * @return
359      * @throws IOException
360      */
361     protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
362                                            String text )
363         throws IOException
364     {
365         // exception with classnames
366         if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
367         {
368             text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
369         }
370         
371         Analyzer analyzer = context.getAnalyzer();
372         TokenStream baseTokenStream = analyzer.tokenStream( field.getKey(), new StringReader( text ) );
373         
374         CachingTokenFilter tokenStream = new CachingTokenFilter( baseTokenStream );
375 
376         Formatter formatter;
377 
378         if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
379         {
380             formatter = new SimpleHTMLFormatter();
381         }
382         else
383         {
384             tokenStream.reset();
385             tokenStream.end();
386             tokenStream.close();
387             throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString()
388                 + "\" is not supported!" );
389         }
390 
391         List<String> bestFragments = getBestFragments( hr.getQuery(), formatter, tokenStream, text, 3 );
392         
393         return bestFragments;
394     }
395 
396     protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
397                                                    String text, int maxNumFragments )
398         throws IOException
399     {
400         Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );
401 
402         highlighter.setTextFragmenter( new OneLineFragmenter() );
403 
404         maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check
405 
406         TextFragment[] frag;
407         // Get text
408         ArrayList<String> fragTexts = new ArrayList<>( maxNumFragments );
409 
410         try
411         {
412             frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );
413 
414             for ( TextFragment textFragment : frag )
415             {
416                 if ( ( textFragment != null ) && ( textFragment.getScore() > 0 ) )
417                 {
418                     fragTexts.add( textFragment.toString() );
419                 }
420             }
421         }
422         catch ( InvalidTokenOffsetsException e )
423         {
424             // empty?
425         }
426 
427         return fragTexts;
428     }
429 
430     protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
431     {
432         return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
433     }
434 
435     private static int readerIndex( int n, int[] starts, int numSubReaders )
436     { // find reader for doc n:
437         int lo = 0; // search starts array
438         int hi = numSubReaders - 1; // for first element less
439 
440         while ( hi >= lo )
441         {
442             int mid = ( lo + hi ) >>> 1;
443             int midValue = starts[mid];
444             if ( n < midValue )
445             {
446                 hi = mid - 1;
447             }
448             else if ( n > midValue )
449             {
450                 lo = mid + 1;
451             }
452             else
453             { // found a match
454                 while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
455                 {
456                     mid++; // scan to last match
457                 }
458                 return mid;
459             }
460         }
461         return hi;
462     }
463 }