package org.apache.maven.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.maven.index.context.IndexUtils;
import org.apache.maven.index.context.IndexingContext;
import org.apache.maven.index.context.NexusIndexMultiSearcher;
import org.apache.maven.index.creator.JarFileContentsIndexCreator;

/**
 * Default implementation of IteratorResultSet. TODO: there is too much logic in here; refactor it!
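 * <p>
 * Instances are created by the searcher and lazily convert Lucene hits into {@link ArtifactInfo}s as the caller
 * iterates. The result set holds an acquired index searcher, so it must be closed (or iterated to exhaustion) to
 * release it. A minimal usage sketch follows; how the {@code IteratorSearchResponse} is obtained (here via a
 * hypothetical {@code nexusIndexer.searchIterator( searchRequest )} call) is only an assumption about the caller:
 *
 * <pre>
 * IteratorSearchResponse response = nexusIndexer.searchIterator( searchRequest );
 * IteratorResultSet results = response.getResults();
 * try
 * {
 *     while ( results.hasNext() )
 *     {
 *         ArtifactInfo ai = results.next();
 *         System.out.println( ai );
 *     }
 * }
 * finally
 * {
 *     results.close(); // releases the underlying index searcher
 * }
 * </pre>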
 *
 * @author cstamas
 */
public class DefaultIteratorResultSet
    implements IteratorResultSet
{
    private final IteratorSearchRequest searchRequest;

    private final NexusIndexMultiSearcher indexSearcher;

    private final List<IndexingContext> contexts;

    private final int[] starts;

    private final ArtifactInfoFilter filter;

    private final ArtifactInfoPostprocessor postprocessor;

    private final List<MatchHighlightRequest> matchHighlightRequests;

    private final TopDocs hits;

    private final int from;

    private final int count;

    private final int maxRecPointer;

    private int pointer;

    private int processedArtifactInfoCount;

    private ArtifactInfo ai;

    protected DefaultIteratorResultSet( final IteratorSearchRequest request,
                                        final NexusIndexMultiSearcher indexSearcher,
                                        final List<IndexingContext> contexts, final TopDocs hits )
        throws IOException
    {
        this.searchRequest = request;

        this.indexSearcher = indexSearcher;

        this.contexts = contexts;

        {
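            // The starts array maps a "global" document number (as seen by the multi-searcher) back to the
            // per-context sub-searcher that produced it: starts[i] is the first global doc number belonging to
            // contexts.get( i ), and starts[contexts.size()] is the total maxDoc (see getIndexingContextForPointer()).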
            int maxDoc = 0;
            this.starts = new int[contexts.size() + 1]; // build starts array
            // this is safe to do here: the NexusIndexMultiSearcher was passed in already open through the constructor,
            // so #acquire() has already been invoked on the underlying NexusIndexMultiReader
            final List<IndexSearcher> acquiredSearchers = indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
            for ( int i = 0; i < contexts.size(); i++ )
            {
                starts[i] = maxDoc;
                maxDoc += acquiredSearchers.get( i ).maxDoc(); // compute maxDocs
            }
            starts[contexts.size()] = maxDoc;
        }

        this.filter = request.getArtifactInfoFilter();

        this.postprocessor = request.getArtifactInfoPostprocessor();

        // rewrite the highlight queries against the searcher's reader and keep the rewritten requests for highlighting
        List<MatchHighlightRequest> rewrittenHighlightRequests = new ArrayList<MatchHighlightRequest>();
        for ( MatchHighlightRequest hr : request.getMatchHighlightRequests() )
        {
            Query rewrittenQuery = hr.getQuery().rewrite( indexSearcher.getIndexReader() );
            rewrittenHighlightRequests.add( new MatchHighlightRequest( hr.getField(), rewrittenQuery,
                hr.getHighlightMode() ) );
        }
        this.matchHighlightRequests = rewrittenHighlightRequests;

        this.hits = hits;

        this.from = request.getStart();

        this.count = ( request.getCount() == AbstractSearchRequest.UNDEFINED )
            ? hits.scoreDocs.length
            : Math.min( request.getCount(), hits.scoreDocs.length );

        this.pointer = from;

        this.processedArtifactInfoCount = 0;

        this.maxRecPointer = from + count;

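        // eagerly fetch the first ArtifactInfo so hasNext() can answer immediately; if there is nothing to return
        // at all, release the underlying searcher right away instead of waiting for close()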
        ai = createNextAi();

        if ( ai == null )
        {
            cleanUp();
        }
    }

    public boolean hasNext()
    {
        return ai != null;
    }

    public ArtifactInfo next()
    {
        ArtifactInfo result = ai;

        try
        {
            ai = createNextAi();
        }
        catch ( IOException e )
        {
            ai = null;

            throw new IllegalStateException( "Cannot fetch next ArtifactInfo!", e );
        }
        finally
        {
            if ( ai == null )
            {
                cleanUp();
            }
        }

        return result;
    }

    public void remove()
    {
        throw new UnsupportedOperationException( "Method not supported on " + getClass().getName() );
    }

    public Iterator<ArtifactInfo> iterator()
    {
        return this;
    }

    public void close()
    {
        cleanUp();
    }

    public int getTotalProcessedArtifactInfoCount()
    {
        return processedArtifactInfoCount;
    }

    @Override
    public void finalize()
        throws Throwable
    {
        super.finalize();

        if ( !cleanedUp )
        {
            System.err.println( "#WARNING: Lock leaking from " + getClass().getName() + " for query "
                + searchRequest.getQuery().toString() );

            cleanUp();
        }
    }

    // ==

    protected ArtifactInfo createNextAi()
        throws IOException
    {
        ArtifactInfo result = null;

        // we should stop when:
        // a) we have found what we were looking for, or
        // b) the pointer has advanced past the number of documents the caller requested, or
        // c) the pointer has advanced past the number of documents in the hits
        while ( ( result == null ) && ( pointer < maxRecPointer ) && ( pointer < hits.scoreDocs.length ) )
        {
            Document doc = indexSearcher.doc( hits.scoreDocs[pointer].doc );

            IndexingContext context = getIndexingContextForPointer( doc, hits.scoreDocs[pointer].doc );

            result = IndexUtils.constructArtifactInfo( doc, context );

            if ( result != null )
            {
                // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS A VERY COSTLY OPERATION
                // For debugging only!!!
                if ( searchRequest.isLuceneExplain() )
                {
                    result.getAttributes().put( Explanation.class.getName(),
                        indexSearcher.explain( searchRequest.getQuery(), hits.scoreDocs[pointer].doc ).toString() );
                }

                result.setLuceneScore( hits.scoreDocs[pointer].score );

                result.repository = context.getRepositoryId();

                result.context = context.getId();

                if ( filter != null )
                {
                    if ( !filter.accepts( context, result ) )
                    {
                        result = null;
                    }
                }

                if ( result != null && postprocessor != null )
                {
                    postprocessor.postprocess( context, result );
                }

                if ( result != null && matchHighlightRequests.size() > 0 )
                {
                    calculateHighlights( context, doc, result );
                }
            }

            pointer++;
            processedArtifactInfoCount++;
        }

        return result;
    }

    private volatile boolean cleanedUp = false;

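    /**
     * Releases the underlying index searcher exactly once; subsequent invocations are no-ops. This is called when the
     * iteration is exhausted, from {@link #close()}, and as a last resort (with a warning) from {@link #finalize()}.
     */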
    protected synchronized void cleanUp()
    {
        if ( cleanedUp )
        {
            return;
        }

        try
        {
            indexSearcher.release();
        }
        catch ( IOException e )
        {
            throw new IllegalStateException( e );
        }

        this.cleanedUp = true;
    }

    /**
     * Calculates the match highlights for the given document and adds them to the ArtifactInfo, if any are found.
     * 
     * @param context the indexing context the document belongs to
     * @param d the Lucene document backing the ArtifactInfo
     * @param ai the ArtifactInfo to attach the highlights to
     * @throws IOException in case of an IO problem
     */
    protected void calculateHighlights( IndexingContext context, Document d, ArtifactInfo ai )
        throws IOException
    {
        IndexerField field = null;

        String text = null;

        List<String> highlightFragment = null;

        for ( MatchHighlightRequest hr : matchHighlightRequests )
        {
            field = selectStoredIndexerField( hr.getField() );

            if ( field != null )
            {
                text = ai.getFieldValue( field.getOntology() );

                if ( text != null )
                {
                    highlightFragment = highlightField( context, hr, field, text );

                    if ( highlightFragment != null && highlightFragment.size() > 0 )
                    {
                        MatchHighlight matchHighlight = new MatchHighlight( hr.getField(), highlightFragment );

                        ai.getMatchHighlights().add( matchHighlight );
                    }
                }
            }
        }
    }

    /**
     * Selects a STORED IndexerField assigned to the passed-in Field.
     * 
     * @param field the field to look up
     * @return the stored IndexerField, or {@code null} if the field has none assigned
     */
    protected IndexerField selectStoredIndexerField( Field field )
    {
        // hack here
        if ( MAVEN.CLASSNAMES.equals( field ) )
        {
            return JarFileContentsIndexCreator.FLD_CLASSNAMES;
        }
        else
        {
            return field.getIndexerFields().isEmpty() ? null : field.getIndexerFields().iterator().next();
        }
    }

    /**
     * Returns the fragments of the given field value that contain a match, highlighted in the style the user
     * requested.
     * 
     * @param context the indexing context
     * @param hr the match highlight request
     * @param field the stored indexer field the text was read from
     * @param text the stored field value to highlight
     * @return the list of highlighted fragments, possibly empty
     * @throws IOException in case of an IO problem
     */
    protected List<String> highlightField( IndexingContext context, MatchHighlightRequest hr, IndexerField field,
                                           String text )
        throws IOException
    {
        // special case for class names: present them in dotted notation instead of the stored '/'-separated form
        if ( MAVEN.CLASSNAMES.equals( field.getOntology() ) )
        {
            text = text.replace( '/', '.' ).replaceAll( "^\\.", "" ).replaceAll( "\n\\.", "\n" );
        }

        CachingTokenFilter tokenStream =
            new CachingTokenFilter( context.getAnalyzer().tokenStream( field.getKey(), new StringReader( text ) ) );

        Formatter formatter = null;

        if ( MatchHighlightMode.HTML.equals( hr.getHighlightMode() ) )
        {
            formatter = new SimpleHTMLFormatter();
        }
        else
        {
            tokenStream.close();
            throw new UnsupportedOperationException( "Highlight mode \"" + hr.getHighlightMode().toString()
                + "\" is not supported!" );
        }

        return getBestFragments( hr.getQuery(), formatter, tokenStream, text, 3 );
    }

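    /**
     * Runs the Lucene {@link Highlighter} (with a {@link QueryScorer} and a one-line fragmenter) over the given token
     * stream and text, collecting up to {@code maxNumFragments} fragments that actually contain a match. If the token
     * offsets do not line up with the text, an empty list is returned.
     * 
     * @param query the query to score the fragments against
     * @param formatter the formatter used to mark up the matches
     * @param tokenStream the token stream of the text being highlighted
     * @param text the raw text being highlighted
     * @param maxNumFragments the maximum number of fragments to return (values below 1 are treated as 1)
     * @return the list of highlighted fragments, possibly empty
     * @throws IOException in case of an IO problem
     */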
    protected final List<String> getBestFragments( Query query, Formatter formatter, TokenStream tokenStream,
                                                   String text, int maxNumFragments )
        throws IOException
    {
        Highlighter highlighter = new Highlighter( formatter, new CleaningEncoder(), new QueryScorer( query ) );

        highlighter.setTextFragmenter( new OneLineFragmenter() );

        tokenStream.reset();

        maxNumFragments = Math.max( 1, maxNumFragments ); // sanity check

        TextFragment[] frag;
        // Get text
        ArrayList<String> fragTexts = new ArrayList<String>( maxNumFragments );

        try
        {
            frag = highlighter.getBestTextFragments( tokenStream, text, false, maxNumFragments );

            for ( int i = 0; i < frag.length; i++ )
            {
                if ( ( frag[i] != null ) && ( frag[i].getScore() > 0 ) )
                {
                    fragTexts.add( frag[i].toString() );
                }
            }
        }
        catch ( InvalidTokenOffsetsException e )
        {
            // ignored: the token offsets did not match the text, so no fragments are returned for this field
        }

        return fragTexts;
    }

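    /**
     * Maps a global document number (as produced by the multi-searcher) back to the {@link IndexingContext} whose
     * sub-searcher contributed the document, using the starts array computed in the constructor.
     */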
    protected IndexingContext getIndexingContextForPointer( Document doc, int docPtr )
    {
        return contexts.get( readerIndex( docPtr, this.starts, this.contexts.size() ) );
    }

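    /**
     * Binary search over the starts array for the sub-reader (context) that contains global document {@code n};
     * essentially the same lookup Lucene's MultiReader performs. For example, with three contexts and
     * starts = {0, 10, 25, 25}, document 12 resolves to index 1 (the context covering documents 10..24).
     */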
    private static int readerIndex( int n, int[] starts, int numSubReaders )
    { // find reader for doc n:
        int lo = 0; // search starts array
        int hi = numSubReaders - 1; // for first element less than n

        while ( hi >= lo )
        {
            int mid = ( lo + hi ) >>> 1;
            int midValue = starts[mid];
            if ( n < midValue )
            {
                hi = mid - 1;
            }
            else if ( n > midValue )
            {
                lo = mid + 1;
            }
            else
            { // found a match
                while ( mid + 1 < numSubReaders && starts[mid + 1] == midValue )
                {
                    mid++; // scan to last match
                }
                return mid;
            }
        }
        return hi;
    }
}