View Javadoc

1   package org.apache.maven.index;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0    
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import java.io.IOException;
23  import java.util.ArrayList;
24  import java.util.Arrays;
25  import java.util.Collection;
26  import java.util.Comparator;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.Set;
30  import java.util.TreeMap;
31  import java.util.TreeSet;
32  
33  import org.apache.lucene.document.Document;
34  import org.apache.lucene.search.IndexSearcher;
35  import org.apache.lucene.search.Query;
36  import org.apache.lucene.search.ScoreDoc;
37  import org.apache.lucene.search.TopScoreDocCollector;
38  import org.apache.maven.index.context.IndexUtils;
39  import org.apache.maven.index.context.IndexingContext;
40  import org.apache.maven.index.context.NexusIndexMultiReader;
41  import org.apache.maven.index.context.NexusIndexMultiSearcher;
42  import org.codehaus.plexus.component.annotations.Component;
43  import org.codehaus.plexus.logging.AbstractLogEnabled;
44  
45  /**
46   * A default search engine implementation
47   * 
48   * @author Eugene Kuleshov
49   * @author Tamas Cservenak
50   */
51  @Component( role = SearchEngine.class )
52  public class DefaultSearchEngine
53      extends AbstractLogEnabled
54      implements SearchEngine
55  {
56      @Deprecated
57      public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
58                                           IndexingContext indexingContext, Query query )
59          throws IOException
60      {
61          return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator, indexingContext ),
62              Arrays.asList( indexingContext ), true ).getResults();
63      }
64  
65      @Deprecated
66      public Set<ArtifactInfo> searchFlat( Comparator<ArtifactInfo> artifactInfoComparator,
67                                           Collection<IndexingContext> indexingContexts, Query query )
68          throws IOException
69      {
70          return searchFlatPaged( new FlatSearchRequest( query, artifactInfoComparator ), indexingContexts ).getResults();
71      }
72  
73      public FlatSearchResponse searchFlatPaged( FlatSearchRequest request, Collection<IndexingContext> indexingContexts )
74          throws IOException
75      {
76          return searchFlatPaged( request, indexingContexts, false );
77      }
78  
79      public FlatSearchResponse forceSearchFlatPaged( FlatSearchRequest request,
80                                                      Collection<IndexingContext> indexingContexts )
81          throws IOException
82      {
83          return searchFlatPaged( request, indexingContexts, true );
84      }
85  
86      protected FlatSearchResponse searchFlatPaged( FlatSearchRequest request,
87                                                    Collection<IndexingContext> indexingContexts, boolean ignoreContext )
88          throws IOException
89      {
90          List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
91  
92          final TreeSet<ArtifactInfo> result = new TreeSet<ArtifactInfo>( request.getArtifactInfoComparator() );
93          return new FlatSearchResponse( request.getQuery(), searchFlat( request, result, contexts, request.getQuery() ),
94              result );
95      }
96  
97      // ==
98  
99      public GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
100                                                 Collection<IndexingContext> indexingContexts )
101         throws IOException
102     {
103         return searchGrouped( request, indexingContexts, false );
104     }
105 
106     public GroupedSearchResponse forceSearchGrouped( GroupedSearchRequest request,
107                                                      Collection<IndexingContext> indexingContexts )
108         throws IOException
109     {
110         return searchGrouped( request, indexingContexts, true );
111     }
112 
113     protected GroupedSearchResponse searchGrouped( GroupedSearchRequest request,
114                                                    Collection<IndexingContext> indexingContexts, boolean ignoreContext )
115         throws IOException
116     {
117         List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
118 
119         final TreeMap<String, ArtifactInfoGroup> result =
120             new TreeMap<String, ArtifactInfoGroup>( request.getGroupKeyComparator() );
121 
122         return new GroupedSearchResponse( request.getQuery(), searchGrouped( request, result, request.getGrouping(),
123             contexts, request.getQuery() ), result );
124     }
125 
126     // ===
127 
128     protected int searchFlat( FlatSearchRequest req, Collection<ArtifactInfo> result,
129                               List<IndexingContext> participatingContexts, Query query )
130         throws IOException
131     {
132         int hitCount = 0;
133         for ( IndexingContext context : participatingContexts )
134         {
135             final IndexSearcher indexSearcher = context.acquireIndexSearcher();
136             try
137             {
138                 final TopScoreDocCollector collector = doSearchWithCeiling( req, indexSearcher, query );
139 
140                 if ( collector.getTotalHits() == 0 )
141                 {
142                     // context has no hits, just continue to next one
143                     continue;
144                 }
145 
146                 ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
147 
148                 // uhm btw hitCount contains dups
149 
150                 hitCount += collector.getTotalHits();
151 
152                 int start = 0; // from == FlatSearchRequest.UNDEFINED ? 0 : from;
153 
154                 // we have to pack the results as long: a) we have found aiCount ones b) we depleted hits
155                 for ( int i = start; i < scoreDocs.length; i++ )
156                 {
157                     Document doc = indexSearcher.doc( scoreDocs[i].doc );
158 
159                     ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );
160 
161                     if ( artifactInfo != null )
162                     {
163                         artifactInfo.repository = context.getRepositoryId();
164                         artifactInfo.context = context.getId();
165 
166                         if ( req.getArtifactInfoFilter() != null )
167                         {
168                             if ( !req.getArtifactInfoFilter().accepts( context, artifactInfo ) )
169                             {
170                                 continue;
171                             }
172                         }
173                         if ( req.getArtifactInfoPostprocessor() != null )
174                         {
175                             req.getArtifactInfoPostprocessor().postprocess( context, artifactInfo );
176                         }
177 
178                         result.add( artifactInfo );
179                     }
180                 }
181             }
182             finally
183             {
184                 context.releaseIndexSearcher( indexSearcher );
185             }
186         }
187 
188         return hitCount;
189     }
190 
191     protected int searchGrouped( GroupedSearchRequest req, Map<String, ArtifactInfoGroup> result, Grouping grouping,
192                                  List<IndexingContext> participatingContexts, Query query )
193         throws IOException
194     {
195         int hitCount = 0;
196 
197         for ( IndexingContext context : participatingContexts )
198         {
199             final IndexSearcher indexSearcher = context.acquireIndexSearcher();
200             try
201             {
202                 final TopScoreDocCollector collector = doSearchWithCeiling( req, indexSearcher, query );
203 
204                 if ( collector.getTotalHits() > 0 )
205                 {
206                     ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
207 
208                     hitCount += collector.getTotalHits();
209 
210                     for ( int i = 0; i < scoreDocs.length; i++ )
211                     {
212                         Document doc = indexSearcher.doc( scoreDocs[i].doc );
213 
214                         ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo( doc, context );
215 
216                         if ( artifactInfo != null )
217                         {
218                             artifactInfo.repository = context.getRepositoryId();
219                             artifactInfo.context = context.getId();
220 
221                             if ( req.getArtifactInfoFilter() != null )
222                             {
223                                 if ( !req.getArtifactInfoFilter().accepts( context, artifactInfo ) )
224                                 {
225                                     continue;
226                                 }
227                             }
228                             if ( req.getArtifactInfoPostprocessor() != null )
229                             {
230                                 req.getArtifactInfoPostprocessor().postprocess( context, artifactInfo );
231                             }
232 
233                             if ( !grouping.addArtifactInfo( result, artifactInfo ) )
234                             {
235                                 // fix the hitCount accordingly
236                                 hitCount--;
237                             }
238                         }
239                     }
240                 }
241             }
242             finally
243             {
244                 context.releaseIndexSearcher( indexSearcher );
245             }
246         }
247 
248         return hitCount;
249     }
250 
251     // == NG Search
252 
253     public IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
254                                                        Collection<IndexingContext> indexingContexts )
255         throws IOException
256     {
257         return searchIteratorPaged( request, indexingContexts, false );
258     }
259 
260     public IteratorSearchResponse forceSearchIteratorPaged( IteratorSearchRequest request,
261                                                             Collection<IndexingContext> indexingContexts )
262         throws IOException
263     {
264         return searchIteratorPaged( request, indexingContexts, true );
265     }
266 
267     private IteratorSearchResponse searchIteratorPaged( IteratorSearchRequest request,
268                                                         Collection<IndexingContext> indexingContexts,
269                                                         boolean ignoreContext )
270         throws IOException
271     {
272         List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
273 
274         NexusIndexMultiReader multiReader = getMergedIndexReader( indexingContexts, ignoreContext );
275 
276         NexusIndexMultiSearcher indexSearcher = new NexusIndexMultiSearcher( multiReader );
277 
278         try
279         {
280             TopScoreDocCollector hits = doSearchWithCeiling( request, indexSearcher, request.getQuery() );
281 
282             return new IteratorSearchResponse( request.getQuery(), hits.getTotalHits(),
283                                                new DefaultIteratorResultSet( request, indexSearcher, contexts,
284                                                                              hits.topDocs() ) );
285         }
286         catch ( IOException e )
287         {
288             try
289             {
290                 indexSearcher.release();
291             }
292             catch ( Exception secondary )
293             {
294                 // do not mask original exception
295             }
296             throw e;
297         }
298         catch ( RuntimeException e )
299         {
300             try
301             {
302                 indexSearcher.release();
303             }
304             catch ( Exception secondary )
305             {
306                 // do not mask original exception
307             }
308             throw e;
309         }
310     }
311 
312     // ==
313 
314     protected TopScoreDocCollector doSearchWithCeiling( final AbstractSearchRequest request,
315                                                         final IndexSearcher indexSearcher, final Query query )
316         throws IOException
317     {
318         int topHitCount = getTopDocsCollectorHitNum( request, AbstractSearchRequest.UNDEFINED );
319 
320         if ( AbstractSearchRequest.UNDEFINED != topHitCount )
321         {
322             // count is set, simply just execute it as-is
323             final TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount, true );
324 
325             indexSearcher.search( query, hits );
326 
327             return hits;
328         }
329         else
330         {
331             // set something reasonable as 1k
332             topHitCount = 1000;
333 
334             // perform search
335             TopScoreDocCollector hits = TopScoreDocCollector.create( topHitCount, true );
336             indexSearcher.search( query, hits );
337 
338             // check total hits against, does it fit?
339             if ( topHitCount < hits.getTotalHits() )
340             {
341                 topHitCount = hits.getTotalHits();
342 
343                 if ( getLogger().isDebugEnabled() )
344                 {
345                     // warn the user and leave trace just before OOM might happen
346                     // the hits.getTotalHits() might be HUUGE
347                     getLogger().debug(
348                         "Executing unbounded search, and fitting topHitCounts to "
349                             + topHitCount
350                             + ", an OOMEx might follow. To avoid OOM use narrower queries or limit your expectancy with request.setCount() method where appropriate. See MINDEXER-14 for details." );
351                 }
352 
353                 // redo all, but this time with correct numbers
354                 hits = TopScoreDocCollector.create( topHitCount, true );
355                 indexSearcher.search( query, hits );
356             }
357 
358             return hits;
359         }
360     }
361 
362     /**
363      * Returns the list of participating contexts. Does not locks them, just builds a list of them.
364      */
365     protected List<IndexingContext> getParticipatingContexts( final Collection<IndexingContext> indexingContexts,
366                                                               final boolean ignoreContext )
367     {
368         // to not change the API all away, but we need stable ordering here
369         // filter for those 1st, that take part in here
370         final ArrayList<IndexingContext> contexts = new ArrayList<IndexingContext>( indexingContexts.size() );
371 
372         for ( IndexingContext ctx : indexingContexts )
373         {
374             if ( ignoreContext || ctx.isSearchable() )
375             {
376                 contexts.add( ctx );
377             }
378         }
379 
380         return contexts;
381     }
382 
383     /**
384      * Locks down participating contexts, and returns a "merged" reader of them. In case of error, unlocks as part of
385      * cleanup and re-throws exception. Without error, it is the duty of caller to unlock contexts!
386      * 
387      * @param indexingContexts
388      * @param ignoreContext
389      * @return
390      * @throws IOException
391      */
392     protected NexusIndexMultiReader getMergedIndexReader( final Collection<IndexingContext> indexingContexts,
393                                                           final boolean ignoreContext )
394         throws IOException
395     {
396         final List<IndexingContext> contexts = getParticipatingContexts( indexingContexts, ignoreContext );
397         return new NexusIndexMultiReader( contexts );
398     }
399 
400     protected int getTopDocsCollectorHitNum( final AbstractSearchRequest request, final int ceiling )
401     {
402         if ( request instanceof AbstractSearchPageableRequest )
403         {
404             final AbstractSearchPageableRequest prequest = (AbstractSearchPageableRequest) request;
405 
406             if ( AbstractSearchRequest.UNDEFINED != prequest.getCount() )
407             {
408                 // easy, user knows and tells us how many results he want
409                 return prequest.getCount() + prequest.getStart();
410             }
411         }
412         else
413         {
414             if ( AbstractSearchRequest.UNDEFINED != request.getCount() )
415             {
416                 // easy, user knows and tells us how many results he want
417                 return request.getCount();
418             }
419         }
420 
421         return ceiling;
422     }
423 }