View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index;
20  
21  import javax.inject.Named;
22  import javax.inject.Singleton;
23  
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.Comparator;
29  import java.util.List;
30  import java.util.Map;
31  import java.util.Set;
32  import java.util.TreeMap;
33  import java.util.TreeSet;
34  
35  import org.apache.lucene.document.Document;
36  import org.apache.lucene.search.IndexSearcher;
37  import org.apache.lucene.search.Query;
38  import org.apache.lucene.search.ScoreDoc;
39  import org.apache.lucene.search.TopScoreDocCollector;
40  import org.apache.maven.index.context.IndexUtils;
41  import org.apache.maven.index.context.IndexingContext;
42  import org.apache.maven.index.context.NexusIndexMultiReader;
43  import org.apache.maven.index.context.NexusIndexMultiSearcher;
44  import org.slf4j.Logger;
45  import org.slf4j.LoggerFactory;
46  
47  /**
48   * A default search engine implementation
49   *
50   * @author Eugene Kuleshov
51   * @author Tamas Cservenak
52   */
53  @Singleton
54  @Named
55  public class DefaultSearchEngine implements SearchEngine {
56  
57      private final Logger logger = LoggerFactory.getLogger(getClass());
58  
59      protected Logger getLogger() {
60          return logger;
61      }
62  
63      @Deprecated
64      public Set<ArtifactInfo> searchFlat(
65              Comparator<ArtifactInfo> artifactInfoComparator, IndexingContext indexingContext, Query query)
66              throws IOException {
67          return searchFlatPaged(
68                          new FlatSearchRequest(query, artifactInfoComparator, indexingContext),
69                          Arrays.asList(indexingContext),
70                          true)
71                  .getResults();
72      }
73  
74      @Deprecated
75      public Set<ArtifactInfo> searchFlat(
76              Comparator<ArtifactInfo> artifactInfoComparator, Collection<IndexingContext> indexingContexts, Query query)
77              throws IOException {
78          return searchFlatPaged(new FlatSearchRequest(query, artifactInfoComparator), indexingContexts)
79                  .getResults();
80      }
81  
82      public FlatSearchResponse searchFlatPaged(FlatSearchRequest request, Collection<IndexingContext> indexingContexts)
83              throws IOException {
84          return searchFlatPaged(request, indexingContexts, false);
85      }
86  
87      public FlatSearchResponse forceSearchFlatPaged(
88              FlatSearchRequest request, Collection<IndexingContext> indexingContexts) throws IOException {
89          return searchFlatPaged(request, indexingContexts, true);
90      }
91  
92      protected FlatSearchResponse searchFlatPaged(
93              FlatSearchRequest request, Collection<IndexingContext> indexingContexts, boolean ignoreContext)
94              throws IOException {
95          List<IndexingContext> contexts = getParticipatingContexts(indexingContexts, ignoreContext);
96  
97          final TreeSet<ArtifactInfo> result = new TreeSet<>(request.getArtifactInfoComparator());
98          return new FlatSearchResponse(
99                  request.getQuery(), searchFlat(request, result, contexts, request.getQuery()), result);
100     }
101 
102     // ==
103 
104     public GroupedSearchResponse searchGrouped(
105             GroupedSearchRequest request, Collection<IndexingContext> indexingContexts) throws IOException {
106         return searchGrouped(request, indexingContexts, false);
107     }
108 
109     public GroupedSearchResponse forceSearchGrouped(
110             GroupedSearchRequest request, Collection<IndexingContext> indexingContexts) throws IOException {
111         return searchGrouped(request, indexingContexts, true);
112     }
113 
114     protected GroupedSearchResponse searchGrouped(
115             GroupedSearchRequest request, Collection<IndexingContext> indexingContexts, boolean ignoreContext)
116             throws IOException {
117         List<IndexingContext> contexts = getParticipatingContexts(indexingContexts, ignoreContext);
118 
119         final TreeMap<String, ArtifactInfoGroup> result = new TreeMap<>(request.getGroupKeyComparator());
120 
121         return new GroupedSearchResponse(
122                 request.getQuery(),
123                 searchGrouped(request, result, request.getGrouping(), contexts, request.getQuery()),
124                 result);
125     }
126 
127     // ===
128 
129     protected int searchFlat(
130             FlatSearchRequest req,
131             Collection<ArtifactInfo> result,
132             List<IndexingContext> participatingContexts,
133             Query query)
134             throws IOException {
135         int hitCount = 0;
136         for (IndexingContext context : participatingContexts) {
137             final IndexSearcher indexSearcher = context.acquireIndexSearcher();
138             try {
139                 final TopScoreDocCollector collector = doSearchWithCeiling(req, indexSearcher, query);
140 
141                 if (collector.getTotalHits() == 0) {
142                     // context has no hits, just continue to next one
143                     continue;
144                 }
145 
146                 ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
147 
148                 // uhm btw hitCount contains dups
149 
150                 hitCount += collector.getTotalHits();
151 
152                 int start = 0; // from == FlatSearchRequest.UNDEFINED ? 0 : from;
153 
154                 // we have to pack the results as long: a) we have found aiCount ones b) we depleted hits
155                 for (int i = start; i < scoreDocs.length; i++) {
156                     Document doc = indexSearcher.doc(scoreDocs[i].doc);
157 
158                     ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo(doc, context);
159 
160                     if (artifactInfo != null) {
161                         artifactInfo.setRepository(context.getRepositoryId());
162                         artifactInfo.setContext(context.getId());
163 
164                         if (req.getArtifactInfoFilter() != null) {
165                             if (!req.getArtifactInfoFilter().accepts(context, artifactInfo)) {
166                                 continue;
167                             }
168                         }
169                         if (req.getArtifactInfoPostprocessor() != null) {
170                             req.getArtifactInfoPostprocessor().postprocess(context, artifactInfo);
171                         }
172 
173                         result.add(artifactInfo);
174                     }
175                 }
176             } finally {
177                 context.releaseIndexSearcher(indexSearcher);
178             }
179         }
180 
181         return hitCount;
182     }
183 
184     protected int searchGrouped(
185             GroupedSearchRequest req,
186             Map<String, ArtifactInfoGroup> result,
187             Grouping grouping,
188             List<IndexingContext> participatingContexts,
189             Query query)
190             throws IOException {
191         int hitCount = 0;
192 
193         for (IndexingContext context : participatingContexts) {
194             final IndexSearcher indexSearcher = context.acquireIndexSearcher();
195             try {
196                 final TopScoreDocCollector collector = doSearchWithCeiling(req, indexSearcher, query);
197 
198                 if (collector.getTotalHits() > 0) {
199                     ScoreDoc[] scoreDocs = collector.topDocs().scoreDocs;
200 
201                     hitCount += collector.getTotalHits();
202 
203                     for (ScoreDoc scoreDoc : scoreDocs) {
204                         Document doc = indexSearcher.doc(scoreDoc.doc);
205 
206                         ArtifactInfo artifactInfo = IndexUtils.constructArtifactInfo(doc, context);
207 
208                         if (artifactInfo != null) {
209                             artifactInfo.setRepository(context.getRepositoryId());
210                             artifactInfo.setContext(context.getId());
211 
212                             if (req.getArtifactInfoFilter() != null) {
213                                 if (!req.getArtifactInfoFilter().accepts(context, artifactInfo)) {
214                                     continue;
215                                 }
216                             }
217                             if (req.getArtifactInfoPostprocessor() != null) {
218                                 req.getArtifactInfoPostprocessor().postprocess(context, artifactInfo);
219                             }
220 
221                             if (!grouping.addArtifactInfo(result, artifactInfo)) {
222                                 // fix the hitCount accordingly
223                                 hitCount--;
224                             }
225                         }
226                     }
227                 }
228             } finally {
229                 context.releaseIndexSearcher(indexSearcher);
230             }
231         }
232 
233         return hitCount;
234     }
235 
236     // == NG Search
237 
238     public IteratorSearchResponse searchIteratorPaged(
239             IteratorSearchRequest request, Collection<IndexingContext> indexingContexts) throws IOException {
240         return searchIteratorPaged(request, indexingContexts, false);
241     }
242 
243     public IteratorSearchResponse forceSearchIteratorPaged(
244             IteratorSearchRequest request, Collection<IndexingContext> indexingContexts) throws IOException {
245         return searchIteratorPaged(request, indexingContexts, true);
246     }
247 
248     private IteratorSearchResponse searchIteratorPaged(
249             IteratorSearchRequest request, Collection<IndexingContext> indexingContexts, boolean ignoreContext)
250             throws IOException {
251         List<IndexingContext> contexts = getParticipatingContexts(indexingContexts, ignoreContext);
252 
253         NexusIndexMultiReader multiReader = getMergedIndexReader(indexingContexts, ignoreContext);
254 
255         NexusIndexMultiSearcher indexSearcher = new NexusIndexMultiSearcher(multiReader);
256 
257         try {
258             TopScoreDocCollector hits = doSearchWithCeiling(request, indexSearcher, request.getQuery());
259 
260             return new IteratorSearchResponse(
261                     request.getQuery(),
262                     hits.getTotalHits(),
263                     new DefaultIteratorResultSet(request, indexSearcher, contexts, hits.topDocs()));
264         } catch (IOException | RuntimeException e) {
265             try {
266                 indexSearcher.release();
267             } catch (Exception secondary) {
268                 // do not mask original exception
269             }
270             throw e;
271         }
272     }
273 
274     // ==
275 
276     protected TopScoreDocCollector doSearchWithCeiling(
277             final AbstractSearchRequest request, final IndexSearcher indexSearcher, final Query query)
278             throws IOException {
279         int topHitCount = getTopDocsCollectorHitNum(request, AbstractSearchRequest.UNDEFINED);
280 
281         if (AbstractSearchRequest.UNDEFINED != topHitCount) {
282             // count is set, simply just execute it as-is
283             final TopScoreDocCollector hits = TopScoreDocCollector.create(topHitCount, Integer.MAX_VALUE);
284 
285             indexSearcher.search(query, hits);
286 
287             return hits;
288         } else {
289             // set something reasonable as 1k
290             topHitCount = 1000;
291 
292             // perform search
293             TopScoreDocCollector hits = TopScoreDocCollector.create(topHitCount, Integer.MAX_VALUE);
294             indexSearcher.search(query, hits);
295 
296             // check total hits against, does it fit?
297             if (topHitCount < hits.getTotalHits()) {
298                 topHitCount = hits.getTotalHits();
299 
300                 if (getLogger().isDebugEnabled()) {
301                     // warn the user and leave trace just before OOM might happen
302                     // the hits.getTotalHits() might be HUUGE
303                     getLogger()
304                             .debug("Executing unbounded search, and fitting topHitCounts to " + topHitCount
305                                     + ", an OOMEx might follow. To avoid OOM use narrower queries or limit your expectancy with "
306                                     + "request.setCount() method where appropriate. See MINDEXER-14 for details.");
307                 }
308 
309                 // redo all, but this time with correct numbers
310                 hits = TopScoreDocCollector.create(topHitCount, Integer.MAX_VALUE);
311                 indexSearcher.search(query, hits);
312             }
313 
314             return hits;
315         }
316     }
317 
318     /**
319      * Returns the list of participating contexts. Does not locks them, just builds a list of them.
320      */
321     protected List<IndexingContext> getParticipatingContexts(
322             final Collection<IndexingContext> indexingContexts, final boolean ignoreContext) {
323         // to not change the API all away, but we need stable ordering here
324         // filter for those 1st, that take part in here
325         final ArrayList<IndexingContext> contexts = new ArrayList<>(indexingContexts.size());
326 
327         for (IndexingContext ctx : indexingContexts) {
328             if (ignoreContext || ctx.isSearchable()) {
329                 contexts.add(ctx);
330             }
331         }
332 
333         return contexts;
334     }
335 
336     /**
337      * Locks down participating contexts, and returns a "merged" reader of them. In case of error, unlocks as part of
338      * cleanup and re-throws exception. Without error, it is the duty of caller to unlock contexts!
339      *
340      * @param indexingContexts
341      * @param ignoreContext
342      * @return
343      * @throws IOException
344      */
345     protected NexusIndexMultiReader getMergedIndexReader(
346             final Collection<IndexingContext> indexingContexts, final boolean ignoreContext) throws IOException {
347         final List<IndexingContext> contexts = getParticipatingContexts(indexingContexts, ignoreContext);
348         return new NexusIndexMultiReader(contexts);
349     }
350 
351     protected int getTopDocsCollectorHitNum(final AbstractSearchRequest request, final int ceiling) {
352         if (request instanceof AbstractSearchPageableRequest) {
353             final AbstractSearchPageableRequest prequest = (AbstractSearchPageableRequest) request;
354 
355             if (AbstractSearchRequest.UNDEFINED != prequest.getCount()) {
356                 // easy, user knows and tells us how many results he want
357                 return prequest.getCount() + prequest.getStart();
358             }
359         } else {
360             if (AbstractSearchRequest.UNDEFINED != request.getCount()) {
361                 // easy, user knows and tells us how many results he want
362                 return request.getCount();
363             }
364         }
365 
366         return ceiling;
367     }
368 }