View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index;
20  
21  import java.io.IOException;
22  import java.io.StringReader;
23  import java.util.ArrayList;
24  import java.util.Iterator;
25  import java.util.List;
26  
27  import org.apache.lucene.analysis.Analyzer;
28  import org.apache.lucene.analysis.CachingTokenFilter;
29  import org.apache.lucene.analysis.TokenStream;
30  import org.apache.lucene.document.Document;
31  import org.apache.lucene.search.Explanation;
32  import org.apache.lucene.search.IndexSearcher;
33  import org.apache.lucene.search.Query;
34  import org.apache.lucene.search.TopDocs;
35  import org.apache.lucene.search.highlight.Formatter;
36  import org.apache.lucene.search.highlight.Highlighter;
37  import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
38  import org.apache.lucene.search.highlight.QueryScorer;
39  import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
40  import org.apache.lucene.search.highlight.TextFragment;
41  import org.apache.maven.index.context.IndexUtils;
42  import org.apache.maven.index.context.IndexingContext;
43  import org.apache.maven.index.context.NexusIndexMultiSearcher;
44  import org.apache.maven.index.creator.JarFileContentsIndexCreator;
45  
46  /**
47   * Default implementation of IteratorResultSet. TODO: there is too much of logic, refactor this!
48   *
49   * @author cstamas
50   */
51  public class DefaultIteratorResultSet implements IteratorResultSet {
52      private final IteratorSearchRequest searchRequest;
53  
54      private final NexusIndexMultiSearcher indexSearcher;
55  
56      private final List<IndexingContext> contexts;
57  
58      private final int[] starts;
59  
60      private final ArtifactInfoFilter filter;
61  
62      private final ArtifactInfoPostprocessor postprocessor;
63  
64      private final List<MatchHighlightRequest> matchHighlightRequests;
65  
66      private final TopDocs hits;
67  
68      private final int from;
69  
70      private final int count;
71  
72      private final int maxRecPointer;
73  
74      private int pointer;
75  
76      private int processedArtifactInfoCount;
77  
78      private ArtifactInfo ai;
79  
80      protected DefaultIteratorResultSet(
81              final IteratorSearchRequest request,
82              final NexusIndexMultiSearcher indexSearcher,
83              final List<IndexingContext> contexts,
84              final TopDocs hits)
85              throws IOException {
86          this.searchRequest = request;
87  
88          this.indexSearcher = indexSearcher;
89  
90          this.contexts = contexts;
91  
92          {
93              int maxDoc = 0;
94              this.starts = new int[contexts.size() + 1]; // build starts array
95              // this is good to do as we have NexusIndexMultiSearcher passed in contructor, so it is already open, hence
96              // #acquire() already invoked on underlying NexusIndexMultiReader
97              final List<IndexSearcher> acquiredSearchers =
98                      indexSearcher.getNexusIndexMultiReader().getAcquiredSearchers();
99              for (int i = 0; i < contexts.size(); i++) {
100                 starts[i] = maxDoc;
101                 maxDoc += acquiredSearchers.get(i).getIndexReader().maxDoc(); // compute maxDocs
102             }
103             starts[contexts.size()] = maxDoc;
104         }
105 
106         this.filter = request.getArtifactInfoFilter();
107 
108         this.postprocessor = request.getArtifactInfoPostprocessor();
109 
110         this.matchHighlightRequests = request.getMatchHighlightRequests();
111 
112         List<MatchHighlightRequest> matchHighlightRequests = new ArrayList<>();
113         for (MatchHighlightRequest hr : request.getMatchHighlightRequests()) {
114             Query rewrittenQuery = hr.getQuery().rewrite(indexSearcher.getIndexReader());
115             matchHighlightRequests.add(new MatchHighlightRequest(hr.getField(), rewrittenQuery, hr.getHighlightMode()));
116         }
117 
118         this.hits = hits;
119 
120         this.from = request.getStart();
121 
122         this.count = (request.getCount() == AbstractSearchRequest.UNDEFINED
123                 ? hits.scoreDocs.length
124                 : Math.min(request.getCount(), hits.scoreDocs.length));
125 
126         this.pointer = from;
127 
128         this.processedArtifactInfoCount = 0;
129 
130         this.maxRecPointer = from + count;
131 
132         ai = createNextAi();
133 
134         if (ai == null) {
135             cleanUp();
136         }
137     }
138 
139     public boolean hasNext() {
140         return ai != null;
141     }
142 
143     public ArtifactInfo next() {
144         ArtifactInfo result = ai;
145 
146         try {
147             ai = createNextAi();
148         } catch (IOException e) {
149             ai = null;
150 
151             throw new IllegalStateException("Cannot fetch next ArtifactInfo!", e);
152         } finally {
153             if (ai == null) {
154                 cleanUp();
155             }
156         }
157 
158         return result;
159     }
160 
161     public void remove() {
162         throw new UnsupportedOperationException(
163                 "Method not supported on " + getClass().getName());
164     }
165 
166     public Iterator<ArtifactInfo> iterator() {
167         return this;
168     }
169 
170     public void close() {
171         cleanUp();
172     }
173 
174     public int getTotalProcessedArtifactInfoCount() {
175         return processedArtifactInfoCount;
176     }
177 
178     @Override
179     public void finalize() throws Throwable {
180         super.finalize();
181 
182         if (!cleanedUp) {
183             System.err.println("#WARNING: Lock leaking from " + getClass().getName() + " for query "
184                     + searchRequest.getQuery().toString());
185 
186             cleanUp();
187         }
188     }
189 
190     // ==
191 
192     protected ArtifactInfo createNextAi() throws IOException {
193         ArtifactInfo result = null;
194 
195         // we should stop if:
196         // a) we found what we want
197         // b) pointer advanced over more documents that user requested
198         // c) pointer advanced over more documents that hits has
199         // or we found what we need
200         while ((result == null) && (pointer < maxRecPointer) && (pointer < hits.scoreDocs.length)) {
201             Document doc = indexSearcher.doc(hits.scoreDocs[pointer].doc);
202 
203             IndexingContext context = getIndexingContextForPointer(doc, hits.scoreDocs[pointer].doc);
204 
205             result = IndexUtils.constructArtifactInfo(doc, context);
206 
207             if (result != null) {
208                 // WARNING: NOT FOR PRODUCTION SYSTEMS, THIS IS VERY COSTLY OPERATION
209                 // For debugging only!!!
210                 if (searchRequest.isLuceneExplain()) {
211                     result.getAttributes()
212                             .put(
213                                     Explanation.class.getName(),
214                                     indexSearcher
215                                             .explain(searchRequest.getQuery(), hits.scoreDocs[pointer].doc)
216                                             .toString());
217                 }
218 
219                 result.setLuceneScore(hits.scoreDocs[pointer].score);
220 
221                 result.setRepository(context.getRepositoryId());
222 
223                 result.setContext(context.getId());
224 
225                 if (filter != null) {
226                     if (!filter.accepts(context, result)) {
227                         result = null;
228                     }
229                 }
230 
231                 if (result != null && postprocessor != null) {
232                     postprocessor.postprocess(context, result);
233                 }
234 
235                 if (result != null && matchHighlightRequests.size() > 0) {
236                     calculateHighlights(context, doc, result);
237                 }
238             }
239 
240             pointer++;
241             processedArtifactInfoCount++;
242         }
243 
244         return result;
245     }
246 
247     private volatile boolean cleanedUp = false;
248 
249     protected synchronized void cleanUp() {
250         if (cleanedUp) {
251             return;
252         }
253 
254         try {
255             indexSearcher.release();
256         } catch (IOException e) {
257             throw new IllegalStateException(e);
258         }
259 
260         this.cleanedUp = true;
261     }
262 
263     /**
264      * Creates the MatchHighlights and adds them to ArtifactInfo if found/can.
265      *
266      * @param context
267      * @param d
268      * @param ai
269      */
270     protected void calculateHighlights(IndexingContext context, Document d, ArtifactInfo ai) throws IOException {
271         IndexerField field;
272 
273         String text;
274 
275         List<String> highlightFragment;
276 
277         for (MatchHighlightRequest hr : matchHighlightRequests) {
278             field = selectStoredIndexerField(hr.getField());
279 
280             if (field != null) {
281                 text = ai.getFieldValue(field.getOntology());
282 
283                 if (text != null) {
284                     highlightFragment = highlightField(context, hr, field, text);
285 
286                     if (highlightFragment != null && highlightFragment.size() > 0) {
287                         MatchHighlight matchHighlight = new MatchHighlight(hr.getField(), highlightFragment);
288 
289                         ai.getMatchHighlights().add(matchHighlight);
290                     }
291                 }
292             }
293         }
294     }
295 
296     /**
297      * Select a STORED IndexerField assigned to passed in Field.
298      *
299      * @param field
300      * @return
301      */
302     protected IndexerField selectStoredIndexerField(Field field) {
303         // hack here
304         if (MAVEN.CLASSNAMES.equals(field)) {
305             return JarFileContentsIndexCreator.FLD_CLASSNAMES;
306         } else {
307             return field.getIndexerFields().isEmpty()
308                     ? null
309                     : field.getIndexerFields().iterator().next();
310         }
311     }
312 
313     /**
314      * Returns a string that contains match fragment highlighted in style as user requested.
315      *
316      * @param context
317      * @param hr
318      * @param field
319      * @param text
320      * @return
321      * @throws IOException
322      */
323     protected List<String> highlightField(
324             IndexingContext context, MatchHighlightRequest hr, IndexerField field, String text) throws IOException {
325         // exception with classnames
326         if (MAVEN.CLASSNAMES.equals(field.getOntology())) {
327             text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n");
328         }
329 
330         Analyzer analyzer = context.getAnalyzer();
331         TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text));
332 
333         CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream);
334 
335         Formatter formatter;
336 
337         if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) {
338             formatter = new SimpleHTMLFormatter();
339         } else {
340             tokenStream.reset();
341             tokenStream.end();
342             tokenStream.close();
343             throw new UnsupportedOperationException(
344                     "Hightlight more \"" + hr.getHighlightMode().toString() + "\" is not supported!");
345         }
346 
347         List<String> bestFragments = getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3);
348 
349         return bestFragments;
350     }
351 
352     protected final List<String> getBestFragments(
353             Query query, Formatter formatter, TokenStream tokenStream, String text, int maxNumFragments)
354             throws IOException {
355         Highlighter highlighter = new Highlighter(formatter, new CleaningEncoder(), new QueryScorer(query));
356 
357         highlighter.setTextFragmenter(new OneLineFragmenter());
358 
359         maxNumFragments = Math.max(1, maxNumFragments); // sanity check
360 
361         TextFragment[] frag;
362         // Get text
363         ArrayList<String> fragTexts = new ArrayList<>(maxNumFragments);
364 
365         try {
366             frag = highlighter.getBestTextFragments(tokenStream, text, false, maxNumFragments);
367 
368             for (TextFragment textFragment : frag) {
369                 if ((textFragment != null) && (textFragment.getScore() > 0)) {
370                     fragTexts.add(textFragment.toString());
371                 }
372             }
373         } catch (InvalidTokenOffsetsException e) {
374             // empty?
375         }
376 
377         return fragTexts;
378     }
379 
380     protected IndexingContext getIndexingContextForPointer(Document doc, int docPtr) {
381         return contexts.get(readerIndex(docPtr, this.starts, this.contexts.size()));
382     }
383 
384     private static int readerIndex(int n, int[] starts, int numSubReaders) { // find reader for doc n:
385         int lo = 0; // search starts array
386         int hi = numSubReaders - 1; // for first element less
387 
388         while (hi >= lo) {
389             int mid = (lo + hi) >>> 1;
390             int midValue = starts[mid];
391             if (n < midValue) {
392                 hi = mid - 1;
393             } else if (n > midValue) {
394                 lo = mid + 1;
395             } else { // found a match
396                 while (mid + 1 < numSubReaders && starts[mid + 1] == midValue) {
397                     mid++; // scan to last match
398                 }
399                 return mid;
400             }
401         }
402         return hi;
403     }
404 }