View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index.updater;
20  
21  import javax.inject.Inject;
22  import javax.inject.Named;
23  import javax.inject.Singleton;
24  
25  import java.io.BufferedInputStream;
26  import java.io.BufferedOutputStream;
27  import java.io.BufferedReader;
28  import java.io.File;
29  import java.io.FileInputStream;
30  import java.io.FileNotFoundException;
31  import java.io.FileOutputStream;
32  import java.io.IOException;
33  import java.io.InputStream;
34  import java.io.InputStreamReader;
35  import java.io.OutputStream;
36  import java.io.OutputStreamWriter;
37  import java.io.Writer;
38  import java.nio.charset.StandardCharsets;
39  import java.nio.file.Files;
40  import java.text.ParseException;
41  import java.text.SimpleDateFormat;
42  import java.util.ArrayList;
43  import java.util.Date;
44  import java.util.List;
45  import java.util.Properties;
46  import java.util.Set;
47  import java.util.TimeZone;
48  
49  import org.apache.lucene.document.Document;
50  import org.apache.lucene.index.DirectoryReader;
51  import org.apache.lucene.index.IndexReader;
52  import org.apache.lucene.index.IndexWriter;
53  import org.apache.lucene.index.IndexWriterConfig;
54  import org.apache.lucene.index.MultiBits;
55  import org.apache.lucene.store.Directory;
56  import org.apache.lucene.util.Bits;
57  import org.apache.maven.index.context.DocumentFilter;
58  import org.apache.maven.index.context.IndexUtils;
59  import org.apache.maven.index.context.IndexingContext;
60  import org.apache.maven.index.context.NexusAnalyzer;
61  import org.apache.maven.index.context.NexusIndexWriter;
62  import org.apache.maven.index.fs.Lock;
63  import org.apache.maven.index.fs.Locker;
64  import org.apache.maven.index.incremental.IncrementalHandler;
65  import org.apache.maven.index.updater.IndexDataReader.IndexDataReadResult;
66  import org.codehaus.plexus.util.FileUtils;
67  import org.codehaus.plexus.util.io.RawInputStreamFacade;
68  import org.slf4j.Logger;
69  import org.slf4j.LoggerFactory;
70  
71  /**
72   * A default index updater implementation
73   *
74   * @author Jason van Zyl
75   * @author Eugene Kuleshov
76   */
77  @Singleton
78  @Named
79  public class DefaultIndexUpdater implements IndexUpdater {
80  
81      private final Logger logger = LoggerFactory.getLogger(getClass());
82  
83      protected Logger getLogger() {
84          return logger;
85      }
86  
87      private final IncrementalHandler incrementalHandler;
88  
89      private final List<IndexUpdateSideEffect> sideEffects;
90  
91      @Inject
92      public DefaultIndexUpdater(
93              final IncrementalHandler incrementalHandler, final List<IndexUpdateSideEffect> sideEffects) {
94          this.incrementalHandler = incrementalHandler;
95          this.sideEffects = sideEffects;
96      }
97  
98      public IndexUpdateResult fetchAndUpdateIndex(final IndexUpdateRequest updateRequest) throws IOException {
99          IndexUpdateResult result = new IndexUpdateResult();
100 
101         IndexingContext context = updateRequest.getIndexingContext();
102 
103         ResourceFetcher fetcher = null;
104 
105         if (!updateRequest.isOffline()) {
106             fetcher = updateRequest.getResourceFetcher();
107 
108             // If no resource fetcher passed in, use the wagon fetcher by default
109             // and put back in request for future use
110             if (fetcher == null) {
111                 throw new IOException("Update of the index without provided ResourceFetcher is impossible.");
112             }
113 
114             fetcher.connect(context.getId(), context.getIndexUpdateUrl());
115         }
116 
117         File cacheDir = updateRequest.getLocalIndexCacheDir();
118         Locker locker = updateRequest.getLocker();
119         Lock lock = locker != null && cacheDir != null ? locker.lock(cacheDir) : null;
120         try {
121             if (cacheDir != null) {
122                 LocalCacheIndexAdaptor cache = new LocalCacheIndexAdaptor(cacheDir, result);
123 
124                 if (!updateRequest.isOffline()) {
125                     cacheDir.mkdirs();
126 
127                     try {
128                         if (fetchAndUpdateIndex(updateRequest, fetcher, cache).isSuccessful()) {
129                             cache.commit();
130                         }
131                     } finally {
132                         fetcher.disconnect();
133                     }
134                 }
135 
136                 fetcher = cache.getFetcher();
137             } else if (updateRequest.isOffline()) {
138                 throw new IllegalArgumentException("LocalIndexCacheDir can not be null in offline mode");
139             }
140 
141             try {
142                 if (!updateRequest.isCacheOnly()) {
143                     LuceneIndexAdaptor target = new LuceneIndexAdaptor(updateRequest);
144                     result = fetchAndUpdateIndex(updateRequest, fetcher, target);
145 
146                     if (result.isSuccessful()) {
147                         target.commit();
148                     }
149                 }
150             } finally {
151                 fetcher.disconnect();
152             }
153         } finally {
154             if (lock != null) {
155                 lock.release();
156             }
157         }
158 
159         return result;
160     }
161 
162     private Date loadIndexDirectory(
163             final IndexUpdateRequest updateRequest,
164             final ResourceFetcher fetcher,
165             final boolean merge,
166             final String remoteIndexFile)
167             throws IOException {
168         File indexDir;
169         if (updateRequest.getIndexTempDir() != null) {
170             updateRequest.getIndexTempDir().mkdirs();
171             indexDir = Files.createTempDirectory(updateRequest.getIndexTempDir().toPath(), remoteIndexFile + ".dir")
172                     .toFile();
173         } else {
174             indexDir = Files.createTempDirectory(remoteIndexFile + ".dir").toFile();
175         }
176         try (BufferedInputStream is = new BufferedInputStream(fetcher.retrieve(remoteIndexFile)); //
177                 Directory directory = updateRequest.getFSDirectoryFactory().open(indexDir)) {
178             Date timestamp;
179 
180             Set<String> rootGroups;
181             Set<String> allGroups;
182             if (remoteIndexFile.endsWith(".gz")) {
183                 IndexDataReadResult result =
184                         unpackIndexData(is, updateRequest, directory, updateRequest.getIndexingContext());
185                 timestamp = result.getTimestamp();
186                 rootGroups = result.getRootGroups();
187                 allGroups = result.getAllGroups();
188             } else {
189                 // legacy transfer format
190                 throw new IllegalArgumentException(
191                         "The legacy format is no longer supported " + "by this version of maven-indexer.");
192             }
193 
194             if (updateRequest.getDocumentFilter() != null) {
195                 filterDirectory(directory, updateRequest.getDocumentFilter());
196             }
197 
198             if (merge) {
199                 updateRequest.getIndexingContext().merge(directory);
200             } else {
201                 updateRequest.getIndexingContext().replace(directory, allGroups, rootGroups);
202             }
203             if (sideEffects != null && sideEffects.size() > 0) {
204                 getLogger().info(IndexUpdateSideEffect.class.getName() + " extensions found: " + sideEffects.size());
205                 for (IndexUpdateSideEffect sideeffect : sideEffects) {
206                     sideeffect.updateIndex(directory, updateRequest.getIndexingContext(), merge);
207                 }
208             }
209 
210             return timestamp;
211         } finally {
212             try {
213                 FileUtils.deleteDirectory(indexDir);
214             } catch (IOException ex) {
215                 // ignore
216             }
217         }
218     }
219 
220     private static void filterDirectory(final Directory directory, final DocumentFilter filter) throws IOException {
221         IndexReader r = null;
222         IndexWriter w = null;
223         try {
224             r = DirectoryReader.open(directory);
225             w = new NexusIndexWriter(directory, new IndexWriterConfig(new NexusAnalyzer()));
226 
227             Bits liveDocs = MultiBits.getLiveDocs(r);
228 
229             int numDocs = r.maxDoc();
230 
231             for (int i = 0; i < numDocs; i++) {
232                 if (liveDocs != null && !liveDocs.get(i)) {
233                     continue;
234                 }
235 
236                 Document d = r.storedFields().document(i);
237 
238                 if (!filter.accept(d)) {
239                     boolean success = w.tryDeleteDocument(r, i) != -1;
240                     // FIXME handle deletion failure
241                 }
242             }
243             w.commit();
244         } finally {
245             IndexUtils.close(r);
246             IndexUtils.close(w);
247         }
248 
249         w = null;
250         try {
251             // analyzer is unimportant, since we are not adding/searching to/on index, only reading/deleting
252             w = new NexusIndexWriter(directory, new IndexWriterConfig(new NexusAnalyzer()));
253 
254             w.commit();
255         } finally {
256             IndexUtils.close(w);
257         }
258     }
259 
260     private Properties loadIndexProperties(final File indexDirectoryFile, final String remoteIndexPropertiesName) {
261         File indexProperties = new File(indexDirectoryFile, remoteIndexPropertiesName);
262 
263         try (FileInputStream fis = new FileInputStream(indexProperties)) {
264             Properties properties = new Properties();
265 
266             properties.load(fis);
267 
268             return properties;
269         } catch (IOException e) {
270             getLogger().debug("Unable to read remote properties stored locally", e);
271         }
272         return null;
273     }
274 
275     private void storeIndexProperties(final File dir, final String indexPropertiesName, final Properties properties)
276             throws IOException {
277         File file = new File(dir, indexPropertiesName);
278 
279         if (properties != null) {
280             try (OutputStream os = new BufferedOutputStream(new FileOutputStream(file))) {
281                 properties.store(os, null);
282             }
283         } else {
284             file.delete();
285         }
286     }
287 
288     private Properties downloadIndexProperties(final ResourceFetcher fetcher) throws IOException {
289         try (InputStream fis = fetcher.retrieve(IndexingContext.INDEX_REMOTE_PROPERTIES_FILE)) {
290             Properties properties = new Properties();
291 
292             properties.load(fis);
293 
294             return properties;
295         }
296     }
297 
298     public Date getTimestamp(final Properties properties, final String key) {
299         String indexTimestamp = properties.getProperty(key);
300 
301         if (indexTimestamp != null) {
302             try {
303                 SimpleDateFormat df = new SimpleDateFormat(IndexingContext.INDEX_TIME_FORMAT);
304                 df.setTimeZone(TimeZone.getTimeZone("GMT"));
305                 return df.parse(indexTimestamp);
306             } catch (ParseException ex) {
307             }
308         }
309         return null;
310     }
311 
312     /**
313      * @param is an input stream to unpack index data from
314      * @param threads thread count to use
315      * @param d
316      * @param context
317      */
318     public static IndexDataReadResult unpackIndexData(
319             final InputStream is, final int threads, final Directory d, final IndexingContext context)
320             throws IOException {
321         return unpackIndexData(d, new IndexDataReader(is, threads), context);
322     }
323 
324     /**
325      * @param is an input stream to unpack index data from
326      * @param request IndexUpdateRequest for configuration
327      * @param d
328      * @param context
329      */
330     public static IndexDataReadResult unpackIndexData(
331             final InputStream is, final IndexUpdateRequest request, final Directory d, final IndexingContext context)
332             throws IOException {
333         return unpackIndexData(d, new IndexDataReader(is, request), context);
334     }
335 
336     private static IndexDataReadResult unpackIndexData(
337             final Directory d, IndexDataReader dr, final IndexingContext context) throws IOException {
338         IndexWriterConfig config = new IndexWriterConfig(new NexusAnalyzer());
339         config.setUseCompoundFile(false);
340         try (NexusIndexWriter w = new NexusIndexWriter(d, config)) {
341             return dr.readIndex(w, context);
342         }
343     }
344 
345     /**
346      * Filesystem-based ResourceFetcher implementation
347      */
348     public static class FileFetcher implements ResourceFetcher {
349         private final File basedir;
350 
351         public FileFetcher(File basedir) {
352             this.basedir = basedir;
353         }
354 
355         public void connect(String id, String url) throws IOException {
356             // don't need to do anything
357         }
358 
359         public void disconnect() throws IOException {
360             // don't need to do anything
361         }
362 
363         public void retrieve(String name, File targetFile) throws IOException, FileNotFoundException {
364             FileUtils.copyFile(getFile(name), targetFile);
365         }
366 
367         public InputStream retrieve(String name) throws IOException, FileNotFoundException {
368             return new FileInputStream(getFile(name));
369         }
370 
371         private File getFile(String name) {
372             return new File(basedir, name);
373         }
374     }
375 
376     private abstract class IndexAdaptor {
377         protected final File dir;
378 
379         protected Properties properties;
380 
381         protected IndexAdaptor(File dir) {
382             this.dir = dir;
383         }
384 
385         public abstract Properties getProperties();
386 
387         public abstract void storeProperties() throws IOException;
388 
389         public abstract void addIndexChunk(ResourceFetcher source, String filename) throws IOException;
390 
391         public abstract Date setIndexFile(ResourceFetcher source, String string) throws IOException;
392 
393         public Properties setProperties(ResourceFetcher source) throws IOException {
394             this.properties = downloadIndexProperties(source);
395             return properties;
396         }
397 
398         public abstract Date getTimestamp();
399 
400         public void commit() throws IOException {
401             storeProperties();
402         }
403     }
404 
405     private class LuceneIndexAdaptor extends IndexAdaptor {
406         private final IndexUpdateRequest updateRequest;
407 
408         LuceneIndexAdaptor(IndexUpdateRequest updateRequest) {
409             super(updateRequest.getIndexingContext().getIndexDirectoryFile());
410             this.updateRequest = updateRequest;
411         }
412 
413         public Properties getProperties() {
414             if (properties == null) {
415                 properties = loadIndexProperties(dir, IndexingContext.INDEX_UPDATER_PROPERTIES_FILE);
416             }
417             return properties;
418         }
419 
420         public void storeProperties() throws IOException {
421             storeIndexProperties(dir, IndexingContext.INDEX_UPDATER_PROPERTIES_FILE, properties);
422         }
423 
424         public Date getTimestamp() {
425             return updateRequest.getIndexingContext().getTimestamp();
426         }
427 
428         public void addIndexChunk(ResourceFetcher source, String filename) throws IOException {
429             loadIndexDirectory(updateRequest, source, true, filename);
430         }
431 
432         public Date setIndexFile(ResourceFetcher source, String filename) throws IOException {
433             return loadIndexDirectory(updateRequest, source, false, filename);
434         }
435 
436         public void commit() throws IOException {
437             super.commit();
438 
439             updateRequest.getIndexingContext().commit();
440         }
441     }
442 
443     private class LocalCacheIndexAdaptor extends IndexAdaptor {
444         private static final String CHUNKS_FILENAME = "chunks.lst";
445 
446         private final IndexUpdateResult result;
447 
448         private final ArrayList<String> newChunks = new ArrayList<>();
449 
450         LocalCacheIndexAdaptor(File dir, IndexUpdateResult result) {
451             super(dir);
452             this.result = result;
453         }
454 
455         public Properties getProperties() {
456             if (properties == null) {
457                 properties = loadIndexProperties(dir, IndexingContext.INDEX_REMOTE_PROPERTIES_FILE);
458             }
459             return properties;
460         }
461 
462         public void storeProperties() throws IOException {
463             storeIndexProperties(dir, IndexingContext.INDEX_REMOTE_PROPERTIES_FILE, properties);
464         }
465 
466         public Date getTimestamp() {
467             Properties properties = getProperties();
468             if (properties == null) {
469                 return null;
470             }
471 
472             Date timestamp = DefaultIndexUpdater.this.getTimestamp(properties, IndexingContext.INDEX_TIMESTAMP);
473 
474             if (timestamp == null) {
475                 timestamp = DefaultIndexUpdater.this.getTimestamp(properties, IndexingContext.INDEX_LEGACY_TIMESTAMP);
476             }
477 
478             return timestamp;
479         }
480 
481         public void addIndexChunk(ResourceFetcher source, String filename) throws IOException {
482             File chunk = new File(dir, filename);
483             FileUtils.copyStreamToFile(new RawInputStreamFacade(source.retrieve(filename)), chunk);
484             newChunks.add(filename);
485         }
486 
487         public Date setIndexFile(ResourceFetcher source, String filename) throws IOException {
488             cleanCacheDirectory(dir);
489 
490             result.setFullUpdate(true);
491 
492             File target = new File(dir, filename);
493             FileUtils.copyStreamToFile(new RawInputStreamFacade(source.retrieve(filename)), target);
494 
495             return null;
496         }
497 
498         @Override
499         public void commit() throws IOException {
500             File chunksFile = new File(dir, CHUNKS_FILENAME);
501             try (BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(chunksFile, true)); //
502                     Writer w = new OutputStreamWriter(os, StandardCharsets.UTF_8)) {
503                 for (String filename : newChunks) {
504                     w.write(filename + "\n");
505                 }
506                 w.flush();
507             }
508             super.commit();
509         }
510 
511         public List<String> getChunks() throws IOException {
512             ArrayList<String> chunks = new ArrayList<>();
513 
514             File chunksFile = new File(dir, CHUNKS_FILENAME);
515             try (BufferedReader r = new BufferedReader(
516                     new InputStreamReader(new FileInputStream(chunksFile), StandardCharsets.UTF_8))) {
517                 String str;
518                 while ((str = r.readLine()) != null) {
519                     chunks.add(str);
520                 }
521             }
522             return chunks;
523         }
524 
525         public ResourceFetcher getFetcher() {
526             return new LocalIndexCacheFetcher(dir) {
527                 @Override
528                 public List<String> getChunks() throws IOException {
529                     return LocalCacheIndexAdaptor.this.getChunks();
530                 }
531             };
532         }
533     }
534 
535     abstract static class LocalIndexCacheFetcher extends FileFetcher {
536         LocalIndexCacheFetcher(File basedir) {
537             super(basedir);
538         }
539 
540         public abstract List<String> getChunks() throws IOException;
541     }
542 
543     private IndexUpdateResult fetchAndUpdateIndex(
544             final IndexUpdateRequest updateRequest, ResourceFetcher source, IndexAdaptor target) throws IOException {
545         IndexUpdateResult result = new IndexUpdateResult();
546 
547         if (!updateRequest.isForceFullUpdate()) {
548             Properties localProperties = target.getProperties();
549             Date localTimestamp = null;
550 
551             if (localProperties != null) {
552                 localTimestamp = getTimestamp(localProperties, IndexingContext.INDEX_TIMESTAMP);
553             }
554 
555             // this will download and store properties in the target, so next run
556             // target.getProperties() will retrieve it
557             Properties remoteProperties = target.setProperties(source);
558 
559             Date updateTimestamp = getTimestamp(remoteProperties, IndexingContext.INDEX_TIMESTAMP);
560 
561             // If new timestamp is missing, dont bother checking incremental, we have an old file
562             if (updateTimestamp != null) {
563                 List<String> filenames = incrementalHandler.loadRemoteIncrementalUpdates(
564                         updateRequest, localProperties, remoteProperties);
565 
566                 // if we have some incremental files, merge them in
567                 if (filenames != null) {
568                     for (String filename : filenames) {
569                         target.addIndexChunk(source, filename);
570                     }
571 
572                     result.setTimestamp(updateTimestamp);
573                     result.setSuccessful(true);
574                     return result;
575                 }
576             } else {
577                 updateTimestamp = getTimestamp(remoteProperties, IndexingContext.INDEX_LEGACY_TIMESTAMP);
578             }
579 
580             // fallback to timestamp comparison, but try with one coming from local properties, and if not possible (is
581             // null)
582             // fallback to context timestamp
583             if (localTimestamp != null) {
584                 // if we have localTimestamp
585                 // if incremental can't be done for whatever reason, simply use old logic of
586                 // checking the timestamp, if the same, nothing to do
587                 if (updateTimestamp != null && localTimestamp != null && !updateTimestamp.after(localTimestamp)) {
588                     // Index is up to date
589                     result.setSuccessful(true);
590                     return result;
591                 }
592             }
593         } else {
594             // create index properties during forced full index download
595             target.setProperties(source);
596         }
597 
598         if (!updateRequest.isIncrementalOnly()) {
599             Date timestamp;
600             try {
601                 timestamp = target.setIndexFile(source, IndexingContext.INDEX_FILE_PREFIX + ".gz");
602                 if (source instanceof LocalIndexCacheFetcher) {
603                     // local cache has inverse organization compared to remote indexes,
604                     // i.e. initial index file and delta chunks to apply on top of it
605                     for (String filename : ((LocalIndexCacheFetcher) source).getChunks()) {
606                         target.addIndexChunk(source, filename);
607                     }
608                 }
609             } catch (IOException ex) {
610                 // try to look for legacy index transfer format
611                 try {
612                     timestamp = target.setIndexFile(source, IndexingContext.INDEX_FILE_PREFIX + ".zip");
613                 } catch (IOException ex2) {
614                     getLogger().error("Fallback to *.zip also failed: " + ex2); // do not bother with stack trace
615 
616                     throw ex; // original exception more likely to be interesting
617                 }
618             }
619 
620             result.setTimestamp(timestamp);
621             result.setSuccessful(true);
622             result.setFullUpdate(true);
623         }
624 
625         return result;
626     }
627 
628     /**
629      * Cleans specified cache directory. If present, Locker.LOCK_FILE will not be deleted.
630      */
631     protected void cleanCacheDirectory(File dir) throws IOException {
632         File[] members = dir.listFiles();
633         if (members == null) {
634             return;
635         }
636 
637         for (File member : members) {
638             if (!Locker.LOCK_FILE.equals(member.getName())) {
639                 FileUtils.forceDelete(member);
640             }
641         }
642     }
643 }