View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.index.creator;
20  
21  import javax.inject.Named;
22  import javax.inject.Singleton;
23  
24  import java.io.File;
25  import java.io.IOException;
26  import java.util.Arrays;
27  import java.util.Collection;
28  
29  import org.apache.lucene.document.Document;
30  import org.apache.lucene.document.Field;
31  import org.apache.lucene.document.StoredField;
32  import org.apache.maven.index.ArtifactAvailability;
33  import org.apache.maven.index.ArtifactContext;
34  import org.apache.maven.index.ArtifactInfo;
35  import org.apache.maven.index.IndexerField;
36  import org.apache.maven.index.IndexerFieldVersion;
37  import org.apache.maven.index.MAVEN;
38  import org.apache.maven.index.NEXUS;
39  import org.apache.maven.index.artifact.Gav;
40  import org.apache.maven.index.locator.JavadocLocator;
41  import org.apache.maven.index.locator.Locator;
42  import org.apache.maven.index.locator.Sha1Locator;
43  import org.apache.maven.index.locator.SignatureLocator;
44  import org.apache.maven.index.locator.SourcesLocator;
45  import org.apache.maven.model.Model;
46  import org.codehaus.plexus.util.FileUtils;
47  import org.codehaus.plexus.util.StringUtils;
48  
/**
 * A minimal index creator that provides basic information about a Maven artifact. It is designed to build the index
 * quickly and therefore avoids opening the artifact files themselves, which has a drawback: some of the gathered
 * information is "best-effort" only and may not reflect reality (e.g. Maven archetype packaging, see
 * {@link MavenArchetypeArtifactInfoIndexCreator}).
 *
 * @author cstamas
 */
57  @Singleton
58  @Named(MinimalArtifactInfoIndexCreator.ID)
59  public class MinimalArtifactInfoIndexCreator extends AbstractIndexCreator implements LegacyDocumentUpdater {
60  
61      private static final char FS = ArtifactInfo.FS.charAt(0);
62  
63      static {
64          if (ArtifactInfo.FS.length() != 1) {
65              throw new IllegalStateException("field format changed");
66          }
67      }
68  
69      public static final String ID = "min";
70  
71      /**
72       * Info: packaging, lastModified, size, sourcesExists, javadocExists, signatureExists. Stored, not indexed.
73       */
74      public static final IndexerField FLD_INFO = new IndexerField(
75              NEXUS.INFO, IndexerFieldVersion.V1, "i", "Artifact INFO (not indexed, stored)", StoredField.TYPE);
76  
77      public static final IndexerField FLD_GROUP_ID_KW = new IndexerField(
78              MAVEN.GROUP_ID,
79              IndexerFieldVersion.V1,
80              "g",
81              "Artifact GroupID (as keyword)",
82              IndexerField.KEYWORD_NOT_STORED);
83  
84      public static final IndexerField FLD_GROUP_ID = new IndexerField(
85              MAVEN.GROUP_ID,
86              IndexerFieldVersion.V3,
87              "groupId",
88              "Artifact GroupID (tokenized)",
89              IndexerField.ANALYZED_NOT_STORED);
90  
91      public static final IndexerField FLD_ARTIFACT_ID_KW = new IndexerField(
92              MAVEN.ARTIFACT_ID,
93              IndexerFieldVersion.V1,
94              "a",
95              "Artifact ArtifactID (as keyword)",
96              IndexerField.KEYWORD_NOT_STORED);
97  
98      public static final IndexerField FLD_ARTIFACT_ID = new IndexerField(
99              MAVEN.ARTIFACT_ID,
100             IndexerFieldVersion.V3,
101             "artifactId",
102             "Artifact ArtifactID (tokenized)",
103             IndexerField.ANALYZED_NOT_STORED);
104 
105     public static final IndexerField FLD_VERSION_KW = new IndexerField(
106             MAVEN.VERSION,
107             IndexerFieldVersion.V1,
108             "v",
109             "Artifact Version (as keyword)",
110             IndexerField.KEYWORD_NOT_STORED);
111 
112     public static final IndexerField FLD_VERSION = new IndexerField(
113             MAVEN.VERSION,
114             IndexerFieldVersion.V3,
115             "version",
116             "Artifact Version (tokenized)",
117             IndexerField.ANALYZED_NOT_STORED);
118 
119     public static final IndexerField FLD_PACKAGING = new IndexerField(
120             MAVEN.PACKAGING,
121             IndexerFieldVersion.V1,
122             "p",
123             "Artifact Packaging (as keyword)",
124             IndexerField.KEYWORD_NOT_STORED);
125 
126     public static final IndexerField FLD_EXTENSION = new IndexerField(
127             MAVEN.EXTENSION,
128             IndexerFieldVersion.V1,
129             "e",
130             "Artifact extension (as keyword)",
131             IndexerField.KEYWORD_NOT_STORED);
132 
133     public static final IndexerField FLD_CLASSIFIER = new IndexerField(
134             MAVEN.CLASSIFIER,
135             IndexerFieldVersion.V1,
136             "l",
137             "Artifact classifier (as keyword)",
138             IndexerField.KEYWORD_NOT_STORED);
139 
140     public static final IndexerField FLD_NAME = new IndexerField(
141             MAVEN.NAME, IndexerFieldVersion.V1, "n", "Artifact name (tokenized, stored)", IndexerField.ANALYZED_STORED);
142 
143     public static final IndexerField FLD_DESCRIPTION = new IndexerField(
144             MAVEN.DESCRIPTION,
145             IndexerFieldVersion.V1,
146             "d",
147             "Artifact description (tokenized, stored)",
148             IndexerField.ANALYZED_STORED);
149 
150     public static final IndexerField FLD_LAST_MODIFIED = new IndexerField(
151             MAVEN.LAST_MODIFIED,
152             IndexerFieldVersion.V1,
153             "m",
154             "Artifact last modified (not indexed, stored)",
155             StoredField.TYPE);
156 
157     public static final IndexerField FLD_SHA1 = new IndexerField(
158             MAVEN.SHA1,
159             IndexerFieldVersion.V1,
160             "1",
161             "Artifact SHA1 checksum (as keyword, stored)",
162             IndexerField.KEYWORD_STORED);
163 
164     private Locator jl = new JavadocLocator();
165 
166     private Locator sl = new SourcesLocator();
167 
168     private Locator sigl = new SignatureLocator();
169 
170     private Locator sha1l = new Sha1Locator();
171 
172     public MinimalArtifactInfoIndexCreator() {
173         super(ID);
174     }
175 
176     @Override
177     public void populateArtifactInfo(ArtifactContext ac) {
178         File artifact = ac.getArtifact();
179 
180         File pom = ac.getPom();
181 
182         ArtifactInfo ai = ac.getArtifactInfo();
183 
184         if (pom != null && pom.isFile()) {
185             ai.setLastModified(pom.lastModified());
186 
187             ai.setFileExtension("pom");
188         }
189 
190         // TODO handle artifacts without poms
191         if (pom != null && pom.isFile()) {
192             if (ai.getClassifier() != null) {
193                 ai.setSourcesExists(ArtifactAvailability.NOT_AVAILABLE);
194 
195                 ai.setJavadocExists(ArtifactAvailability.NOT_AVAILABLE);
196             } else {
197                 File sources = sl.locate(pom);
198                 if (!sources.exists()) {
199                     ai.setSourcesExists(ArtifactAvailability.NOT_PRESENT);
200                 } else {
201                     ai.setSourcesExists(ArtifactAvailability.PRESENT);
202                 }
203 
204                 File javadoc = jl.locate(pom);
205                 if (!javadoc.exists()) {
206                     ai.setJavadocExists(ArtifactAvailability.NOT_PRESENT);
207                 } else {
208                     ai.setJavadocExists(ArtifactAvailability.PRESENT);
209                 }
210             }
211         }
212 
213         Model model = ac.getPomModel();
214 
215         if (model != null) {
216             ai.setName(model.getName());
217 
218             ai.setDescription(model.getDescription());
219 
220             // for main artifacts (without classifier) only:
221             if (ai.getClassifier() == null) {
222                 // only when this is not a classified artifact
223                 if (model.getPackaging() != null) {
224                     // set the read value that is coming from POM
225                     ai.setPackaging(model.getPackaging());
226                 } else {
227                     // default it, since POM is present, is read, but does not contain explicit packaging
228                     // TODO: this change breaks junit tests, but not sure why is "null" expected value?
229                     ai.setPackaging("jar");
230                 }
231             }
232         }
233 
234         if ("pom".equals(ai.getPackaging())) {
235             // special case, the POM _is_ the artifact
236             artifact = pom;
237         }
238 
239         if (artifact != null) {
240             File signature = sigl.locate(artifact);
241 
242             ai.setSignatureExists(signature.exists() ? ArtifactAvailability.PRESENT : ArtifactAvailability.NOT_PRESENT);
243 
244             File sha1 = sha1l.locate(artifact);
245 
246             if (sha1.exists()) {
247                 try {
248                     ai.setSha1(
249                             StringUtils.chomp(FileUtils.fileRead(sha1)).trim().split(" ")[0]);
250                 } catch (IOException e) {
251                     ac.addError(e);
252                 }
253             }
254 
255             ai.setLastModified(artifact.lastModified());
256 
257             ai.setSize(artifact.length());
258 
259             ai.setFileExtension(getExtension(artifact, ac.getGav()));
260         }
261     }
262 
263     private String getExtension(File artifact, Gav gav) {
264         if (gav != null && StringUtils.isNotBlank(gav.getExtension())) {
265             return gav.getExtension();
266         }
267 
268         // last resort, the extension of the file
269         String artifactFileName = artifact.getName().toLowerCase();
270 
271         // tar.gz? and other "special" combinations
272         if (artifactFileName.endsWith("tar.gz")) {
273             return "tar.gz";
274         } else if (artifactFileName.equals("tar.bz2")) {
275             return "tar.bz2";
276         }
277 
278         // get the part after the last dot
279         return FileUtils.getExtension(artifactFileName);
280     }
281 
282     @Override
283     public void updateDocument(ArtifactInfo ai, Document doc) {
284         String info = ArtifactInfo.nvl(ai.getPackaging())
285                 + ArtifactInfo.FS
286                 + ai.getLastModified()
287                 + ArtifactInfo.FS
288                 + ai.getSize()
289                 + ArtifactInfo.FS
290                 + ai.getSourcesExists().toString()
291                 + ArtifactInfo.FS
292                 + ai.getJavadocExists().toString()
293                 + ArtifactInfo.FS
294                 + ai.getSignatureExists().toString()
295                 + ArtifactInfo.FS
296                 + ai.getFileExtension();
297 
298         doc.add(FLD_INFO.toField(info));
299 
300         doc.add(FLD_GROUP_ID_KW.toField(ai.getGroupId()));
301         doc.add(FLD_ARTIFACT_ID_KW.toField(ai.getArtifactId()));
302         doc.add(FLD_VERSION_KW.toField(ai.getVersion()));
303 
304         // V3
305         doc.add(FLD_GROUP_ID.toField(ai.getGroupId()));
306         doc.add(FLD_ARTIFACT_ID.toField(ai.getArtifactId()));
307         doc.add(FLD_VERSION.toField(ai.getVersion()));
308         doc.add(FLD_EXTENSION.toField(ai.getFileExtension()));
309 
310         if (ai.getName() != null) {
311             doc.add(FLD_NAME.toField(ai.getName()));
312         }
313 
314         if (ai.getDescription() != null) {
315             doc.add(FLD_DESCRIPTION.toField(ai.getDescription()));
316         }
317 
318         if (ai.getPackaging() != null) {
319             doc.add(FLD_PACKAGING.toField(ai.getPackaging()));
320         }
321 
322         if (ai.getClassifier() != null) {
323             doc.add(FLD_CLASSIFIER.toField(ai.getClassifier()));
324         }
325 
326         if (ai.getSha1() != null) {
327             doc.add(FLD_SHA1.toField(ai.getSha1()));
328         }
329     }
330 
331     @Override
332     public void updateLegacyDocument(ArtifactInfo ai, Document doc) {
333         updateDocument(ai, doc);
334 
335         // legacy!
336         if (ai.getPrefix() != null) {
337             doc.add(new Field(ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), IndexerField.KEYWORD_STORED));
338         }
339 
340         if (ai.getGoals() != null) {
341             doc.add(new StoredField(ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str(ai.getGoals())));
342         }
343 
344         doc.removeField(ArtifactInfo.GROUP_ID);
345         doc.add(new Field(ArtifactInfo.GROUP_ID, ai.getGroupId(), IndexerField.KEYWORD_NOT_STORED));
346     }
347 
348     @Override
349     public boolean updateArtifactInfo(Document doc, ArtifactInfo ai) {
350         boolean res = false;
351 
352         String uinfo = doc.get(ArtifactInfo.UINFO);
353 
354         if (uinfo != null) {
355 
356             int start = 0;
357             int end = uinfo.indexOf(FS);
358             ai.setGroupId(uinfo.substring(start, end));
359 
360             start = end + 1;
361             end = uinfo.indexOf(FS, start);
362             ai.setArtifactId(uinfo.substring(start, end));
363 
364             start = end + 1;
365             end = uinfo.indexOf(FS, start);
366             ai.setVersion(uinfo.substring(start, end));
367 
368             start = end + 1;
369             end = uinfo.indexOf(FS, start);
370             if (end == -1) {
371                 end = uinfo.length();
372             }
373             ai.setClassifier(ArtifactInfo.renvl(uinfo.substring(start, end)));
374 
375             if (end != uinfo.length()) {
376                 start = end + 1;
377                 end = uinfo.length();
378                 ai.setFileExtension(uinfo.substring(start, end));
379             }
380 
381             res = true;
382         }
383 
384         String info = doc.get(ArtifactInfo.INFO);
385 
386         if (info != null) {
387 
388             int start = 0;
389             int end = info.indexOf(FS);
390             ai.setPackaging(ArtifactInfo.renvl(info.substring(start, end)));
391 
392             start = end + 1;
393             end = info.indexOf(FS, start);
394             ai.setLastModified(Long.parseLong(info.substring(start, end)));
395 
396             start = end + 1;
397             end = info.indexOf(FS, start);
398             ai.setSize(Long.parseLong(info.substring(start, end)));
399 
400             start = end + 1;
401             end = info.indexOf(FS, start);
402             ai.setSourcesExists(ArtifactAvailability.fromString(info.substring(start, end)));
403 
404             start = end + 1;
405             end = info.indexOf(FS, start);
406             ai.setJavadocExists(ArtifactAvailability.fromString(info.substring(start, end)));
407 
408             start = end + 1;
409             end = info.indexOf(FS, start);
410             if (end == -1) {
411                 end = info.length();
412             }
413             ai.setSignatureExists(ArtifactAvailability.fromString(info.substring(start, end)));
414 
415             if (end != info.length()) {
416                 start = end + 1;
417                 end = info.length();
418                 ai.setFileExtension(info.substring(start, end));
419             } else {
420                 if (ai.getClassifier() != null //
421                         || "pom".equals(ai.getPackaging()) //
422                         || "war".equals(ai.getPackaging()) //
423                         || "ear".equals(ai.getPackaging())) {
424                     ai.setFileExtension(ai.getPackaging());
425                 } else {
426                     ai.setFileExtension("jar"); // best guess
427                 }
428             }
429 
430             res = true;
431         }
432 
433         String name = doc.get(ArtifactInfo.NAME);
434 
435         if (name != null) {
436             ai.setName(name);
437 
438             res = true;
439         }
440 
441         String description = doc.get(ArtifactInfo.DESCRIPTION);
442 
443         if (description != null) {
444             ai.setDescription(description);
445 
446             res = true;
447         }
448 
449         // sometimes there's a pom without packaging(default to jar), but no artifact, then the value will be a "null"
450         // String
451         if ("null".equals(ai.getPackaging())) {
452             ai.setPackaging(null);
453         }
454 
455         String sha1 = doc.get(ArtifactInfo.SHA1);
456 
457         if (sha1 != null) {
458             ai.setSha1(sha1);
459         }
460 
461         return res;
462 
463         // artifactInfo.fname = ???
464     }
465 
466     // ==
467 
468     @Override
469     public String toString() {
470         return ID;
471     }
472 
473     @Override
474     public Collection<IndexerField> getIndexerFields() {
475         return Arrays.asList(
476                 FLD_INFO,
477                 FLD_GROUP_ID_KW,
478                 FLD_GROUP_ID,
479                 FLD_ARTIFACT_ID_KW,
480                 FLD_ARTIFACT_ID,
481                 FLD_VERSION_KW,
482                 FLD_VERSION,
483                 FLD_PACKAGING,
484                 FLD_CLASSIFIER,
485                 FLD_NAME,
486                 FLD_DESCRIPTION,
487                 FLD_LAST_MODIFIED,
488                 FLD_SHA1);
489     }
490 }