/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19  package org.apache.maven.index.creator;
20  
import javax.inject.Named;
import javax.inject.Singleton;

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.maven.index.ArtifactAvailability;
import org.apache.maven.index.ArtifactContext;
import org.apache.maven.index.ArtifactInfo;
import org.apache.maven.index.IndexerField;
import org.apache.maven.index.IndexerFieldVersion;
import org.apache.maven.index.MAVEN;
import org.apache.maven.index.NEXUS;
import org.apache.maven.index.artifact.Gav;
import org.apache.maven.index.locator.JavadocLocator;
import org.apache.maven.index.locator.Locator;
import org.apache.maven.index.locator.Sha1Locator;
import org.apache.maven.index.locator.SignatureLocator;
import org.apache.maven.index.locator.SourcesLocator;
import org.apache.maven.model.Model;
import org.codehaus.plexus.util.FileUtils;
import org.codehaus.plexus.util.StringUtils;
48  
49  /**
50   * A minimal index creator used to provide basic information about Maven artifact. This creator will create the index
51   * fast, will not open any file to be fastest as possible but it has some drawbacks: The information gathered by this
52   * creator are sometimes based on "best-effort" only, and does not reflect the reality (ie. maven archetype packaging @see
53   * {@link MavenArchetypeArtifactInfoIndexCreator}).
54   *
55   * @author cstamas
56   */
57  @Singleton
58  @Named(MinimalArtifactInfoIndexCreator.ID)
59  public class MinimalArtifactInfoIndexCreator extends AbstractIndexCreator implements LegacyDocumentUpdater {
60      public static final String ID = "min";
61  
62      /**
63       * Info: packaging, lastModified, size, sourcesExists, javadocExists, signatureExists. Stored, not indexed.
64       */
65      public static final IndexerField FLD_INFO = new IndexerField(
66              NEXUS.INFO, IndexerFieldVersion.V1, "i", "Artifact INFO (not indexed, stored)", StoredField.TYPE);
67  
68      public static final IndexerField FLD_GROUP_ID_KW = new IndexerField(
69              MAVEN.GROUP_ID,
70              IndexerFieldVersion.V1,
71              "g",
72              "Artifact GroupID (as keyword)",
73              IndexerField.KEYWORD_NOT_STORED);
74  
75      public static final IndexerField FLD_GROUP_ID = new IndexerField(
76              MAVEN.GROUP_ID,
77              IndexerFieldVersion.V3,
78              "groupId",
79              "Artifact GroupID (tokenized)",
80              IndexerField.ANALYZED_NOT_STORED);
81  
82      public static final IndexerField FLD_ARTIFACT_ID_KW = new IndexerField(
83              MAVEN.ARTIFACT_ID,
84              IndexerFieldVersion.V1,
85              "a",
86              "Artifact ArtifactID (as keyword)",
87              IndexerField.KEYWORD_NOT_STORED);
88  
89      public static final IndexerField FLD_ARTIFACT_ID = new IndexerField(
90              MAVEN.ARTIFACT_ID,
91              IndexerFieldVersion.V3,
92              "artifactId",
93              "Artifact ArtifactID (tokenized)",
94              IndexerField.ANALYZED_NOT_STORED);
95  
96      public static final IndexerField FLD_VERSION_KW = new IndexerField(
97              MAVEN.VERSION,
98              IndexerFieldVersion.V1,
99              "v",
100             "Artifact Version (as keyword)",
101             IndexerField.KEYWORD_NOT_STORED);
102 
103     public static final IndexerField FLD_VERSION = new IndexerField(
104             MAVEN.VERSION,
105             IndexerFieldVersion.V3,
106             "version",
107             "Artifact Version (tokenized)",
108             IndexerField.ANALYZED_NOT_STORED);
109 
110     public static final IndexerField FLD_PACKAGING = new IndexerField(
111             MAVEN.PACKAGING,
112             IndexerFieldVersion.V1,
113             "p",
114             "Artifact Packaging (as keyword)",
115             IndexerField.KEYWORD_NOT_STORED);
116 
117     public static final IndexerField FLD_EXTENSION = new IndexerField(
118             MAVEN.EXTENSION,
119             IndexerFieldVersion.V1,
120             "e",
121             "Artifact extension (as keyword)",
122             IndexerField.KEYWORD_NOT_STORED);
123 
124     public static final IndexerField FLD_CLASSIFIER = new IndexerField(
125             MAVEN.CLASSIFIER,
126             IndexerFieldVersion.V1,
127             "l",
128             "Artifact classifier (as keyword)",
129             IndexerField.KEYWORD_NOT_STORED);
130 
131     public static final IndexerField FLD_NAME = new IndexerField(
132             MAVEN.NAME, IndexerFieldVersion.V1, "n", "Artifact name (tokenized, stored)", IndexerField.ANALYZED_STORED);
133 
134     public static final IndexerField FLD_DESCRIPTION = new IndexerField(
135             MAVEN.DESCRIPTION,
136             IndexerFieldVersion.V1,
137             "d",
138             "Artifact description (tokenized, stored)",
139             IndexerField.ANALYZED_STORED);
140 
141     public static final IndexerField FLD_LAST_MODIFIED = new IndexerField(
142             MAVEN.LAST_MODIFIED,
143             IndexerFieldVersion.V1,
144             "m",
145             "Artifact last modified (not indexed, stored)",
146             StoredField.TYPE);
147 
148     public static final IndexerField FLD_SHA1 = new IndexerField(
149             MAVEN.SHA1,
150             IndexerFieldVersion.V1,
151             "1",
152             "Artifact SHA1 checksum (as keyword, stored)",
153             IndexerField.KEYWORD_STORED);
154 
155     private Locator jl = new JavadocLocator();
156 
157     private Locator sl = new SourcesLocator();
158 
159     private Locator sigl = new SignatureLocator();
160 
161     private Locator sha1l = new Sha1Locator();
162 
163     public MinimalArtifactInfoIndexCreator() {
164         super(ID);
165     }
166 
167     public void populateArtifactInfo(ArtifactContext ac) {
168         File artifact = ac.getArtifact();
169 
170         File pom = ac.getPom();
171 
172         ArtifactInfo ai = ac.getArtifactInfo();
173 
174         if (pom != null && pom.isFile()) {
175             ai.setLastModified(pom.lastModified());
176 
177             ai.setFileExtension("pom");
178         }
179 
180         // TODO handle artifacts without poms
181         if (pom != null && pom.isFile()) {
182             if (ai.getClassifier() != null) {
183                 ai.setSourcesExists(ArtifactAvailability.NOT_AVAILABLE);
184 
185                 ai.setJavadocExists(ArtifactAvailability.NOT_AVAILABLE);
186             } else {
187                 File sources = sl.locate(pom);
188                 if (!sources.exists()) {
189                     ai.setSourcesExists(ArtifactAvailability.NOT_PRESENT);
190                 } else {
191                     ai.setSourcesExists(ArtifactAvailability.PRESENT);
192                 }
193 
194                 File javadoc = jl.locate(pom);
195                 if (!javadoc.exists()) {
196                     ai.setJavadocExists(ArtifactAvailability.NOT_PRESENT);
197                 } else {
198                     ai.setJavadocExists(ArtifactAvailability.PRESENT);
199                 }
200             }
201         }
202 
203         Model model = ac.getPomModel();
204 
205         if (model != null) {
206             ai.setName(model.getName());
207 
208             ai.setDescription(model.getDescription());
209 
210             // for main artifacts (without classifier) only:
211             if (ai.getClassifier() == null) {
212                 // only when this is not a classified artifact
213                 if (model.getPackaging() != null) {
214                     // set the read value that is coming from POM
215                     ai.setPackaging(model.getPackaging());
216                 } else {
217                     // default it, since POM is present, is read, but does not contain explicit packaging
218                     // TODO: this change breaks junit tests, but not sure why is "null" expected value?
219                     ai.setPackaging("jar");
220                 }
221             }
222         }
223 
224         if ("pom".equals(ai.getPackaging())) {
225             // special case, the POM _is_ the artifact
226             artifact = pom;
227         }
228 
229         if (artifact != null) {
230             File signature = sigl.locate(artifact);
231 
232             ai.setSignatureExists(signature.exists() ? ArtifactAvailability.PRESENT : ArtifactAvailability.NOT_PRESENT);
233 
234             File sha1 = sha1l.locate(artifact);
235 
236             if (sha1.exists()) {
237                 try {
238                     ai.setSha1(
239                             StringUtils.chomp(FileUtils.fileRead(sha1)).trim().split(" ")[0]);
240                 } catch (IOException e) {
241                     ac.addError(e);
242                 }
243             }
244 
245             ai.setLastModified(artifact.lastModified());
246 
247             ai.setSize(artifact.length());
248 
249             ai.setFileExtension(getExtension(artifact, ac.getGav()));
250         }
251     }
252 
253     private String getExtension(File artifact, Gav gav) {
254         if (gav != null && StringUtils.isNotBlank(gav.getExtension())) {
255             return gav.getExtension();
256         }
257 
258         // last resort, the extension of the file
259         String artifactFileName = artifact.getName().toLowerCase();
260 
261         // tar.gz? and other "special" combinations
262         if (artifactFileName.endsWith("tar.gz")) {
263             return "tar.gz";
264         } else if (artifactFileName.equals("tar.bz2")) {
265             return "tar.bz2";
266         }
267 
268         // get the part after the last dot
269         return FileUtils.getExtension(artifactFileName);
270     }
271 
272     public void updateDocument(ArtifactInfo ai, Document doc) {
273         String info = ArtifactInfo.nvl(ai.getPackaging())
274                 + ArtifactInfo.FS
275                 + ai.getLastModified()
276                 + ArtifactInfo.FS
277                 + ai.getSize()
278                 + ArtifactInfo.FS
279                 + ai.getSourcesExists().toString()
280                 + ArtifactInfo.FS
281                 + ai.getJavadocExists().toString()
282                 + ArtifactInfo.FS
283                 + ai.getSignatureExists().toString()
284                 + ArtifactInfo.FS
285                 + ai.getFileExtension();
286 
287         doc.add(FLD_INFO.toField(info));
288 
289         doc.add(FLD_GROUP_ID_KW.toField(ai.getGroupId()));
290         doc.add(FLD_ARTIFACT_ID_KW.toField(ai.getArtifactId()));
291         doc.add(FLD_VERSION_KW.toField(ai.getVersion()));
292 
293         // V3
294         doc.add(FLD_GROUP_ID.toField(ai.getGroupId()));
295         doc.add(FLD_ARTIFACT_ID.toField(ai.getArtifactId()));
296         doc.add(FLD_VERSION.toField(ai.getVersion()));
297         doc.add(FLD_EXTENSION.toField(ai.getFileExtension()));
298 
299         if (ai.getName() != null) {
300             doc.add(FLD_NAME.toField(ai.getName()));
301         }
302 
303         if (ai.getDescription() != null) {
304             doc.add(FLD_DESCRIPTION.toField(ai.getDescription()));
305         }
306 
307         if (ai.getPackaging() != null) {
308             doc.add(FLD_PACKAGING.toField(ai.getPackaging()));
309         }
310 
311         if (ai.getClassifier() != null) {
312             doc.add(FLD_CLASSIFIER.toField(ai.getClassifier()));
313         }
314 
315         if (ai.getSha1() != null) {
316             doc.add(FLD_SHA1.toField(ai.getSha1()));
317         }
318     }
319 
320     public void updateLegacyDocument(ArtifactInfo ai, Document doc) {
321         updateDocument(ai, doc);
322 
323         // legacy!
324         if (ai.getPrefix() != null) {
325             doc.add(new Field(ArtifactInfo.PLUGIN_PREFIX, ai.getPrefix(), IndexerField.KEYWORD_STORED));
326         }
327 
328         if (ai.getGoals() != null) {
329             doc.add(new StoredField(ArtifactInfo.PLUGIN_GOALS, ArtifactInfo.lst2str(ai.getGoals())));
330         }
331 
332         doc.removeField(ArtifactInfo.GROUP_ID);
333         doc.add(new Field(ArtifactInfo.GROUP_ID, ai.getGroupId(), IndexerField.KEYWORD_NOT_STORED));
334     }
335 
336     public boolean updateArtifactInfo(Document doc, ArtifactInfo ai) {
337         boolean res = false;
338 
339         String uinfo = doc.get(ArtifactInfo.UINFO);
340 
341         if (uinfo != null) {
342             String[] r = ArtifactInfo.FS_PATTERN.split(uinfo);
343 
344             ai.setGroupId(r[0]);
345 
346             ai.setArtifactId(r[1]);
347 
348             ai.setVersion(r[2]);
349 
350             ai.setClassifier(ArtifactInfo.renvl(r[3]));
351 
352             if (r.length > 4) {
353                 ai.setFileExtension(r[4]);
354             }
355 
356             res = true;
357         }
358 
359         String info = doc.get(ArtifactInfo.INFO);
360 
361         if (info != null) {
362             String[] r = ArtifactInfo.FS_PATTERN.split(info);
363 
364             ai.setPackaging(ArtifactInfo.renvl(r[0]));
365 
366             ai.setLastModified(Long.parseLong(r[1]));
367 
368             ai.setSize(Long.parseLong(r[2]));
369 
370             ai.setSourcesExists(ArtifactAvailability.fromString(r[3]));
371 
372             ai.setJavadocExists(ArtifactAvailability.fromString(r[4]));
373 
374             ai.setSignatureExists(ArtifactAvailability.fromString(r[5]));
375 
376             if (r.length > 6) {
377                 ai.setFileExtension(r[6]);
378             } else {
379                 if (ai.getClassifier() != null //
380                         || "pom".equals(ai.getPackaging()) //
381                         || "war".equals(ai.getPackaging()) //
382                         || "ear".equals(ai.getPackaging())) {
383                     ai.setFileExtension(ai.getPackaging());
384                 } else {
385                     ai.setFileExtension("jar"); // best guess
386                 }
387             }
388 
389             res = true;
390         }
391 
392         String name = doc.get(ArtifactInfo.NAME);
393 
394         if (name != null) {
395             ai.setName(name);
396 
397             res = true;
398         }
399 
400         String description = doc.get(ArtifactInfo.DESCRIPTION);
401 
402         if (description != null) {
403             ai.setDescription(description);
404 
405             res = true;
406         }
407 
408         // sometimes there's a pom without packaging(default to jar), but no artifact, then the value will be a "null"
409         // String
410         if ("null".equals(ai.getPackaging())) {
411             ai.setPackaging(null);
412         }
413 
414         String sha1 = doc.get(ArtifactInfo.SHA1);
415 
416         if (sha1 != null) {
417             ai.setSha1(sha1);
418         }
419 
420         return res;
421 
422         // artifactInfo.fname = ???
423     }
424 
425     // ==
426 
427     @Override
428     public String toString() {
429         return ID;
430     }
431 
432     public Collection<IndexerField> getIndexerFields() {
433         return Arrays.asList(
434                 FLD_INFO,
435                 FLD_GROUP_ID_KW,
436                 FLD_GROUP_ID,
437                 FLD_ARTIFACT_ID_KW,
438                 FLD_ARTIFACT_ID,
439                 FLD_VERSION_KW,
440                 FLD_VERSION,
441                 FLD_PACKAGING,
442                 FLD_CLASSIFIER,
443                 FLD_NAME,
444                 FLD_DESCRIPTION,
445                 FLD_LAST_MODIFIED,
446                 FLD_SHA1);
447     }
448 }