View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.search.backend.remoterepository.internal;
20  
21  import java.io.BufferedReader;
22  import java.io.IOException;
23  import java.io.InputStream;
24  import java.io.InputStreamReader;
25  import java.nio.charset.StandardCharsets;
26  import java.time.ZoneId;
27  import java.time.ZonedDateTime;
28  import java.time.format.DateTimeFormatter;
29  import java.util.ArrayList;
30  import java.util.List;
31  import java.util.Locale;
32  import java.util.Map;
33  import java.util.Objects;
34  import java.util.Properties;
35  
36  import org.apache.maven.search.api.Record;
37  import org.apache.maven.search.api.SearchRequest;
38  import org.apache.maven.search.api.support.SearchBackendSupport;
39  import org.apache.maven.search.api.transport.Transport;
40  import org.apache.maven.search.backend.remoterepository.Context;
41  import org.apache.maven.search.backend.remoterepository.RecordFactory;
42  import org.apache.maven.search.backend.remoterepository.RemoteRepositorySearchBackend;
43  import org.apache.maven.search.backend.remoterepository.RemoteRepositorySearchResponse;
44  import org.apache.maven.search.backend.remoterepository.ResponseExtractor;
45  import org.jsoup.Jsoup;
46  import org.jsoup.nodes.Document;
47  import org.jsoup.parser.Parser;
48  
49  import static java.util.Objects.requireNonNull;
50  
51  /**
52   * Implementation of {@link RemoteRepositorySearchBackend} that is tested against Maven Central.
53   * All the methods are "loosely encapsulated" (are protected) to enable easy override of any
54   * required aspect of this implementation, to suit it against different remote repositories
55   * (HTML parsing) if needed.
56   */
57  public class RemoteRepositorySearchBackendImpl extends SearchBackendSupport implements RemoteRepositorySearchBackend {
58      private final String baseUri;
59  
60      private final Transport transport;
61  
62      private final ResponseExtractor responseExtractor;
63  
64      private final Map<String, String> commonHeaders;
65  
66      protected enum State {
67          G,
68          GA,
69          GAV,
70          GAVCE,
71          GAVCE1
72      }
73  
74      /**
75       * Creates a customized instance of SMO backend, like an in-house instances of SMO or different IDs.
76       */
77      public RemoteRepositorySearchBackendImpl(
78              String backendId,
79              String repositoryId,
80              String baseUri,
81              Transport transport,
82              ResponseExtractor responseExtractor) {
83          super(backendId, repositoryId);
84          this.baseUri = requireNonNull(baseUri);
85          this.transport = requireNonNull(transport);
86          this.responseExtractor = requireNonNull(responseExtractor);
87  
88          this.commonHeaders = Map.of(
89                  "User-Agent",
90                  "Apache-Maven-Search-RR/" + discoverVersion() + " "
91                          + transport.getClass().getSimpleName());
92      }
93  
94      private String discoverVersion() {
95          Properties properties = new Properties();
96          InputStream inputStream = getClass()
97                  .getClassLoader()
98                  .getResourceAsStream(
99                          "org/apache/maven/search/backend/smo/internal/remoterepository-version.properties");
100         if (inputStream != null) {
101             try (InputStream is = inputStream) {
102                 properties.load(is);
103             } catch (IOException e) {
104                 // fall through
105             }
106         }
107         return properties.getProperty("version", "unknown");
108     }
109 
110     @Override
111     public String getBaseUri() {
112         return baseUri;
113     }
114 
115     @Override
116     public RemoteRepositorySearchResponse search(SearchRequest searchRequest) throws IOException {
117         Context context = new Context(searchRequest);
118         String uri = baseUri;
119         State state = null;
120         if (context.getGroupId() != null) {
121             uri += context.getGroupId().replace('.', '/') + "/";
122             state = State.G;
123             if (context.getArtifactId() != null) {
124                 uri += context.getArtifactId() + "/";
125                 state = State.GA;
126                 if (context.getVersion() == null) {
127                     uri += "maven-metadata.xml";
128                 } else {
129                     uri += context.getVersion() + "/";
130                     state = State.GAV;
131                     if (context.getFileExtension() != null) {
132                         // we go for actually specified artifact
133                         uri += context.getArtifactId() + "-" + context.getVersion();
134                         if (context.getClassifier() != null) {
135                             uri += "-" + context.getClassifier();
136                         }
137                         uri += "." + context.getFileExtension();
138                         state = State.GAVCE;
139                         if (context.getSha1() != null) {
140                             state = State.GAVCE1;
141                         }
142                     }
143                 }
144             }
145         }
146         if (state == null) {
147             throw new IllegalArgumentException("Unsupported Query: " + searchRequest.getQuery());
148         }
149 
150         int totalHits = 0;
151         List<Record> page = new ArrayList<>(searchRequest.getPaging().getPageSize());
152         RecordFactory recordFactory = new RecordFactory(this);
153         Document document = null;
154         if (state.ordinal() < State.GAVCE.ordinal()) {
155             Parser parser = state == State.GA ? Parser.xmlParser() : Parser.htmlParser();
156             try (Transport.Response response = transport.get(uri, commonHeaders)) {
157                 if (response.getCode() == 200) {
158                     document = Jsoup.parse(response.getBody(), StandardCharsets.UTF_8.name(), uri, parser);
159                 } else if (response.getCode() == 404) {
160                     document = Jsoup.parse(InputStream.nullInputStream(), StandardCharsets.UTF_8.name(), uri, parser);
161                 }
162             }
163 
164             if (document == null) {
165                 throw new IOException("Unexpected response from: " + uri);
166             }
167 
168             switch (state) {
169                 case G:
170                     totalHits = responseExtractor.populateG(context, document, recordFactory, page);
171                     break;
172                 case GA:
173                     totalHits = responseExtractor.populateGA(context, document, recordFactory, page);
174                     break;
175                 case GAV:
176                     totalHits = responseExtractor.populateGAV(context, document, recordFactory, page);
177                     break;
178                 default:
179                     throw new IllegalStateException("State" + state); // checkstyle
180             }
181         } else {
182             try (Transport.Response response = transport.head(uri, commonHeaders)) {
183                 if (response.getCode() == 200) {
184                     boolean matches = context.getSha1() == null;
185                     if (context.getSha1() != null) {
186                         try (Transport.Response sha1Response = transport.get(uri + ".sha1", commonHeaders)) {
187                             if (response.getCode() == 200) {
188                                 try (InputStream body = sha1Response.getBody()) {
189                                     String remoteSha1 = readChecksum(body);
190                                     matches = Objects.equals(context.getSha1(), remoteSha1);
191                                 }
192                             }
193                         }
194                     }
195                     if (matches) {
196                         String lastModifiedHeader = response.getHeaders().get("last-modified");
197                         Long lastModified = lastModifiedHeader == null
198                                 ? null
199                                 : ZonedDateTime.parse(lastModifiedHeader, RFC7231)
200                                         .toInstant()
201                                         .toEpochMilli();
202                         page.add(recordFactory.create(
203                                 context.getGroupId(),
204                                 context.getArtifactId(),
205                                 context.getVersion(),
206                                 context.getClassifier(),
207                                 context.getFileExtension(),
208                                 lastModified));
209                         totalHits = 1;
210                     }
211                 }
212             }
213         }
214         return new RemoteRepositorySearchResponseImpl(searchRequest, totalHits, page, uri, document);
215     }
216 
217     private static final DateTimeFormatter RFC7231 = DateTimeFormatter.ofPattern(
218                     "EEE, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH)
219             .withZone(ZoneId.of("GMT"));
220 
221     private static String readChecksum(InputStream inputStream) throws IOException {
222         String checksum = "";
223         try (BufferedReader br = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8), 512)) {
224             while (true) {
225                 String line = br.readLine();
226                 if (line == null) {
227                     break;
228                 }
229                 line = line.trim();
230                 if (!line.isEmpty()) {
231                     checksum = line;
232                     break;
233                 }
234             }
235         }
236 
237         if (checksum.matches(".+= [0-9A-Fa-f]+")) {
238             int lastSpacePos = checksum.lastIndexOf(' ');
239             checksum = checksum.substring(lastSpacePos + 1);
240         } else {
241             int spacePos = checksum.indexOf(' ');
242 
243             if (spacePos != -1) {
244                 checksum = checksum.substring(0, spacePos);
245             }
246         }
247 
248         return checksum;
249     }
250 }