1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.search.backend.remoterepository.extractor;
20
21 import java.util.List;
22
23 import org.apache.maven.search.Record;
24 import org.apache.maven.search.backend.remoterepository.Context;
25 import org.apache.maven.search.backend.remoterepository.RecordFactory;
26 import org.jsoup.nodes.Document;
27 import org.jsoup.nodes.Element;
28
29 /**
30 * Extractor for Maven Central.
31 */
32 public class MavenCentralResponseExtractor extends ResponseExtractorSupport {
33 /**
34 * Extracts the "name" from {@code href} attribute. In case of Maven Central, the href
35 * attribute contains name in realative form as {@code "name/"} (followed by slash), if name denotes
36 * a directory. The trailing slash is removed by this method, if any.
37 */
38 private String nameInHref(Element element) {
39 String name = element.attr("href");
40 if (name.endsWith("/")) {
41 name = name.substring(0, name.length() - 1);
42 }
43 return name;
44 }
45
46 @Override
47 public int populateG(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
48 // Index HTML page like this one:
49 // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/
50 Element contents = document.getElementById("contents");
51 if (contents != null) {
52 for (Element element : contents.getElementsByTag("a")) {
53 String name = nameInHref(element);
54 if (accept(name)) {
55 page.add(recordFactory.create(context.getGroupId(), name, null, null, null));
56 }
57 }
58 }
59 return page.size();
60 }
61
62 @Override
63 public int populateGAV(Context context, Document document, RecordFactory recordFactory, List<Record> page) {
64 // Index HTML page like this one:
65 // https://repo.maven.apache.org/maven2/org/apache/maven/indexer/search-api/7.0.3/
66 Element contents = document.getElementById("contents");
67 if (contents != null) {
68 for (Element element : contents.getElementsByTag("a")) {
69 // skip possible subdirectories and files without extensions
70 String name = element.attr("href");
71 if (name.endsWith("/") || !name.contains(".")) {
72 continue;
73 }
74 populateGAVName(context, nameInHref(element), recordFactory, page);
75 }
76 }
77 return page.size();
78 }
79 }