View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.buildcache.checksum;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.Charset;
25  import java.nio.file.Files;
26  import java.nio.file.Path;
27  
28  import org.apache.commons.io.FilenameUtils;
29  import org.apache.commons.lang3.StringUtils;
30  import org.apache.maven.buildcache.hash.HashChecksum;
31  import org.apache.maven.buildcache.xml.build.DigestItem;
32  import org.mozilla.universalchardet.UniversalDetector;
33  import org.slf4j.Logger;
34  import org.slf4j.LoggerFactory;
35  
36  import static java.nio.charset.StandardCharsets.UTF_8;
37  import static org.apache.commons.lang3.StringUtils.containsAny;
38  import static org.apache.commons.lang3.StringUtils.equalsAny;
39  import static org.apache.commons.lang3.StringUtils.startsWith;
40  import static org.apache.commons.lang3.StringUtils.startsWithAny;
41  
42  /**
43   * DigestUtils
44   */
45  public class DigestUtils {
46  
47      private static final Logger LOGGER = LoggerFactory.getLogger(DigestUtils.class);
48  
49      private static final ThreadLocal<UniversalDetector> ENCODING_DETECTOR =
50              ThreadLocal.withInitial(() -> new UniversalDetector(null));
51  
52      public static DigestItem pom(HashChecksum checksum, String effectivePom) {
53          return item("pom", effectivePom, checksum.update(effectivePom.getBytes(UTF_8)));
54      }
55  
56      public static DigestItem file(HashChecksum checksum, Path basedir, Path file) throws IOException {
57          byte[] content = Files.readAllBytes(file);
58          String normalized = normalize(basedir, file);
59          DigestItem item = item("file", normalized, checksum.update(content));
60          try {
61              populateContentDetails(file, content, item);
62          } catch (IOException ignore) {
63              LOGGER.debug("Failed to compute digest for file {}", normalized, ignore);
64          }
65          return item;
66      }
67  
68      private static void populateContentDetails(Path file, byte[] content, DigestItem item) throws IOException {
69          String contentType = Files.probeContentType(file);
70          if (contentType != null) {
71              item.setContent(contentType);
72          }
73          final boolean binary = isBinary(contentType);
74          item.setIsText(isText(contentType) ? "yes" : binary ? "no" : "unknown");
75          if (!binary) { // probing application/ files as well though might be binary
76              UniversalDetector detector = ENCODING_DETECTOR.get();
77              detector.reset();
78              detector.handleData(content, 0, Math.min(content.length, 16 * 1024));
79              detector.dataEnd();
80              String detectedCharset = detector.getDetectedCharset();
81              Charset charset = UTF_8;
82              if (detectedCharset != null) {
83                  item.setCharset(detectedCharset);
84                  charset = Charset.forName(detectedCharset);
85              }
86              CharBuffer charBuffer = charset.decode(ByteBuffer.wrap(content));
87              String lineSeparator = detectLineSeparator(charBuffer);
88              item.setEol(StringUtils.defaultString(lineSeparator, "unknown"));
89          }
90      }
91  
92      // TODO add support for .gitattributes to statically configure file type before falling back to probe based content
93      // checks
94      private static boolean isText(String contentType) {
95          return startsWith(contentType, "text/")
96                  || containsAny(contentType, "+json", "+xml") // common mime type suffixes
97                  || equalsAny(
98                          contentType, // some common text types
99                          "application/json",
100                         "application/rtf",
101                         "application/x-sh",
102                         "application/xml",
103                         "application/javascript",
104                         "application/sql");
105     }
106 
107     private static boolean isBinary(String contentType) {
108         return startsWithAny(contentType, "image/", "audio/", "video/", "font/")
109                 || containsAny(contentType, "+zip", "+gzip")
110                 || equalsAny(
111                         contentType,
112                         "application/octet-stream",
113                         "application/java-archive",
114                         "application/x-bzip",
115                         "application/x-bzip2",
116                         "application/zip",
117                         "application/gzip",
118                         "application/x-tar",
119                         "application/msword",
120                         "application/vnd.ms-excel",
121                         "application/vnd.ms-powerpoint",
122                         "application/pdf");
123     }
124 
125     public static DigestItem dependency(HashChecksum checksum, String key, String hash) {
126         return item("dependency", key, checksum.update(hash));
127     }
128 
129     private static String normalize(Path basedirPath, Path file) {
130         return FilenameUtils.separatorsToUnix(relativize(basedirPath, file).toString());
131     }
132 
133     private static Path relativize(Path basedirPath, Path file) {
134         try {
135             return basedirPath.relativize(file);
136         } catch (Exception ignore) {
137             return file;
138         }
139     }
140 
141     private static DigestItem item(String type, String reference, String hash) {
142         final DigestItem item = new DigestItem();
143         item.setType(type);
144         item.setValue(reference);
145         item.setHash(hash);
146         return item;
147     }
148 
149     private DigestUtils() {}
150 
151     public static String detectLineSeparator(CharSequence text) {
152         // first line break only
153         int index = StringUtils.indexOfAny(text, "\n\r");
154         if (index == -1 || index >= text.length()) {
155             return null;
156         }
157         char ch = text.charAt(index);
158         if (ch == '\r') {
159             return index + 1 < text.length() && text.charAt(index + 1) == '\n' ? "CRLF" : "CR";
160         }
161         return ch == '\n' ? "LF" : null;
162     }
163 }