View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.buildcache.checksum;
20  
21  import java.io.IOException;
22  import java.nio.ByteBuffer;
23  import java.nio.CharBuffer;
24  import java.nio.charset.Charset;
25  import java.nio.file.Files;
26  import java.nio.file.Path;
27  import java.util.Objects;
28  
29  import org.apache.commons.io.FilenameUtils;
30  import org.apache.commons.lang3.StringUtils;
31  import org.apache.commons.lang3.Strings;
32  import org.apache.maven.buildcache.hash.HashChecksum;
33  import org.apache.maven.buildcache.xml.build.DigestItem;
34  import org.mozilla.universalchardet.UniversalDetector;
35  import org.slf4j.Logger;
36  import org.slf4j.LoggerFactory;
37  
38  import static java.nio.charset.StandardCharsets.UTF_8;
39  
40  /**
41   * DigestUtils
42   */
43  public class DigestUtils {
44  
45      private static final Logger LOGGER = LoggerFactory.getLogger(DigestUtils.class);
46  
47      private static final ThreadLocal<UniversalDetector> ENCODING_DETECTOR =
48              ThreadLocal.withInitial(() -> new UniversalDetector(null));
49  
50      public static DigestItem pom(HashChecksum checksum, String effectivePom) {
51          return item("pom", effectivePom, checksum.update(effectivePom.getBytes(UTF_8)));
52      }
53  
54      public static DigestItem file(HashChecksum checksum, Path basedir, Path file) throws IOException {
55          byte[] content = Files.readAllBytes(file);
56          String normalized = normalize(basedir, file);
57          checksum.update(normalized.getBytes(UTF_8));
58          DigestItem item = item("file", normalized, checksum.update(content));
59          try {
60              populateContentDetails(file, content, item);
61          } catch (IOException exception) {
62              LOGGER.debug("Failed to compute digest for file {}", normalized, exception);
63          }
64          return item;
65      }
66  
67      private static void populateContentDetails(Path file, byte[] content, DigestItem item) throws IOException {
68          String contentType = Files.probeContentType(file);
69          if (contentType != null) {
70              item.setContent(contentType);
71          }
72          final boolean binary = isBinary(contentType);
73          item.setIsText(isText(contentType) ? "yes" : binary ? "no" : "unknown");
74          if (!binary) { // probing application/ files as well though might be binary
75              UniversalDetector detector = ENCODING_DETECTOR.get();
76              detector.reset();
77              detector.handleData(content, 0, Math.min(content.length, 16 * 1024));
78              detector.dataEnd();
79              String detectedCharset = detector.getDetectedCharset();
80              Charset charset = UTF_8;
81              if (detectedCharset != null) {
82                  item.setCharset(detectedCharset);
83                  charset = Charset.forName(detectedCharset);
84              }
85              CharBuffer charBuffer = charset.decode(ByteBuffer.wrap(content));
86              String lineSeparator = detectLineSeparator(charBuffer);
87              item.setEol(Objects.toString(lineSeparator, "unknown"));
88          }
89      }
90  
91      // TODO add support for .gitattributes to statically configure file type before falling back to probe based content
92      // checks
93      private static boolean isText(String contentType) {
94          return Strings.CS.startsWith(contentType, "text/")
95                  || Strings.CS.containsAny(contentType, "+json", "+xml") // common mime type suffixes
96                  || Strings.CS.equalsAny(
97                          contentType, // some common text types
98                          "application/json",
99                          "application/rtf",
100                         "application/x-sh",
101                         "application/xml",
102                         "application/javascript",
103                         "application/sql");
104     }
105 
106     private static boolean isBinary(String contentType) {
107         return Strings.CS.startsWithAny(contentType, "image/", "audio/", "video/", "font/")
108                 || Strings.CS.containsAny(contentType, "+zip", "+gzip")
109                 || Strings.CS.equalsAny(
110                         contentType,
111                         "application/octet-stream",
112                         "application/java-archive",
113                         "application/x-bzip",
114                         "application/x-bzip2",
115                         "application/zip",
116                         "application/gzip",
117                         "application/x-tar",
118                         "application/msword",
119                         "application/vnd.ms-excel",
120                         "application/vnd.ms-powerpoint",
121                         "application/pdf");
122     }
123 
124     public static DigestItem dependency(HashChecksum checksum, String key, String hash) {
125         return item("dependency", key, checksum.update(hash));
126     }
127 
128     public static DigestItem pluginDependency(HashChecksum checksum, String key, String hash) {
129         return item("pluginDependency", key, checksum.update(hash));
130     }
131 
132     private static String normalize(Path basedirPath, Path file) {
133         return FilenameUtils.separatorsToUnix(relativize(basedirPath, file).toString());
134     }
135 
136     private static Path relativize(Path basedirPath, Path file) {
137         try {
138             return basedirPath.relativize(file);
139         } catch (Exception ignore) {
140             return file;
141         }
142     }
143 
144     private static DigestItem item(String type, String reference, String hash) {
145         final DigestItem item = new DigestItem();
146         item.setType(type);
147         item.setValue(reference);
148         item.setHash(hash);
149         return item;
150     }
151 
152     private DigestUtils() {}
153 
154     public static String detectLineSeparator(CharSequence text) {
155         // first line break only
156         int index = StringUtils.indexOfAny(text, "\n\r");
157         if (index == -1 || index >= text.length()) {
158             return null;
159         }
160         char ch = text.charAt(index);
161         if (ch == '\r') {
162             return index + 1 < text.length() && text.charAt(index + 1) == '\n' ? "CRLF" : "CR";
163         }
164         return ch == '\n' ? "LF" : null;
165     }
166 }