1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.buildcache.checksum;
20
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23 import java.nio.CharBuffer;
24 import java.nio.charset.Charset;
25 import java.nio.file.Files;
26 import java.nio.file.Path;
27 import java.util.Objects;
28
29 import org.apache.commons.io.FilenameUtils;
30 import org.apache.commons.lang3.StringUtils;
31 import org.apache.commons.lang3.Strings;
32 import org.apache.maven.buildcache.hash.HashChecksum;
33 import org.apache.maven.buildcache.xml.build.DigestItem;
34 import org.mozilla.universalchardet.UniversalDetector;
35 import org.slf4j.Logger;
36 import org.slf4j.LoggerFactory;
37
38 import static java.nio.charset.StandardCharsets.UTF_8;
39
40
41
42
43 public class DigestUtils {
44
45 private static final Logger LOGGER = LoggerFactory.getLogger(DigestUtils.class);
46
47 private static final ThreadLocal<UniversalDetector> ENCODING_DETECTOR =
48 ThreadLocal.withInitial(() -> new UniversalDetector(null));
49
50 public static DigestItem pom(HashChecksum checksum, String effectivePom) {
51 return item("pom", effectivePom, checksum.update(effectivePom.getBytes(UTF_8)));
52 }
53
54 public static DigestItem file(HashChecksum checksum, Path basedir, Path file) throws IOException {
55 byte[] content = Files.readAllBytes(file);
56 String normalized = normalize(basedir, file);
57 checksum.update(normalized.getBytes(UTF_8));
58 DigestItem item = item("file", normalized, checksum.update(content));
59 try {
60 populateContentDetails(file, content, item);
61 } catch (IOException exception) {
62 LOGGER.debug("Failed to compute digest for file {}", normalized, exception);
63 }
64 return item;
65 }
66
67 private static void populateContentDetails(Path file, byte[] content, DigestItem item) throws IOException {
68 String contentType = Files.probeContentType(file);
69 if (contentType != null) {
70 item.setContent(contentType);
71 }
72 final boolean binary = isBinary(contentType);
73 item.setIsText(isText(contentType) ? "yes" : binary ? "no" : "unknown");
74 if (!binary) {
75 UniversalDetector detector = ENCODING_DETECTOR.get();
76 detector.reset();
77 detector.handleData(content, 0, Math.min(content.length, 16 * 1024));
78 detector.dataEnd();
79 String detectedCharset = detector.getDetectedCharset();
80 Charset charset = UTF_8;
81 if (detectedCharset != null) {
82 item.setCharset(detectedCharset);
83 charset = Charset.forName(detectedCharset);
84 }
85 CharBuffer charBuffer = charset.decode(ByteBuffer.wrap(content));
86 String lineSeparator = detectLineSeparator(charBuffer);
87 item.setEol(Objects.toString(lineSeparator, "unknown"));
88 }
89 }
90
91
92
93 private static boolean isText(String contentType) {
94 return Strings.CS.startsWith(contentType, "text/")
95 || Strings.CS.containsAny(contentType, "+json", "+xml")
96 || Strings.CS.equalsAny(
97 contentType,
98 "application/json",
99 "application/rtf",
100 "application/x-sh",
101 "application/xml",
102 "application/javascript",
103 "application/sql");
104 }
105
106 private static boolean isBinary(String contentType) {
107 return Strings.CS.startsWithAny(contentType, "image/", "audio/", "video/", "font/")
108 || Strings.CS.containsAny(contentType, "+zip", "+gzip")
109 || Strings.CS.equalsAny(
110 contentType,
111 "application/octet-stream",
112 "application/java-archive",
113 "application/x-bzip",
114 "application/x-bzip2",
115 "application/zip",
116 "application/gzip",
117 "application/x-tar",
118 "application/msword",
119 "application/vnd.ms-excel",
120 "application/vnd.ms-powerpoint",
121 "application/pdf");
122 }
123
124 public static DigestItem dependency(HashChecksum checksum, String key, String hash) {
125 return item("dependency", key, checksum.update(hash));
126 }
127
128 public static DigestItem pluginDependency(HashChecksum checksum, String key, String hash) {
129 return item("pluginDependency", key, checksum.update(hash));
130 }
131
132 private static String normalize(Path basedirPath, Path file) {
133 return FilenameUtils.separatorsToUnix(relativize(basedirPath, file).toString());
134 }
135
136 private static Path relativize(Path basedirPath, Path file) {
137 try {
138 return basedirPath.relativize(file);
139 } catch (Exception ignore) {
140 return file;
141 }
142 }
143
144 private static DigestItem item(String type, String reference, String hash) {
145 final DigestItem item = new DigestItem();
146 item.setType(type);
147 item.setValue(reference);
148 item.setHash(hash);
149 return item;
150 }
151
152 private DigestUtils() {}
153
154 public static String detectLineSeparator(CharSequence text) {
155
156 int index = StringUtils.indexOfAny(text, "\n\r");
157 if (index == -1 || index >= text.length()) {
158 return null;
159 }
160 char ch = text.charAt(index);
161 if (ch == '\r') {
162 return index + 1 < text.length() && text.charAt(index + 1) == '\n' ? "CRLF" : "CR";
163 }
164 return ch == '\n' ? "LF" : null;
165 }
166 }