1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.maven.buildcache.checksum;
20
21 import java.io.IOException;
22 import java.nio.ByteBuffer;
23 import java.nio.CharBuffer;
24 import java.nio.charset.Charset;
25 import java.nio.file.Files;
26 import java.nio.file.Path;
27
28 import org.apache.commons.io.FilenameUtils;
29 import org.apache.commons.lang3.StringUtils;
30 import org.apache.maven.buildcache.hash.HashChecksum;
31 import org.apache.maven.buildcache.xml.build.DigestItem;
32 import org.mozilla.universalchardet.UniversalDetector;
33 import org.slf4j.Logger;
34 import org.slf4j.LoggerFactory;
35
36 import static java.nio.charset.StandardCharsets.UTF_8;
37 import static org.apache.commons.lang3.StringUtils.containsAny;
38 import static org.apache.commons.lang3.StringUtils.equalsAny;
39 import static org.apache.commons.lang3.StringUtils.startsWith;
40 import static org.apache.commons.lang3.StringUtils.startsWithAny;
41
42
43
44
45 public class DigestUtils {
46
47 private static final Logger LOGGER = LoggerFactory.getLogger(DigestUtils.class);
48
49 private static final ThreadLocal<UniversalDetector> ENCODING_DETECTOR =
50 ThreadLocal.withInitial(() -> new UniversalDetector(null));
51
52 public static DigestItem pom(HashChecksum checksum, String effectivePom) {
53 return item("pom", effectivePom, checksum.update(effectivePom.getBytes(UTF_8)));
54 }
55
56 public static DigestItem file(HashChecksum checksum, Path basedir, Path file) throws IOException {
57 byte[] content = Files.readAllBytes(file);
58 String normalized = normalize(basedir, file);
59 DigestItem item = item("file", normalized, checksum.update(content));
60 try {
61 populateContentDetails(file, content, item);
62 } catch (IOException ignore) {
63 LOGGER.debug("Failed to compute digest for file {}", normalized, ignore);
64 }
65 return item;
66 }
67
68 private static void populateContentDetails(Path file, byte[] content, DigestItem item) throws IOException {
69 String contentType = Files.probeContentType(file);
70 if (contentType != null) {
71 item.setContent(contentType);
72 }
73 final boolean binary = isBinary(contentType);
74 item.setIsText(isText(contentType) ? "yes" : binary ? "no" : "unknown");
75 if (!binary) {
76 UniversalDetector detector = ENCODING_DETECTOR.get();
77 detector.reset();
78 detector.handleData(content, 0, Math.min(content.length, 16 * 1024));
79 detector.dataEnd();
80 String detectedCharset = detector.getDetectedCharset();
81 Charset charset = UTF_8;
82 if (detectedCharset != null) {
83 item.setCharset(detectedCharset);
84 charset = Charset.forName(detectedCharset);
85 }
86 CharBuffer charBuffer = charset.decode(ByteBuffer.wrap(content));
87 String lineSeparator = detectLineSeparator(charBuffer);
88 item.setEol(StringUtils.defaultString(lineSeparator, "unknown"));
89 }
90 }
91
92
93
94 private static boolean isText(String contentType) {
95 return startsWith(contentType, "text/")
96 || containsAny(contentType, "+json", "+xml")
97 || equalsAny(
98 contentType,
99 "application/json",
100 "application/rtf",
101 "application/x-sh",
102 "application/xml",
103 "application/javascript",
104 "application/sql");
105 }
106
107 private static boolean isBinary(String contentType) {
108 return startsWithAny(contentType, "image/", "audio/", "video/", "font/")
109 || containsAny(contentType, "+zip", "+gzip")
110 || equalsAny(
111 contentType,
112 "application/octet-stream",
113 "application/java-archive",
114 "application/x-bzip",
115 "application/x-bzip2",
116 "application/zip",
117 "application/gzip",
118 "application/x-tar",
119 "application/msword",
120 "application/vnd.ms-excel",
121 "application/vnd.ms-powerpoint",
122 "application/pdf");
123 }
124
125 public static DigestItem dependency(HashChecksum checksum, String key, String hash) {
126 return item("dependency", key, checksum.update(hash));
127 }
128
129 private static String normalize(Path basedirPath, Path file) {
130 return FilenameUtils.separatorsToUnix(relativize(basedirPath, file).toString());
131 }
132
133 private static Path relativize(Path basedirPath, Path file) {
134 try {
135 return basedirPath.relativize(file);
136 } catch (Exception ignore) {
137 return file;
138 }
139 }
140
141 private static DigestItem item(String type, String reference, String hash) {
142 final DigestItem item = new DigestItem();
143 item.setType(type);
144 item.setValue(reference);
145 item.setHash(hash);
146 return item;
147 }
148
149 private DigestUtils() {}
150
151 public static String detectLineSeparator(CharSequence text) {
152
153 int index = StringUtils.indexOfAny(text, "\n\r");
154 if (index == -1 || index >= text.length()) {
155 return null;
156 }
157 char ch = text.charAt(index);
158 if (ch == '\r') {
159 return index + 1 < text.length() && text.charAt(index + 1) == '\n' ? "CRLF" : "CR";
160 }
161 return ch == '\n' ? "LF" : null;
162 }
163 }