1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.plugin.compiler;
20
21 import java.io.File;
22 import java.nio.file.FileSystem;
23 import java.nio.file.Path;
24 import java.nio.file.PathMatcher;
25 import java.util.ArrayList;
26 import java.util.Arrays;
27 import java.util.Collection;
28 import java.util.Iterator;
29 import java.util.LinkedHashSet;
30 import java.util.List;
31 import java.util.Objects;
32 import java.util.Set;
33
34 /**
35 * Determines whether a path is selected according to include/exclude patterns.
36 * The pathnames used for method parameters will be relative to some base directory
37 * and use {@code '/'} as separator, regardless of the hosting operating system.
38 *
39 * <h2>Syntax</h2>
40 * If a pattern contains the {@code ':'} character and the prefix before is longer than 1 character,
41 * then that pattern is given verbatim to {@link FileSystem#getPathMatcher(String)}, which interprets
42 * the part before {@code ':'} as the syntax (usually {@code "glob"} or {@code "regex"}).
43 * If a pattern does not contain the {@code ':'} character, or if the prefix is one character long
44 * (interpreted as a Windows drive), then the syntax defaults to a reproduction of the Maven 3 behavior.
45 * This is implemented as the {@code "glob"} syntax with the following modifications:
46 *
47 * <ul>
48 * <li>The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}.
49 * Note that it means that the backslash cannot be used for escaping characters.</li>
50 * <li>Trailing {@code "/"} is completed as {@code "/**"}.</li>
51 * <li>The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories".
52 * This is implemented by adding variants of the pattern without the {@code "**"} wildcard.</li>
53 * <li>Bracket characters [ ] and { } are escaped.</li>
54 * <li>On Unix only, the escape character {@code '\\'} is itself escaped.</li>
55 * </ul>
56 *
57 * If above changes are not desired, put an explicit {@code "glob:"} prefix before the pattern.
58 * Note that putting such a prefix is recommended anyway for better performances.
59 *
60 * @author Benjamin Bentmann
61 * @author Martin Desruisseaux
62 *
63 * @see java.nio.file.FileSystem#getPathMatcher(String)
64 */
65 final class PathSelector implements PathMatcher {
66 /**
67 * Maximum number of characters of the prefix before {@code ':'} for handling as a Maven syntax.
68 */
69 private static final int MAVEN_SYNTAX_THRESHOLD = 1;
70
71 /**
72 * The default syntax to use if none was specified. Note that when this default syntax is applied,
73 * the user-provided pattern get some changes as documented in class Javadoc.
74 */
75 private static final String DEFAULT_SYNTAX = "glob:";
76
77 /**
78 * Characters having a special meaning in the glob syntax.
79 *
80 * @see FileSystem#getPathMatcher(String)
81 */
82 private static final String SPECIAL_CHARACTERS = "*?[]{}\\";
83
84 /**
85 * A path matcher which accepts all files.
86 *
87 * @see #simplify()
88 */
89 private static final PathMatcher INCLUDES_ALL = (path) -> true;
90
91 /**
92 * String representations of the normalized include filters.
93 * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default.
94 * An empty array means to include all files.
95 *
96 * @see #toString()
97 */
98 private final String[] includePatterns;
99
100 /**
101 * String representations of the normalized exclude filters.
102 * Each pattern shall be prefixed by its syntax. If no syntax is specified,
103 * the default is a Maven 3 syntax similar, but not identical, to {@value #DEFAULT_SYNTAX}.
104 * This array may be longer or shorter than the user-supplied excludes, depending on whether
105 * default excludes have been added and whether some unnecessary excludes have been omitted.
106 *
107 * @see #toString()
108 */
109 private final String[] excludePatterns;
110
111 /**
112 * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length.
113 * An empty array means to include all files.
114 */
115 private final PathMatcher[] includes;
116
117 /**
118 * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length.
119 */
120 private final PathMatcher[] excludes;
121
122 /**
123 * The matcher for all directories to include. This array includes the parents of all those directories,
124 * because they need to be accepted before we can walk to the sub-directories.
125 * This is an optimization for skipping whole directories when possible.
126 * An empty array means to include all directories.
127 */
128 private final PathMatcher[] dirIncludes;
129
130 /**
131 * The matcher for directories to exclude. This array does <em>not</em> include the parent directories,
132 * because they may contain other sub-trees that need to be included.
133 * This is an optimization for skipping whole directories when possible.
134 */
135 private final PathMatcher[] dirExcludes;
136
137 /**
138 * The base directory. All files will be relativized to that directory before to be matched.
139 */
140 private final Path baseDirectory;
141
142 /**
143 * Whether paths must be relativized before to be given to a matcher. If {@code true}, then every paths
144 * will be made relative to {@link #baseDirectory} for allowing patterns like {@code "foo/bar/*.java"}
145 * to work. As a slight optimization, we can skip this step if all patterns start with {@code "**"}.
146 */
147 private final boolean needRelativize;
148
149 /**
150 * Creates a new selector from the given includes and excludes.
151 *
152 * @param directory the base directory of the files to filter
153 * @param includes the patterns of the files to include, or null or empty for including all files
154 * @param excludes the patterns of the files to exclude, or null or empty for no exclusion
155 */
156 PathSelector(Path directory, Collection<String> includes, Collection<String> excludes) {
157 includePatterns = normalizePatterns(includes, false);
158 excludePatterns = normalizePatterns(effectiveExcludes(excludes, includePatterns), true);
159 baseDirectory = directory;
160 FileSystem fs = directory.getFileSystem();
161 this.includes = matchers(fs, includePatterns);
162 this.excludes = matchers(fs, excludePatterns);
163 dirIncludes = matchers(fs, directoryPatterns(includePatterns, false));
164 dirExcludes = matchers(fs, directoryPatterns(excludePatterns, true));
165 needRelativize = needRelativize(includePatterns) || needRelativize(excludePatterns);
166 }
167
168 /**
169 * Returns the given array of excludes, optionally expanded with a default set of excludes,
170 * then with unnecessary excludes omitted. An unnecessary exclude is an exclude which will never
171 * match a file because there is no include which would accept a file that could match the exclude.
172 * For example, if the only include is {@code "*.java"}, then the <code>"**/project.pj"</code>,
173 * <code>"**/.DS_Store"</code> and other excludes will never match a file and can be omitted.
174 * Because the list of {@linkplain #DEFAULT_EXCLUDES default excludes} contains many elements,
175 * removing unnecessary excludes can reduce a lot the number of matches tested on each source file.
176 *
177 * <h4>Implementation note</h4>
178 * The removal of unnecessary excludes is done on a best effort basis. The current implementation
179 * compares only the prefixes and suffixes of each pattern, keeping the pattern in case of doubt.
180 * This is not bad, but it does not remove all unnecessary patterns. It would be possible to do
181 * better in the future if benchmarking suggests that it would be worth the effort.
182 *
183 * @param excludes the user-specified excludes, potentially not yet converted to glob syntax
184 * @param includes the include patterns converted to glob syntax
185 * @param useDefaultExcludes whether to expand user exclude with the set of default excludes
186 * @return the potentially expanded or reduced set of excludes to use
187 */
188 private static Collection<String> effectiveExcludes(Collection<String> excludes, final String[] includes) {
189 if (excludes == null || excludes.isEmpty()) {
190 return List.of();
191 } else {
192 excludes = new ArrayList<>(excludes);
193 excludes.removeIf(Objects::isNull);
194 }
195 if (includes.length == 0) {
196 return excludes;
197 }
198 /*
199 * Get the prefixes and suffixes of all includes, stopping at the first special character.
200 * Redundant prefixes and suffixes are omitted.
201 */
202 var prefixes = new String[includes.length];
203 var suffixes = new String[includes.length];
204 for (int i = 0; i < includes.length; i++) {
205 String include = includes[i];
206 if (!include.startsWith(DEFAULT_SYNTAX)) {
207 return excludes; // Do not filter if at least one pattern is too complicated.
208 }
209 include = include.substring(DEFAULT_SYNTAX.length());
210 prefixes[i] = prefixOrSuffix(include, false);
211 suffixes[i] = prefixOrSuffix(include, true);
212 }
213 prefixes = sortByLength(prefixes, false);
214 suffixes = sortByLength(suffixes, true);
215 /*
216 * Keep only the exclude which start with one of the prefixes and end with one of the suffixes.
217 * Note that a prefix or suffix may be the empty string, which match everything.
218 */
219 final Iterator<String> it = excludes.iterator();
220 nextExclude:
221 while (it.hasNext()) {
222 final String exclude = it.next();
223 final int s = exclude.indexOf(':');
224 if (s <= MAVEN_SYNTAX_THRESHOLD || exclude.startsWith(DEFAULT_SYNTAX)) {
225 if (cannotMatch(exclude, prefixes, false) || cannotMatch(exclude, suffixes, true)) {
226 it.remove();
227 }
228 }
229 }
230 return excludes;
231 }
232
233 /**
234 * Returns the maximal amount of ordinary characters at the beginning or end of the given pattern.
235 * The prefix or suffix stops at the first {@linkplain #SPECIAL_CHARACTERS special character}.
236 *
237 * @param include the pattern for which to get a prefix or suffix without special character
238 * @param suffix {@code false} if a prefix is desired, or {@code true} if a suffix is desired
239 */
240 private static String prefixOrSuffix(final String include, boolean suffix) {
241 int s = suffix ? -1 : include.length();
242 for (int i = SPECIAL_CHARACTERS.length(); --i >= 0; ) {
243 char c = SPECIAL_CHARACTERS.charAt(i);
244 if (suffix) {
245 s = Math.max(s, include.lastIndexOf(c));
246 } else {
247 int p = include.indexOf(c);
248 if (p >= 0 && p < s) {
249 s = p;
250 }
251 }
252 }
253 return suffix ? include.substring(s + 1) : include.substring(0, s);
254 }
255
256 /**
257 * Returns {@code true} if the given exclude cannot match any include patterns.
258 * In case of doubt, returns {@code false}.
259 *
260 * @param exclude the exclude pattern to test
261 * @param fragments the prefixes or suffixes (fragments without special characters) of the includes
262 * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes
263 * @return {@code true} if it is certain that the exclude pattern cannot match, or {@code false} in case of doubt
264 */
265 private static boolean cannotMatch(String exclude, final String[] fragments, final boolean suffix) {
266 exclude = prefixOrSuffix(exclude, suffix);
267 for (String fragment : fragments) {
268 int fg = fragment.length();
269 int ex = exclude.length();
270 int length = Math.min(fg, ex);
271 if (suffix) {
272 fg -= length;
273 ex -= length;
274 } else {
275 fg = 0;
276 ex = 0;
277 }
278 if (exclude.regionMatches(ex, fragment, fg, length)) {
279 return false;
280 }
281 }
282 return true;
283 }
284
285 /**
286 * Sorts the given patterns by their length. The main intent is to have the empty string first,
287 * while will cause the loops testing for prefixes and suffixes to stop almost immediately.
288 * Short prefixes or suffixes are also more likely to be matched.
289 *
290 * @param fragments the fragments to sort in-place
291 * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes
292 * @return the given array, or a smaller array if some fragments were discarded because redundant
293 */
294 private static String[] sortByLength(final String[] fragments, final boolean suffix) {
295 Arrays.sort(fragments, (s1, s2) -> s1.length() - s2.length());
296 int count = 0;
297 /*
298 * Simplify the array of prefixes or suffixes by removing all redundant elements.
299 * An element is redundant if there is a shorter prefix or suffix with the same characters.
300 */
301 nextBase:
302 for (String fragment : fragments) {
303 for (int i = count; --i >= 0; ) {
304 String base = fragments[i];
305 if (suffix ? fragment.endsWith(base) : fragment.startsWith(base)) {
306 continue nextBase; // Skip this fragment
307 }
308 }
309 fragments[count++] = fragment;
310 }
311 return (fragments.length == count) ? fragments : Arrays.copyOf(fragments, count);
312 }
313
314 /**
315 * Returns the given array of patterns with path separator normalized to {@code '/'}.
316 * Null or empty patterns are ignored, and duplications are removed.
317 *
318 * @param patterns the patterns to normalize
319 * @param excludes whether the patterns are exclude patterns
320 * @return normalized patterns without null, empty or duplicated patterns
321 */
322 private static String[] normalizePatterns(final Collection<String> patterns, final boolean excludes) {
323 if (patterns == null || patterns.isEmpty()) {
324 return new String[0];
325 }
326 // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19.
327 final var normalized = new LinkedHashSet<String>(patterns.size());
328 for (String pattern : patterns) {
329 if (pattern != null && !pattern.isEmpty()) {
330 if (pattern.indexOf(':') <= MAVEN_SYNTAX_THRESHOLD) {
331 pattern = pattern.replace(File.separatorChar, '/');
332 if (pattern.endsWith("/")) {
333 pattern += "**";
334 }
335 // Following are okay only when "**" means "0 or more directories".
336 while (pattern.endsWith("/**/**")) {
337 pattern = pattern.substring(0, pattern.length() - 3);
338 }
339 while (pattern.startsWith("**/**/")) {
340 pattern = pattern.substring(3);
341 }
342 pattern = pattern.replace("/**/**/", "/**/");
343 pattern = pattern.replace("\\", "\\\\")
344 .replace("[", "\\[")
345 .replace("]", "\\]")
346 .replace("{", "\\{")
347 .replace("}", "\\}");
348 normalized.add(DEFAULT_SYNTAX + pattern);
349 /*
350 * If the pattern starts or ends with "**", Java GLOB expects a directory level at
351 * that location while Maven seems to consider that "**" can mean "no directory".
352 * Add another pattern for reproducing this effect.
353 */
354 addPatternsWithOneDirRemoved(normalized, pattern, 0);
355 } else {
356 normalized.add(pattern);
357 }
358 }
359 }
360 return simplify(normalized, excludes);
361 }
362
363 /**
364 * Adds all variants of the given pattern with {@code **} removed.
365 * This is used for simulating the Maven behavior where {@code "**} may match zero directory.
366 * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory.
367 *
368 * @param patterns where to add the derived patterns
369 * @param pattern the pattern for which to add derived forms, without the "glob:" syntax prefix
370 * @param end should be 0 (reserved for recursive invocations of this method)
371 */
372 private static void addPatternsWithOneDirRemoved(final Set<String> patterns, final String pattern, int end) {
373 final int length = pattern.length();
374 int start;
375 while ((start = pattern.indexOf("**", end)) >= 0) {
376 end = start + 2; // 2 is the length of "**".
377 if (end < length) {
378 if (pattern.charAt(end) != '/') {
379 continue;
380 }
381 if (start == 0) {
382 end++; // Ommit the leading slash if there is nothing before it.
383 }
384 }
385 if (start > 0 && pattern.charAt(--start) != '/') {
386 continue;
387 }
388 String reduced = pattern.substring(0, start) + pattern.substring(end);
389 patterns.add(DEFAULT_SYNTAX + reduced);
390 addPatternsWithOneDirRemoved(patterns, reduced, start);
391 }
392 }
393
394 /**
395 * Applies some heuristic rules for simplifying the set of patterns,
396 * then returns the patterns as an array.
397 *
398 * @param patterns the patterns to simplify and return as an array
399 * @param excludes whether the patterns are exclude patterns
400 * @return the set content as an array, after simplification
401 */
402 private static String[] simplify(Set<String> patterns, boolean excludes) {
403 /*
404 * If the "**" pattern is present, it makes all other patterns useless.
405 * In the case of include patterns, an empty set means to include everything.
406 */
407 if (patterns.remove("**")) {
408 patterns.clear();
409 if (excludes) {
410 patterns.add("**");
411 }
412 }
413 return patterns.toArray(String[]::new);
414 }
415
416 /**
417 * Eventually adds the parent directory of the given patterns, without duplicated values.
418 * The patterns given to this method should have been normalized.
419 *
420 * @param patterns the normalized include or exclude patterns
421 * @param excludes whether the patterns are exclude patterns
422 * @return pattens of directories to include or exclude
423 */
424 private static String[] directoryPatterns(final String[] patterns, final boolean excludes) {
425 // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19.
426 final var directories = new LinkedHashSet<String>(patterns.length);
427 for (String pattern : patterns) {
428 if (pattern.startsWith(DEFAULT_SYNTAX)) {
429 if (excludes) {
430 if (pattern.endsWith("/**")) {
431 directories.add(pattern.substring(0, pattern.length() - 3));
432 }
433 } else {
434 int s = pattern.indexOf(':');
435 if (pattern.regionMatches(++s, "**/", 0, 3)) {
436 s = pattern.indexOf('/', s + 3);
437 if (s < 0) {
438 return new String[0]; // Pattern is "**", so we need to accept everything.
439 }
440 directories.add(pattern.substring(0, s));
441 }
442 }
443 }
444 }
445 return simplify(directories, excludes);
446 }
447
448 /**
449 * Returns {@code true} if at least one pattern requires path to be relativized before to be matched.
450 *
451 * @param patterns include or exclude patterns
452 * @return whether at least one pattern require relativization
453 */
454 private static boolean needRelativize(String[] patterns) {
455 for (String pattern : patterns) {
456 if (!pattern.startsWith(DEFAULT_SYNTAX + "**/")) {
457 return true;
458 }
459 }
460 return false;
461 }
462
463 /**
464 * Creates the path matchers for the given patterns.
465 * The syntax (usually {@value #DEFAULT_SYNTAX}) must be specified for each pattern.
466 */
467 private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) {
468 final var matchers = new PathMatcher[patterns.length];
469 for (int i = 0; i < patterns.length; i++) {
470 matchers[i] = fs.getPathMatcher(patterns[i]);
471 }
472 return matchers;
473 }
474
475 /**
476 * {@return a potentially simpler matcher equivalent to this matcher}
477 */
478 @SuppressWarnings("checkstyle:MissingSwitchDefault")
479 public PathMatcher simplify() {
480 if (!needRelativize && excludes.length == 0) {
481 switch (includes.length) {
482 case 0:
483 return INCLUDES_ALL;
484 case 1:
485 return includes[0];
486 }
487 }
488 return this;
489 }
490
491 /**
492 * Determines whether a path is selected.
493 * This is true if the given file matches an include pattern and no exclude pattern.
494 *
495 * @param path the pathname to test, must not be {@code null}
496 * @return {@code true} if the given path is selected, {@code false} otherwise
497 */
498 @Override
499 public boolean matches(Path path) {
500 if (needRelativize) {
501 path = baseDirectory.relativize(path);
502 }
503 return (includes.length == 0 || isMatched(path, includes))
504 && (excludes.length == 0 || !isMatched(path, excludes));
505 }
506
507 /**
508 * {@return whether the given file matches according to one of the given matchers}
509 */
510 private static boolean isMatched(Path path, PathMatcher[] matchers) {
511 for (PathMatcher matcher : matchers) {
512 if (matcher.matches(path)) {
513 return true;
514 }
515 }
516 return false;
517 }
518
519 /**
520 * Determines whether a directory could contain selected paths.
521 *
522 * @param directory the directory pathname to test, must not be {@code null}
523 * @return {@code true} if the given directory might contain selected paths, {@code false} if the
524 * directory will definitively not contain selected paths
525 */
526 public boolean couldHoldSelected(Path directory) {
527 if (baseDirectory.equals(directory)) {
528 return true;
529 }
530 directory = baseDirectory.relativize(directory);
531 return (dirIncludes.length == 0 || isMatched(directory, dirIncludes))
532 && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes));
533 }
534
535 /**
536 * Appends the elements of the given array in the given buffer.
537 * This is a helper method for {@link #toString()} implementations.
538 *
539 * @param buffer the buffer to add the elements to
540 * @param label label identifying the array of elements to add
541 * @param patterns the elements to append, or {@code null} if none
542 */
543 private static void append(StringBuilder buffer, String label, String[] patterns) {
544 buffer.append(label).append(": [");
545 if (patterns != null) {
546 for (int i = 0; i < patterns.length; i++) {
547 if (i != 0) {
548 buffer.append(", ");
549 }
550 buffer.append(patterns[i]);
551 }
552 }
553 buffer.append(']');
554 }
555
556 /**
557 * {@return a string representation for logging purposes}
558 */
559 @Override
560 public String toString() {
561 var buffer = new StringBuilder();
562 append(buffer, "includes", includePatterns);
563 append(buffer.append(", "), "excludes", excludePatterns);
564 return buffer.toString();
565 }
566 }