View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.plugin.compiler;
20  
21  import java.io.File;
22  import java.nio.file.FileSystem;
23  import java.nio.file.Path;
24  import java.nio.file.PathMatcher;
25  import java.util.ArrayList;
26  import java.util.Arrays;
27  import java.util.Collection;
28  import java.util.Iterator;
29  import java.util.LinkedHashSet;
30  import java.util.List;
31  import java.util.Objects;
32  import java.util.Set;
33  
34  /**
35   * Determines whether a path is selected according to include/exclude patterns.
36   * The pathnames used for method parameters will be relative to some base directory
37   * and use {@code '/'} as separator, regardless of the hosting operating system.
38   *
39   * <h2>Syntax</h2>
40   * If a pattern contains the {@code ':'} character and the prefix before is longer than 1 character,
41   * then that pattern is given verbatim to {@link FileSystem#getPathMatcher(String)}, which interprets
42   * the part before {@code ':'} as the syntax (usually {@code "glob"} or {@code "regex"}).
43   * If a pattern does not contain the {@code ':'} character, or if the prefix is one character long
44   * (interpreted as a Windows drive), then the syntax defaults to a reproduction of the Maven 3 behavior.
45   * This is implemented as the {@code "glob"} syntax with the following modifications:
46   *
47   * <ul>
48   *   <li>The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}.
49   *       Note that it means that the backslash cannot be used for escaping characters.</li>
50   *   <li>Trailing {@code "/"} is completed as {@code "/**"}.</li>
51   *   <li>The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories".
52   *       This is implemented by adding variants of the pattern without the {@code "**"} wildcard.</li>
53   *   <li>Bracket characters [ ] and { } are escaped.</li>
54   *   <li>On Unix only, the escape character {@code '\\'} is itself escaped.</li>
55   * </ul>
56   *
57   * If above changes are not desired, put an explicit {@code "glob:"} prefix before the pattern.
58   * Note that putting such a prefix is recommended anyway for better performances.
59   *
60   * @author Benjamin Bentmann
61   * @author Martin Desruisseaux
62   *
63   * @see java.nio.file.FileSystem#getPathMatcher(String)
64   */
65  final class PathSelector implements PathMatcher {
66      /**
67       * Maximum number of characters of the prefix before {@code ':'} for handling as a Maven syntax.
68       */
69      private static final int MAVEN_SYNTAX_THRESHOLD = 1;
70  
71      /**
72       * The default syntax to use if none was specified. Note that when this default syntax is applied,
73       * the user-provided pattern get some changes as documented in class Javadoc.
74       */
75      private static final String DEFAULT_SYNTAX = "glob:";
76  
77      /**
78       * Characters having a special meaning in the glob syntax.
79       *
80       * @see FileSystem#getPathMatcher(String)
81       */
82      private static final String SPECIAL_CHARACTERS = "*?[]{}\\";
83  
84      /**
85       * A path matcher which accepts all files.
86       *
87       * @see #simplify()
88       */
89      private static final PathMatcher INCLUDES_ALL = (path) -> true;
90  
91      /**
92       * String representations of the normalized include filters.
93       * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default.
94       * An empty array means to include all files.
95       *
96       * @see #toString()
97       */
98      private final String[] includePatterns;
99  
100     /**
101      * String representations of the normalized exclude filters.
102      * Each pattern shall be prefixed by its syntax. If no syntax is specified,
103      * the default is a Maven 3 syntax similar, but not identical, to {@value #DEFAULT_SYNTAX}.
104      * This array may be longer or shorter than the user-supplied excludes, depending on whether
105      * default excludes have been added and whether some unnecessary excludes have been omitted.
106      *
107      * @see #toString()
108      */
109     private final String[] excludePatterns;
110 
111     /**
112      * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length.
113      * An empty array means to include all files.
114      */
115     private final PathMatcher[] includes;
116 
117     /**
118      * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length.
119      */
120     private final PathMatcher[] excludes;
121 
122     /**
123      * The matcher for all directories to include. This array includes the parents of all those directories,
124      * because they need to be accepted before we can walk to the sub-directories.
125      * This is an optimization for skipping whole directories when possible.
126      * An empty array means to include all directories.
127      */
128     private final PathMatcher[] dirIncludes;
129 
130     /**
131      * The matcher for directories to exclude. This array does <em>not</em> include the parent directories,
132      * because they may contain other sub-trees that need to be included.
133      * This is an optimization for skipping whole directories when possible.
134      */
135     private final PathMatcher[] dirExcludes;
136 
137     /**
138      * The base directory. All files will be relativized to that directory before to be matched.
139      */
140     private final Path baseDirectory;
141 
142     /**
143      * Whether paths must be relativized before to be given to a matcher. If {@code true}, then every paths
144      * will be made relative to {@link #baseDirectory} for allowing patterns like {@code "foo/bar/*.java"}
145      * to work. As a slight optimization, we can skip this step if all patterns start with {@code "**"}.
146      */
147     private final boolean needRelativize;
148 
149     /**
150      * Creates a new selector from the given includes and excludes.
151      *
152      * @param directory the base directory of the files to filter
153      * @param includes the patterns of the files to include, or null or empty for including all files
154      * @param excludes the patterns of the files to exclude, or null or empty for no exclusion
155      */
156     PathSelector(Path directory, Collection<String> includes, Collection<String> excludes) {
157         includePatterns = normalizePatterns(includes, false);
158         excludePatterns = normalizePatterns(effectiveExcludes(excludes, includePatterns), true);
159         baseDirectory = directory;
160         FileSystem fs = directory.getFileSystem();
161         this.includes = matchers(fs, includePatterns);
162         this.excludes = matchers(fs, excludePatterns);
163         dirIncludes = matchers(fs, directoryPatterns(includePatterns, false));
164         dirExcludes = matchers(fs, directoryPatterns(excludePatterns, true));
165         needRelativize = needRelativize(includePatterns) || needRelativize(excludePatterns);
166     }
167 
168     /**
169      * Returns the given array of excludes, optionally expanded with a default set of excludes,
170      * then with unnecessary excludes omitted. An unnecessary exclude is an exclude which will never
171      * match a file because there is no include which would accept a file that could match the exclude.
172      * For example, if the only include is {@code "*.java"}, then the <code>"**&sol;project.pj"</code>,
173      * <code>"**&sol;.DS_Store"</code> and other excludes will never match a file and can be omitted.
174      * Because the list of {@linkplain #DEFAULT_EXCLUDES default excludes} contains many elements,
175      * removing unnecessary excludes can reduce a lot the number of matches tested on each source file.
176      *
177      * <h4>Implementation note</h4>
178      * The removal of unnecessary excludes is done on a best effort basis. The current implementation
179      * compares only the prefixes and suffixes of each pattern, keeping the pattern in case of doubt.
180      * This is not bad, but it does not remove all unnecessary patterns. It would be possible to do
181      * better in the future if benchmarking suggests that it would be worth the effort.
182      *
183      * @param excludes the user-specified excludes, potentially not yet converted to glob syntax
184      * @param includes the include patterns converted to glob syntax
185      * @param useDefaultExcludes whether to expand user exclude with the set of default excludes
186      * @return the potentially expanded or reduced set of excludes to use
187      */
188     private static Collection<String> effectiveExcludes(Collection<String> excludes, final String[] includes) {
189         if (excludes == null || excludes.isEmpty()) {
190             return List.of();
191         } else {
192             excludes = new ArrayList<>(excludes);
193             excludes.removeIf(Objects::isNull);
194         }
195         if (includes.length == 0) {
196             return excludes;
197         }
198         /*
199          * Get the prefixes and suffixes of all includes, stopping at the first special character.
200          * Redundant prefixes and suffixes are omitted.
201          */
202         var prefixes = new String[includes.length];
203         var suffixes = new String[includes.length];
204         for (int i = 0; i < includes.length; i++) {
205             String include = includes[i];
206             if (!include.startsWith(DEFAULT_SYNTAX)) {
207                 return excludes; // Do not filter if at least one pattern is too complicated.
208             }
209             include = include.substring(DEFAULT_SYNTAX.length());
210             prefixes[i] = prefixOrSuffix(include, false);
211             suffixes[i] = prefixOrSuffix(include, true);
212         }
213         prefixes = sortByLength(prefixes, false);
214         suffixes = sortByLength(suffixes, true);
215         /*
216          * Keep only the exclude which start with one of the prefixes and end with one of the suffixes.
217          * Note that a prefix or suffix may be the empty string, which match everything.
218          */
219         final Iterator<String> it = excludes.iterator();
220         nextExclude:
221         while (it.hasNext()) {
222             final String exclude = it.next();
223             final int s = exclude.indexOf(':');
224             if (s <= MAVEN_SYNTAX_THRESHOLD || exclude.startsWith(DEFAULT_SYNTAX)) {
225                 if (cannotMatch(exclude, prefixes, false) || cannotMatch(exclude, suffixes, true)) {
226                     it.remove();
227                 }
228             }
229         }
230         return excludes;
231     }
232 
233     /**
234      * Returns the maximal amount of ordinary characters at the beginning or end of the given pattern.
235      * The prefix or suffix stops at the first {@linkplain #SPECIAL_CHARACTERS special character}.
236      *
237      * @param include the pattern for which to get a prefix or suffix without special character
238      * @param suffix {@code false} if a prefix is desired, or {@code true} if a suffix is desired
239      */
240     private static String prefixOrSuffix(final String include, boolean suffix) {
241         int s = suffix ? -1 : include.length();
242         for (int i = SPECIAL_CHARACTERS.length(); --i >= 0; ) {
243             char c = SPECIAL_CHARACTERS.charAt(i);
244             if (suffix) {
245                 s = Math.max(s, include.lastIndexOf(c));
246             } else {
247                 int p = include.indexOf(c);
248                 if (p >= 0 && p < s) {
249                     s = p;
250                 }
251             }
252         }
253         return suffix ? include.substring(s + 1) : include.substring(0, s);
254     }
255 
256     /**
257      * Returns {@code true} if the given exclude cannot match any include patterns.
258      * In case of doubt, returns {@code false}.
259      *
260      * @param exclude the exclude pattern to test
261      * @param fragments the prefixes or suffixes (fragments without special characters) of the includes
262      * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes
263      * @return {@code true} if it is certain that the exclude pattern cannot match, or {@code false} in case of doubt
264      */
265     private static boolean cannotMatch(String exclude, final String[] fragments, final boolean suffix) {
266         exclude = prefixOrSuffix(exclude, suffix);
267         for (String fragment : fragments) {
268             int fg = fragment.length();
269             int ex = exclude.length();
270             int length = Math.min(fg, ex);
271             if (suffix) {
272                 fg -= length;
273                 ex -= length;
274             } else {
275                 fg = 0;
276                 ex = 0;
277             }
278             if (exclude.regionMatches(ex, fragment, fg, length)) {
279                 return false;
280             }
281         }
282         return true;
283     }
284 
285     /**
286      * Sorts the given patterns by their length. The main intent is to have the empty string first,
287      * while will cause the loops testing for prefixes and suffixes to stop almost immediately.
288      * Short prefixes or suffixes are also more likely to be matched.
289      *
290      * @param fragments the fragments to sort in-place
291      * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes
292      * @return the given array, or a smaller array if some fragments were discarded because redundant
293      */
294     private static String[] sortByLength(final String[] fragments, final boolean suffix) {
295         Arrays.sort(fragments, (s1, s2) -> s1.length() - s2.length());
296         int count = 0;
297         /*
298          * Simplify the array of prefixes or suffixes by removing all redundant elements.
299          * An element is redundant if there is a shorter prefix or suffix with the same characters.
300          */
301         nextBase:
302         for (String fragment : fragments) {
303             for (int i = count; --i >= 0; ) {
304                 String base = fragments[i];
305                 if (suffix ? fragment.endsWith(base) : fragment.startsWith(base)) {
306                     continue nextBase; // Skip this fragment
307                 }
308             }
309             fragments[count++] = fragment;
310         }
311         return (fragments.length == count) ? fragments : Arrays.copyOf(fragments, count);
312     }
313 
314     /**
315      * Returns the given array of patterns with path separator normalized to {@code '/'}.
316      * Null or empty patterns are ignored, and duplications are removed.
317      *
318      * @param patterns the patterns to normalize
319      * @param excludes whether the patterns are exclude patterns
320      * @return normalized patterns without null, empty or duplicated patterns
321      */
322     private static String[] normalizePatterns(final Collection<String> patterns, final boolean excludes) {
323         if (patterns == null || patterns.isEmpty()) {
324             return new String[0];
325         }
326         // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19.
327         final var normalized = new LinkedHashSet<String>(patterns.size());
328         for (String pattern : patterns) {
329             if (pattern != null && !pattern.isEmpty()) {
330                 if (pattern.indexOf(':') <= MAVEN_SYNTAX_THRESHOLD) {
331                     pattern = pattern.replace(File.separatorChar, '/');
332                     if (pattern.endsWith("/")) {
333                         pattern += "**";
334                     }
335                     // Following are okay only when "**" means "0 or more directories".
336                     while (pattern.endsWith("/**/**")) {
337                         pattern = pattern.substring(0, pattern.length() - 3);
338                     }
339                     while (pattern.startsWith("**/**/")) {
340                         pattern = pattern.substring(3);
341                     }
342                     pattern = pattern.replace("/**/**/", "/**/");
343                     pattern = pattern.replace("\\", "\\\\")
344                             .replace("[", "\\[")
345                             .replace("]", "\\]")
346                             .replace("{", "\\{")
347                             .replace("}", "\\}");
348                     normalized.add(DEFAULT_SYNTAX + pattern);
349                     /*
350                      * If the pattern starts or ends with "**", Java GLOB expects a directory level at
351                      * that location while Maven seems to consider that "**" can mean "no directory".
352                      * Add another pattern for reproducing this effect.
353                      */
354                     addPatternsWithOneDirRemoved(normalized, pattern, 0);
355                 } else {
356                     normalized.add(pattern);
357                 }
358             }
359         }
360         return simplify(normalized, excludes);
361     }
362 
363     /**
364      * Adds all variants of the given pattern with {@code **} removed.
365      * This is used for simulating the Maven behavior where {@code "**} may match zero directory.
366      * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory.
367      *
368      * @param patterns where to add the derived patterns
369      * @param pattern  the pattern for which to add derived forms, without the "glob:" syntax prefix
370      * @param end      should be 0 (reserved for recursive invocations of this method)
371      */
372     private static void addPatternsWithOneDirRemoved(final Set<String> patterns, final String pattern, int end) {
373         final int length = pattern.length();
374         int start;
375         while ((start = pattern.indexOf("**", end)) >= 0) {
376             end = start + 2; // 2 is the length of "**".
377             if (end < length) {
378                 if (pattern.charAt(end) != '/') {
379                     continue;
380                 }
381                 if (start == 0) {
382                     end++; // Ommit the leading slash if there is nothing before it.
383                 }
384             }
385             if (start > 0 && pattern.charAt(--start) != '/') {
386                 continue;
387             }
388             String reduced = pattern.substring(0, start) + pattern.substring(end);
389             patterns.add(DEFAULT_SYNTAX + reduced);
390             addPatternsWithOneDirRemoved(patterns, reduced, start);
391         }
392     }
393 
394     /**
395      * Applies some heuristic rules for simplifying the set of patterns,
396      * then returns the patterns as an array.
397      *
398      * @param patterns the patterns to simplify and return as an array
399      * @param excludes whether the patterns are exclude patterns
400      * @return the set content as an array, after simplification
401      */
402     private static String[] simplify(Set<String> patterns, boolean excludes) {
403         /*
404          * If the "**" pattern is present, it makes all other patterns useless.
405          * In the case of include patterns, an empty set means to include everything.
406          */
407         if (patterns.remove("**")) {
408             patterns.clear();
409             if (excludes) {
410                 patterns.add("**");
411             }
412         }
413         return patterns.toArray(String[]::new);
414     }
415 
416     /**
417      * Eventually adds the parent directory of the given patterns, without duplicated values.
418      * The patterns given to this method should have been normalized.
419      *
420      * @param patterns the normalized include or exclude patterns
421      * @param excludes whether the patterns are exclude patterns
422      * @return pattens of directories to include or exclude
423      */
424     private static String[] directoryPatterns(final String[] patterns, final boolean excludes) {
425         // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19.
426         final var directories = new LinkedHashSet<String>(patterns.length);
427         for (String pattern : patterns) {
428             if (pattern.startsWith(DEFAULT_SYNTAX)) {
429                 if (excludes) {
430                     if (pattern.endsWith("/**")) {
431                         directories.add(pattern.substring(0, pattern.length() - 3));
432                     }
433                 } else {
434                     int s = pattern.indexOf(':');
435                     if (pattern.regionMatches(++s, "**/", 0, 3)) {
436                         s = pattern.indexOf('/', s + 3);
437                         if (s < 0) {
438                             return new String[0]; // Pattern is "**", so we need to accept everything.
439                         }
440                         directories.add(pattern.substring(0, s));
441                     }
442                 }
443             }
444         }
445         return simplify(directories, excludes);
446     }
447 
448     /**
449      * Returns {@code true} if at least one pattern requires path to be relativized before to be matched.
450      *
451      * @param patterns include or exclude patterns
452      * @return whether at least one pattern require relativization
453      */
454     private static boolean needRelativize(String[] patterns) {
455         for (String pattern : patterns) {
456             if (!pattern.startsWith(DEFAULT_SYNTAX + "**/")) {
457                 return true;
458             }
459         }
460         return false;
461     }
462 
463     /**
464      * Creates the path matchers for the given patterns.
465      * The syntax (usually {@value #DEFAULT_SYNTAX}) must be specified for each pattern.
466      */
467     private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) {
468         final var matchers = new PathMatcher[patterns.length];
469         for (int i = 0; i < patterns.length; i++) {
470             matchers[i] = fs.getPathMatcher(patterns[i]);
471         }
472         return matchers;
473     }
474 
475     /**
476      * {@return a potentially simpler matcher equivalent to this matcher}
477      */
478     @SuppressWarnings("checkstyle:MissingSwitchDefault")
479     public PathMatcher simplify() {
480         if (!needRelativize && excludes.length == 0) {
481             switch (includes.length) {
482                 case 0:
483                     return INCLUDES_ALL;
484                 case 1:
485                     return includes[0];
486             }
487         }
488         return this;
489     }
490 
491     /**
492      * Determines whether a path is selected.
493      * This is true if the given file matches an include pattern and no exclude pattern.
494      *
495      * @param path the pathname to test, must not be {@code null}
496      * @return {@code true} if the given path is selected, {@code false} otherwise
497      */
498     @Override
499     public boolean matches(Path path) {
500         if (needRelativize) {
501             path = baseDirectory.relativize(path);
502         }
503         return (includes.length == 0 || isMatched(path, includes))
504                 && (excludes.length == 0 || !isMatched(path, excludes));
505     }
506 
507     /**
508      * {@return whether the given file matches according to one of the given matchers}
509      */
510     private static boolean isMatched(Path path, PathMatcher[] matchers) {
511         for (PathMatcher matcher : matchers) {
512             if (matcher.matches(path)) {
513                 return true;
514             }
515         }
516         return false;
517     }
518 
519     /**
520      * Determines whether a directory could contain selected paths.
521      *
522      * @param directory the directory pathname to test, must not be {@code null}
523      * @return {@code true} if the given directory might contain selected paths, {@code false} if the
524      *         directory will definitively not contain selected paths
525      */
526     public boolean couldHoldSelected(Path directory) {
527         if (baseDirectory.equals(directory)) {
528             return true;
529         }
530         directory = baseDirectory.relativize(directory);
531         return (dirIncludes.length == 0 || isMatched(directory, dirIncludes))
532                 && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes));
533     }
534 
535     /**
536      * Appends the elements of the given array in the given buffer.
537      * This is a helper method for {@link #toString()} implementations.
538      *
539      * @param buffer the buffer to add the elements to
540      * @param label label identifying the array of elements to add
541      * @param patterns the elements to append, or {@code null} if none
542      */
543     private static void append(StringBuilder buffer, String label, String[] patterns) {
544         buffer.append(label).append(": [");
545         if (patterns != null) {
546             for (int i = 0; i < patterns.length; i++) {
547                 if (i != 0) {
548                     buffer.append(", ");
549                 }
550                 buffer.append(patterns[i]);
551             }
552         }
553         buffer.append(']');
554     }
555 
556     /**
557      * {@return a string representation for logging purposes}
558      */
559     @Override
560     public String toString() {
561         var buffer = new StringBuilder();
562         append(buffer, "includes", includePatterns);
563         append(buffer.append(", "), "excludes", excludePatterns);
564         return buffer.toString();
565     }
566 }