1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.maven.plugin.compiler; 20 21 import java.io.File; 22 import java.nio.file.FileSystem; 23 import java.nio.file.Path; 24 import java.nio.file.PathMatcher; 25 import java.util.ArrayList; 26 import java.util.Arrays; 27 import java.util.Collection; 28 import java.util.Iterator; 29 import java.util.LinkedHashSet; 30 import java.util.List; 31 import java.util.Objects; 32 import java.util.Set; 33 34 /** 35 * Determines whether a path is selected according to include/exclude patterns. 36 * The pathnames used for method parameters will be relative to some base directory 37 * and use {@code '/'} as separator, regardless of the hosting operating system. 38 * 39 * <h2>Syntax</h2> 40 * If a pattern contains the {@code ':'} character and the prefix before is longer than 1 character, 41 * then that pattern is given verbatim to {@link FileSystem#getPathMatcher(String)}, which interprets 42 * the part before {@code ':'} as the syntax (usually {@code "glob"} or {@code "regex"}). 43 * If a pattern does not contain the {@code ':'} character, or if the prefix is one character long 44 * (interpreted as a Windows drive), then the syntax defaults to a reproduction of the Maven 3 behavior. 45 * This is implemented as the {@code "glob"} syntax with the following modifications: 46 * 47 * <ul> 48 * <li>The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}. 49 * Note that it means that the backslash cannot be used for escaping characters.</li> 50 * <li>Trailing {@code "/"} is completed as {@code "/**"}.</li> 51 * <li>The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories". 52 * This is implemented by adding variants of the pattern without the {@code "**"} wildcard.</li> 53 * <li>Bracket characters [ ] and { } are escaped.</li> 54 * <li>On Unix only, the escape character {@code '\\'} is itself escaped.</li> 55 * </ul> 56 * 57 * If above changes are not desired, put an explicit {@code "glob:"} prefix before the pattern. 58 * Note that putting such a prefix is recommended anyway for better performances. 59 * 60 * @author Benjamin Bentmann 61 * @author Martin Desruisseaux 62 * 63 * @see java.nio.file.FileSystem#getPathMatcher(String) 64 */ 65 final class PathSelector implements PathMatcher { 66 /** 67 * Maximum number of characters of the prefix before {@code ':'} for handling as a Maven syntax. 68 */ 69 private static final int MAVEN_SYNTAX_THRESHOLD = 1; 70 71 /** 72 * The default syntax to use if none was specified. Note that when this default syntax is applied, 73 * the user-provided pattern get some changes as documented in class Javadoc. 74 */ 75 private static final String DEFAULT_SYNTAX = "glob:"; 76 77 /** 78 * Characters having a special meaning in the glob syntax. 79 * 80 * @see FileSystem#getPathMatcher(String) 81 */ 82 private static final String SPECIAL_CHARACTERS = "*?[]{}\\"; 83 84 /** 85 * A path matcher which accepts all files. 86 * 87 * @see #simplify() 88 */ 89 private static final PathMatcher INCLUDES_ALL = (path) -> true; 90 91 /** 92 * String representations of the normalized include filters. 93 * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default. 94 * An empty array means to include all files. 95 * 96 * @see #toString() 97 */ 98 private final String[] includePatterns; 99 100 /** 101 * String representations of the normalized exclude filters. 102 * Each pattern shall be prefixed by its syntax. If no syntax is specified, 103 * the default is a Maven 3 syntax similar, but not identical, to {@value #DEFAULT_SYNTAX}. 104 * This array may be longer or shorter than the user-supplied excludes, depending on whether 105 * default excludes have been added and whether some unnecessary excludes have been omitted. 106 * 107 * @see #toString() 108 */ 109 private final String[] excludePatterns; 110 111 /** 112 * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length. 113 * An empty array means to include all files. 114 */ 115 private final PathMatcher[] includes; 116 117 /** 118 * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length. 119 */ 120 private final PathMatcher[] excludes; 121 122 /** 123 * The matcher for all directories to include. This array includes the parents of all those directories, 124 * because they need to be accepted before we can walk to the sub-directories. 125 * This is an optimization for skipping whole directories when possible. 126 * An empty array means to include all directories. 127 */ 128 private final PathMatcher[] dirIncludes; 129 130 /** 131 * The matcher for directories to exclude. This array does <em>not</em> include the parent directories, 132 * because they may contain other sub-trees that need to be included. 133 * This is an optimization for skipping whole directories when possible. 134 */ 135 private final PathMatcher[] dirExcludes; 136 137 /** 138 * The base directory. All files will be relativized to that directory before to be matched. 139 */ 140 private final Path baseDirectory; 141 142 /** 143 * Whether paths must be relativized before to be given to a matcher. If {@code true}, then every paths 144 * will be made relative to {@link #baseDirectory} for allowing patterns like {@code "foo/bar/*.java"} 145 * to work. As a slight optimization, we can skip this step if all patterns start with {@code "**"}. 146 */ 147 private final boolean needRelativize; 148 149 /** 150 * Creates a new selector from the given includes and excludes. 151 * 152 * @param directory the base directory of the files to filter 153 * @param includes the patterns of the files to include, or null or empty for including all files 154 * @param excludes the patterns of the files to exclude, or null or empty for no exclusion 155 */ 156 PathSelector(Path directory, Collection<String> includes, Collection<String> excludes) { 157 includePatterns = normalizePatterns(includes, false); 158 excludePatterns = normalizePatterns(effectiveExcludes(excludes, includePatterns), true); 159 baseDirectory = directory; 160 FileSystem fs = directory.getFileSystem(); 161 this.includes = matchers(fs, includePatterns); 162 this.excludes = matchers(fs, excludePatterns); 163 dirIncludes = matchers(fs, directoryPatterns(includePatterns, false)); 164 dirExcludes = matchers(fs, directoryPatterns(excludePatterns, true)); 165 needRelativize = needRelativize(includePatterns) || needRelativize(excludePatterns); 166 } 167 168 /** 169 * Returns the given array of excludes, optionally expanded with a default set of excludes, 170 * then with unnecessary excludes omitted. An unnecessary exclude is an exclude which will never 171 * match a file because there is no include which would accept a file that could match the exclude. 172 * For example, if the only include is {@code "*.java"}, then the <code>"**/project.pj"</code>, 173 * <code>"**/.DS_Store"</code> and other excludes will never match a file and can be omitted. 174 * Because the list of {@linkplain #DEFAULT_EXCLUDES default excludes} contains many elements, 175 * removing unnecessary excludes can reduce a lot the number of matches tested on each source file. 176 * 177 * <h4>Implementation note</h4> 178 * The removal of unnecessary excludes is done on a best effort basis. The current implementation 179 * compares only the prefixes and suffixes of each pattern, keeping the pattern in case of doubt. 180 * This is not bad, but it does not remove all unnecessary patterns. It would be possible to do 181 * better in the future if benchmarking suggests that it would be worth the effort. 182 * 183 * @param excludes the user-specified excludes, potentially not yet converted to glob syntax 184 * @param includes the include patterns converted to glob syntax 185 * @param useDefaultExcludes whether to expand user exclude with the set of default excludes 186 * @return the potentially expanded or reduced set of excludes to use 187 */ 188 private static Collection<String> effectiveExcludes(Collection<String> excludes, final String[] includes) { 189 if (excludes == null || excludes.isEmpty()) { 190 return List.of(); 191 } else { 192 excludes = new ArrayList<>(excludes); 193 excludes.removeIf(Objects::isNull); 194 } 195 if (includes.length == 0) { 196 return excludes; 197 } 198 /* 199 * Get the prefixes and suffixes of all includes, stopping at the first special character. 200 * Redundant prefixes and suffixes are omitted. 201 */ 202 var prefixes = new String[includes.length]; 203 var suffixes = new String[includes.length]; 204 for (int i = 0; i < includes.length; i++) { 205 String include = includes[i]; 206 if (!include.startsWith(DEFAULT_SYNTAX)) { 207 return excludes; // Do not filter if at least one pattern is too complicated. 208 } 209 include = include.substring(DEFAULT_SYNTAX.length()); 210 prefixes[i] = prefixOrSuffix(include, false); 211 suffixes[i] = prefixOrSuffix(include, true); 212 } 213 prefixes = sortByLength(prefixes, false); 214 suffixes = sortByLength(suffixes, true); 215 /* 216 * Keep only the exclude which start with one of the prefixes and end with one of the suffixes. 217 * Note that a prefix or suffix may be the empty string, which match everything. 218 */ 219 final Iterator<String> it = excludes.iterator(); 220 nextExclude: 221 while (it.hasNext()) { 222 final String exclude = it.next(); 223 final int s = exclude.indexOf(':'); 224 if (s <= MAVEN_SYNTAX_THRESHOLD || exclude.startsWith(DEFAULT_SYNTAX)) { 225 if (cannotMatch(exclude, prefixes, false) || cannotMatch(exclude, suffixes, true)) { 226 it.remove(); 227 } 228 } 229 } 230 return excludes; 231 } 232 233 /** 234 * Returns the maximal amount of ordinary characters at the beginning or end of the given pattern. 235 * The prefix or suffix stops at the first {@linkplain #SPECIAL_CHARACTERS special character}. 236 * 237 * @param include the pattern for which to get a prefix or suffix without special character 238 * @param suffix {@code false} if a prefix is desired, or {@code true} if a suffix is desired 239 */ 240 private static String prefixOrSuffix(final String include, boolean suffix) { 241 int s = suffix ? -1 : include.length(); 242 for (int i = SPECIAL_CHARACTERS.length(); --i >= 0; ) { 243 char c = SPECIAL_CHARACTERS.charAt(i); 244 if (suffix) { 245 s = Math.max(s, include.lastIndexOf(c)); 246 } else { 247 int p = include.indexOf(c); 248 if (p >= 0 && p < s) { 249 s = p; 250 } 251 } 252 } 253 return suffix ? include.substring(s + 1) : include.substring(0, s); 254 } 255 256 /** 257 * Returns {@code true} if the given exclude cannot match any include patterns. 258 * In case of doubt, returns {@code false}. 259 * 260 * @param exclude the exclude pattern to test 261 * @param fragments the prefixes or suffixes (fragments without special characters) of the includes 262 * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes 263 * @return {@code true} if it is certain that the exclude pattern cannot match, or {@code false} in case of doubt 264 */ 265 private static boolean cannotMatch(String exclude, final String[] fragments, final boolean suffix) { 266 exclude = prefixOrSuffix(exclude, suffix); 267 for (String fragment : fragments) { 268 int fg = fragment.length(); 269 int ex = exclude.length(); 270 int length = Math.min(fg, ex); 271 if (suffix) { 272 fg -= length; 273 ex -= length; 274 } else { 275 fg = 0; 276 ex = 0; 277 } 278 if (exclude.regionMatches(ex, fragment, fg, length)) { 279 return false; 280 } 281 } 282 return true; 283 } 284 285 /** 286 * Sorts the given patterns by their length. The main intent is to have the empty string first, 287 * while will cause the loops testing for prefixes and suffixes to stop almost immediately. 288 * Short prefixes or suffixes are also more likely to be matched. 289 * 290 * @param fragments the fragments to sort in-place 291 * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes 292 * @return the given array, or a smaller array if some fragments were discarded because redundant 293 */ 294 private static String[] sortByLength(final String[] fragments, final boolean suffix) { 295 Arrays.sort(fragments, (s1, s2) -> s1.length() - s2.length()); 296 int count = 0; 297 /* 298 * Simplify the array of prefixes or suffixes by removing all redundant elements. 299 * An element is redundant if there is a shorter prefix or suffix with the same characters. 300 */ 301 nextBase: 302 for (String fragment : fragments) { 303 for (int i = count; --i >= 0; ) { 304 String base = fragments[i]; 305 if (suffix ? fragment.endsWith(base) : fragment.startsWith(base)) { 306 continue nextBase; // Skip this fragment 307 } 308 } 309 fragments[count++] = fragment; 310 } 311 return (fragments.length == count) ? fragments : Arrays.copyOf(fragments, count); 312 } 313 314 /** 315 * Returns the given array of patterns with path separator normalized to {@code '/'}. 316 * Null or empty patterns are ignored, and duplications are removed. 317 * 318 * @param patterns the patterns to normalize 319 * @param excludes whether the patterns are exclude patterns 320 * @return normalized patterns without null, empty or duplicated patterns 321 */ 322 private static String[] normalizePatterns(final Collection<String> patterns, final boolean excludes) { 323 if (patterns == null || patterns.isEmpty()) { 324 return new String[0]; 325 } 326 // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. 327 final var normalized = new LinkedHashSet<String>(patterns.size()); 328 for (String pattern : patterns) { 329 if (pattern != null && !pattern.isEmpty()) { 330 if (pattern.indexOf(':') <= MAVEN_SYNTAX_THRESHOLD) { 331 pattern = pattern.replace(File.separatorChar, '/'); 332 if (pattern.endsWith("/")) { 333 pattern += "**"; 334 } 335 // Following are okay only when "**" means "0 or more directories". 336 while (pattern.endsWith("/**/**")) { 337 pattern = pattern.substring(0, pattern.length() - 3); 338 } 339 while (pattern.startsWith("**/**/")) { 340 pattern = pattern.substring(3); 341 } 342 pattern = pattern.replace("/**/**/", "/**/"); 343 pattern = pattern.replace("\\", "\\\\") 344 .replace("[", "\\[") 345 .replace("]", "\\]") 346 .replace("{", "\\{") 347 .replace("}", "\\}"); 348 normalized.add(DEFAULT_SYNTAX + pattern); 349 /* 350 * If the pattern starts or ends with "**", Java GLOB expects a directory level at 351 * that location while Maven seems to consider that "**" can mean "no directory". 352 * Add another pattern for reproducing this effect. 353 */ 354 addPatternsWithOneDirRemoved(normalized, pattern, 0); 355 } else { 356 normalized.add(pattern); 357 } 358 } 359 } 360 return simplify(normalized, excludes); 361 } 362 363 /** 364 * Adds all variants of the given pattern with {@code **} removed. 365 * This is used for simulating the Maven behavior where {@code "**} may match zero directory. 366 * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory. 367 * 368 * @param patterns where to add the derived patterns 369 * @param pattern the pattern for which to add derived forms, without the "glob:" syntax prefix 370 * @param end should be 0 (reserved for recursive invocations of this method) 371 */ 372 private static void addPatternsWithOneDirRemoved(final Set<String> patterns, final String pattern, int end) { 373 final int length = pattern.length(); 374 int start; 375 while ((start = pattern.indexOf("**", end)) >= 0) { 376 end = start + 2; // 2 is the length of "**". 377 if (end < length) { 378 if (pattern.charAt(end) != '/') { 379 continue; 380 } 381 if (start == 0) { 382 end++; // Ommit the leading slash if there is nothing before it. 383 } 384 } 385 if (start > 0 && pattern.charAt(--start) != '/') { 386 continue; 387 } 388 String reduced = pattern.substring(0, start) + pattern.substring(end); 389 patterns.add(DEFAULT_SYNTAX + reduced); 390 addPatternsWithOneDirRemoved(patterns, reduced, start); 391 } 392 } 393 394 /** 395 * Applies some heuristic rules for simplifying the set of patterns, 396 * then returns the patterns as an array. 397 * 398 * @param patterns the patterns to simplify and return as an array 399 * @param excludes whether the patterns are exclude patterns 400 * @return the set content as an array, after simplification 401 */ 402 private static String[] simplify(Set<String> patterns, boolean excludes) { 403 /* 404 * If the "**" pattern is present, it makes all other patterns useless. 405 * In the case of include patterns, an empty set means to include everything. 406 */ 407 if (patterns.remove("**")) { 408 patterns.clear(); 409 if (excludes) { 410 patterns.add("**"); 411 } 412 } 413 return patterns.toArray(String[]::new); 414 } 415 416 /** 417 * Eventually adds the parent directory of the given patterns, without duplicated values. 418 * The patterns given to this method should have been normalized. 419 * 420 * @param patterns the normalized include or exclude patterns 421 * @param excludes whether the patterns are exclude patterns 422 * @return pattens of directories to include or exclude 423 */ 424 private static String[] directoryPatterns(final String[] patterns, final boolean excludes) { 425 // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. 426 final var directories = new LinkedHashSet<String>(patterns.length); 427 for (String pattern : patterns) { 428 if (pattern.startsWith(DEFAULT_SYNTAX)) { 429 if (excludes) { 430 if (pattern.endsWith("/**")) { 431 directories.add(pattern.substring(0, pattern.length() - 3)); 432 } 433 } else { 434 int s = pattern.indexOf(':'); 435 if (pattern.regionMatches(++s, "**/", 0, 3)) { 436 s = pattern.indexOf('/', s + 3); 437 if (s < 0) { 438 return new String[0]; // Pattern is "**", so we need to accept everything. 439 } 440 directories.add(pattern.substring(0, s)); 441 } 442 } 443 } 444 } 445 return simplify(directories, excludes); 446 } 447 448 /** 449 * Returns {@code true} if at least one pattern requires path to be relativized before to be matched. 450 * 451 * @param patterns include or exclude patterns 452 * @return whether at least one pattern require relativization 453 */ 454 private static boolean needRelativize(String[] patterns) { 455 for (String pattern : patterns) { 456 if (!pattern.startsWith(DEFAULT_SYNTAX + "**/")) { 457 return true; 458 } 459 } 460 return false; 461 } 462 463 /** 464 * Creates the path matchers for the given patterns. 465 * The syntax (usually {@value #DEFAULT_SYNTAX}) must be specified for each pattern. 466 */ 467 private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) { 468 final var matchers = new PathMatcher[patterns.length]; 469 for (int i = 0; i < patterns.length; i++) { 470 matchers[i] = fs.getPathMatcher(patterns[i]); 471 } 472 return matchers; 473 } 474 475 /** 476 * {@return a potentially simpler matcher equivalent to this matcher} 477 */ 478 @SuppressWarnings("checkstyle:MissingSwitchDefault") 479 public PathMatcher simplify() { 480 if (!needRelativize && excludes.length == 0) { 481 switch (includes.length) { 482 case 0: 483 return INCLUDES_ALL; 484 case 1: 485 return includes[0]; 486 } 487 } 488 return this; 489 } 490 491 /** 492 * Determines whether a path is selected. 493 * This is true if the given file matches an include pattern and no exclude pattern. 494 * 495 * @param path the pathname to test, must not be {@code null} 496 * @return {@code true} if the given path is selected, {@code false} otherwise 497 */ 498 @Override 499 public boolean matches(Path path) { 500 if (needRelativize) { 501 path = baseDirectory.relativize(path); 502 } 503 return (includes.length == 0 || isMatched(path, includes)) 504 && (excludes.length == 0 || !isMatched(path, excludes)); 505 } 506 507 /** 508 * {@return whether the given file matches according to one of the given matchers} 509 */ 510 private static boolean isMatched(Path path, PathMatcher[] matchers) { 511 for (PathMatcher matcher : matchers) { 512 if (matcher.matches(path)) { 513 return true; 514 } 515 } 516 return false; 517 } 518 519 /** 520 * Determines whether a directory could contain selected paths. 521 * 522 * @param directory the directory pathname to test, must not be {@code null} 523 * @return {@code true} if the given directory might contain selected paths, {@code false} if the 524 * directory will definitively not contain selected paths 525 */ 526 public boolean couldHoldSelected(Path directory) { 527 if (baseDirectory.equals(directory)) { 528 return true; 529 } 530 directory = baseDirectory.relativize(directory); 531 return (dirIncludes.length == 0 || isMatched(directory, dirIncludes)) 532 && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes)); 533 } 534 535 /** 536 * Appends the elements of the given array in the given buffer. 537 * This is a helper method for {@link #toString()} implementations. 538 * 539 * @param buffer the buffer to add the elements to 540 * @param label label identifying the array of elements to add 541 * @param patterns the elements to append, or {@code null} if none 542 */ 543 private static void append(StringBuilder buffer, String label, String[] patterns) { 544 buffer.append(label).append(": ["); 545 if (patterns != null) { 546 for (int i = 0; i < patterns.length; i++) { 547 if (i != 0) { 548 buffer.append(", "); 549 } 550 buffer.append(patterns[i]); 551 } 552 } 553 buffer.append(']'); 554 } 555 556 /** 557 * {@return a string representation for logging purposes} 558 */ 559 @Override 560 public String toString() { 561 var buffer = new StringBuilder(); 562 append(buffer, "includes", includePatterns); 563 append(buffer.append(", "), "excludes", excludePatterns); 564 return buffer.toString(); 565 } 566 }