1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.maven.impl; 20 21 import java.io.File; 22 import java.nio.file.FileSystem; 23 import java.nio.file.Path; 24 import java.nio.file.PathMatcher; 25 import java.util.ArrayList; 26 import java.util.Arrays; 27 import java.util.Collection; 28 import java.util.Iterator; 29 import java.util.LinkedHashSet; 30 import java.util.List; 31 import java.util.Set; 32 33 /** 34 * Determines whether a path is selected according to include/exclude patterns. 35 * The pathnames used for method parameters will be relative to some base directory 36 * and use {@code '/'} as separator, regardless of the hosting operating system. 37 * 38 * <h2>Syntax</h2> 39 * If a pattern contains the {@code ':'} character and the prefix before is longer than 1 character, 40 * then that pattern is given verbatim to {@link FileSystem#getPathMatcher(String)}, which interprets 41 * the part before {@code ':'} as the syntax (usually {@code "glob"} or {@code "regex"}). 42 * If a pattern does not contain the {@code ':'} character, or if the prefix is one character long 43 * (interpreted as a Windows drive), then the syntax defaults to a reproduction of the Maven 3 behavior. 44 * This is implemented as the {@code "glob"} syntax with the following modifications: 45 * 46 * <ul> 47 * <li>The platform-specific separator ({@code '\\'} on Windows) is replaced by {@code '/'}. 48 * Note that it means that the backslash cannot be used for escaping characters.</li> 49 * <li>Trailing {@code "/"} is completed as {@code "/**"}.</li> 50 * <li>The {@code "**"} wildcard means "0 or more directories" instead of "1 or more directories". 51 * This is implemented by adding variants of the pattern without the {@code "**"} wildcard.</li> 52 * <li>Bracket characters [ ] and { } are escaped.</li> 53 * <li>On Unix only, the escape character {@code '\\'} is itself escaped.</li> 54 * </ul> 55 * 56 * If above changes are not desired, put an explicit {@code "glob:"} prefix before the pattern. 57 * Note that putting such a prefix is recommended anyway for better performances. 58 * 59 * @author Benjamin Bentmann 60 * @author Martin Desruisseaux 61 * 62 * @see java.nio.file.FileSystem#getPathMatcher(String) 63 */ 64 public class PathSelector implements PathMatcher { 65 /** 66 * Patterns which should be excluded by default, like <abbr>SCM</abbr> files. 67 * 68 * <p><b>Source:</b> this list is copied from {@code plexus-utils-4.0.2} (released in 69 * September 23, 2024), class {@code org.codehaus.plexus.util.AbstractScanner}.</p> 70 */ 71 private static final List<String> DEFAULT_EXCLUDES = List.of( 72 // Miscellaneous typical temporary files 73 "**/*~", 74 "**/#*#", 75 "**/.#*", 76 "**/%*%", 77 "**/._*", 78 79 // CVS 80 "**/CVS", 81 "**/CVS/**", 82 "**/.cvsignore", 83 84 // RCS 85 "**/RCS", 86 "**/RCS/**", 87 88 // SCCS 89 "**/SCCS", 90 "**/SCCS/**", 91 92 // Visual SourceSafe 93 "**/vssver.scc", 94 95 // MKS 96 "**/project.pj", 97 98 // Subversion 99 "**/.svn", 100 "**/.svn/**", 101 102 // Arch 103 "**/.arch-ids", 104 "**/.arch-ids/**", 105 106 // Bazaar 107 "**/.bzr", 108 "**/.bzr/**", 109 110 // SurroundSCM 111 "**/.MySCMServerInfo", 112 113 // Mac 114 "**/.DS_Store", 115 116 // Serena Dimensions Version 10 117 "**/.metadata", 118 "**/.metadata/**", 119 120 // Mercurial 121 "**/.hg", 122 "**/.hg/**", 123 124 // git 125 "**/.git", 126 "**/.git/**", 127 "**/.gitignore", 128 129 // BitKeeper 130 "**/BitKeeper", 131 "**/BitKeeper/**", 132 "**/ChangeSet", 133 "**/ChangeSet/**", 134 135 // darcs 136 "**/_darcs", 137 "**/_darcs/**", 138 "**/.darcsrepo", 139 "**/.darcsrepo/**", 140 "**/-darcs-backup*", 141 "**/.darcs-temp-mail"); 142 143 /** 144 * Maximum number of characters of the prefix before {@code ':'} for handling as a Maven syntax. 145 */ 146 private static final int MAVEN_SYNTAX_THRESHOLD = 1; 147 148 /** 149 * The default syntax to use if none was specified. Note that when this default syntax is applied, 150 * the user-provided pattern get some changes as documented in class Javadoc. 151 */ 152 private static final String DEFAULT_SYNTAX = "glob:"; 153 154 /** 155 * Characters having a special meaning in the glob syntax. 156 * 157 * @see FileSystem#getPathMatcher(String) 158 */ 159 private static final String SPECIAL_CHARACTERS = "*?[]{}\\"; 160 161 /** 162 * A path matcher which accepts all files. 163 * 164 * @see #simplify() 165 */ 166 private static final PathMatcher INCLUDES_ALL = (path) -> true; 167 168 /** 169 * String representations of the normalized include filters. 170 * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default. 171 * 172 * @see #toString() 173 */ 174 private final String[] includePatterns; 175 176 /** 177 * String representations of the normalized exclude filters. 178 * Each pattern shall be prefixed by its syntax, which is {@value #DEFAULT_SYNTAX} by default. 179 * This array may be longer or shorter than the user-supplied excludes, depending on whether 180 * default excludes have been added and whether some unnecessary excludes have been omitted. 181 * 182 * @see #toString() 183 */ 184 private final String[] excludePatterns; 185 186 /** 187 * The matcher for includes. The length of this array is equal to {@link #includePatterns} array length. 188 */ 189 private final PathMatcher[] includes; 190 191 /** 192 * The matcher for excludes. The length of this array is equal to {@link #excludePatterns} array length. 193 */ 194 private final PathMatcher[] excludes; 195 196 /** 197 * The matcher for all directories to include. This array includes the parents of all those directories, 198 * because they need to be accepted before we can walk to the sub-directories. 199 * This is an optimization for skipping whole directories when possible. 200 */ 201 private final PathMatcher[] dirIncludes; 202 203 /** 204 * The matcher for directories to exclude. This array does <em>not</em> include the parent directories, 205 * because they may contain other sub-trees that need to be included. 206 * This is an optimization for skipping whole directories when possible. 207 */ 208 private final PathMatcher[] dirExcludes; 209 210 /** 211 * The base directory. All files will be relativized to that directory before to be matched. 212 */ 213 private final Path baseDirectory; 214 215 /** 216 * Creates a new selector from the given includes and excludes. 217 * 218 * @param directory the base directory of the files to filter 219 * @param includes the patterns of the files to include, or null or empty for including all files 220 * @param excludes the patterns of the files to exclude, or null or empty for no exclusion 221 * @param useDefaultExcludes whether to augment the excludes with a default set of <abbr>SCM</abbr> patterns 222 */ 223 public PathSelector( 224 Path directory, Collection<String> includes, Collection<String> excludes, boolean useDefaultExcludes) { 225 includePatterns = normalizePatterns(includes, false); 226 excludePatterns = normalizePatterns(effectiveExcludes(excludes, includePatterns, useDefaultExcludes), true); 227 baseDirectory = directory; 228 FileSystem system = directory.getFileSystem(); 229 this.includes = matchers(system, includePatterns); 230 this.excludes = matchers(system, excludePatterns); 231 dirIncludes = matchers(system, directoryPatterns(includePatterns, false)); 232 dirExcludes = matchers(system, directoryPatterns(excludePatterns, true)); 233 } 234 235 /** 236 * Returns the given array of excludes, optionally expanded with a default set of excludes, 237 * then with unnecessary excludes omitted. An unnecessary exclude is an exclude which will never 238 * match a file because there is no include which would accept a file that could match the exclude. 239 * For example, if the only include is {@code "*.java"}, then the <code>"**/project.pj"</code>, 240 * <code>"**/.DS_Store"</code> and other excludes will never match a file and can be omitted. 241 * Because the list of {@linkplain #DEFAULT_EXCLUDES default excludes} contains many elements, 242 * removing unnecessary excludes can reduce a lot the number of matches tested on each source file. 243 * 244 * <h4>Implementation note</h4> 245 * The removal of unnecessary excludes is done on a best effort basis. The current implementation 246 * compares only the prefixes and suffixes of each pattern, keeping the pattern in case of doubt. 247 * This is not bad, but it does not remove all unnecessary patterns. It would be possible to do 248 * better in the future if benchmarking suggests that it would be worth the effort. 249 * 250 * @param excludes the user-specified excludes, potentially not yet converted to glob syntax 251 * @param includes the include patterns converted to glob syntax 252 * @param useDefaultExcludes whether to expand user exclude with the set of default excludes 253 * @return the potentially expanded or reduced set of excludes to use 254 */ 255 private static Collection<String> effectiveExcludes( 256 Collection<String> excludes, final String[] includes, final boolean useDefaultExcludes) { 257 if (excludes == null || excludes.isEmpty()) { 258 if (useDefaultExcludes) { 259 excludes = new ArrayList<>(DEFAULT_EXCLUDES); 260 } else { 261 return List.of(); 262 } 263 } else { 264 excludes = new ArrayList<>(excludes); 265 if (useDefaultExcludes) { 266 excludes.addAll(DEFAULT_EXCLUDES); 267 } 268 } 269 /* 270 * Get the prefixes and suffixes of all includes, stopping at the first special character. 271 * Redundant prefixes and suffixes are omitted. 272 */ 273 var prefixes = new String[includes.length]; 274 var suffixes = new String[includes.length]; 275 for (int i = 0; i < includes.length; i++) { 276 String include = includes[i]; 277 if (!include.startsWith(DEFAULT_SYNTAX)) { 278 return excludes; // Do not filter if at least one pattern is too complicated. 279 } 280 include = include.substring(DEFAULT_SYNTAX.length()); 281 prefixes[i] = prefixOrSuffix(include, false); 282 suffixes[i] = prefixOrSuffix(include, true); 283 } 284 prefixes = sortByLength(prefixes, false); 285 suffixes = sortByLength(suffixes, true); 286 /* 287 * Keep only the exclude which start with one of the prefixes and end with one of the suffixes. 288 * Note that a prefix or suffix may be the empty string, which match everything. 289 */ 290 final Iterator<String> it = excludes.iterator(); 291 nextExclude: 292 while (it.hasNext()) { 293 final String exclude = it.next(); 294 final int s = exclude.indexOf(':'); 295 if (s <= MAVEN_SYNTAX_THRESHOLD || exclude.startsWith(DEFAULT_SYNTAX)) { 296 if (cannotMatch(exclude, prefixes, false) || cannotMatch(exclude, suffixes, true)) { 297 it.remove(); 298 } 299 } 300 } 301 return excludes; 302 } 303 304 /** 305 * Returns the maximal amount of ordinary characters at the beginning or end of the given pattern. 306 * The prefix or suffix stops at the first {@linkplain #SPECIAL_CHARACTERS special character}. 307 * 308 * @param include the pattern for which to get a prefix or suffix without special character 309 * @param suffix {@code false} if a prefix is desired, or {@code true} if a suffix is desired 310 */ 311 private static String prefixOrSuffix(final String include, boolean suffix) { 312 int s = suffix ? -1 : include.length(); 313 for (int i = SPECIAL_CHARACTERS.length(); --i >= 0; ) { 314 char c = SPECIAL_CHARACTERS.charAt(i); 315 if (suffix) { 316 s = Math.max(s, include.lastIndexOf(c)); 317 } else { 318 int p = include.indexOf(c); 319 if (p >= 0 && p < s) { 320 s = p; 321 } 322 } 323 } 324 return suffix ? include.substring(s + 1) : include.substring(0, s); 325 } 326 327 /** 328 * Returns {@code true} if the given exclude cannot match any include patterns. 329 * In case of doubt, returns {@code false}. 330 * 331 * @param exclude the exclude pattern to test 332 * @param fragments the prefixes or suffixes (fragments without special characters) of the includes 333 * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes 334 * @return {@code true} if it is certain that the exclude pattern cannot match, or {@code false} in case of doubt 335 */ 336 private static boolean cannotMatch(String exclude, final String[] fragments, final boolean suffix) { 337 exclude = prefixOrSuffix(exclude, suffix); 338 for (String fragment : fragments) { 339 int fg = fragment.length(); 340 int ex = exclude.length(); 341 int length = Math.min(fg, ex); 342 if (suffix) { 343 fg -= length; 344 ex -= length; 345 } else { 346 fg = 0; 347 ex = 0; 348 } 349 if (exclude.regionMatches(ex, fragment, fg, length)) { 350 return false; 351 } 352 } 353 return true; 354 } 355 356 /** 357 * Sorts the given patterns by their length. The main intent is to have the empty string first, 358 * while will cause the loops testing for prefixes and suffixes to stop almost immediately. 359 * Short prefixes or suffixes are also more likely to be matched. 360 * 361 * @param fragments the fragments to sort in-place 362 * @param suffix {@code false} if the specified fragments are prefixes, {@code true} if they are suffixes 363 * @return the given array, or a smaller array if some fragments were discarded because redundant 364 */ 365 private static String[] sortByLength(final String[] fragments, final boolean suffix) { 366 Arrays.sort(fragments, (s1, s2) -> s1.length() - s2.length()); 367 int count = 0; 368 /* 369 * Simplify the array of prefixes or suffixes by removing all redundant elements. 370 * An element is redundant if there is a shorter prefix or suffix with the same characters. 371 */ 372 nextBase: 373 for (String fragment : fragments) { 374 for (int i = count; --i >= 0; ) { 375 String base = fragments[i]; 376 if (suffix ? fragment.endsWith(base) : fragment.startsWith(base)) { 377 continue nextBase; // Skip this fragment 378 } 379 } 380 fragments[count++] = fragment; 381 } 382 return (fragments.length == count) ? fragments : Arrays.copyOf(fragments, count); 383 } 384 385 /** 386 * Returns the given array of patterns with path separator normalized to {@code '/'}. 387 * Null or empty patterns are ignored, and duplications are removed. 388 * 389 * @param patterns the patterns to normalize 390 * @param excludes whether the patterns are exclude patterns 391 * @return normalized patterns without null, empty or duplicated patterns 392 */ 393 private static String[] normalizePatterns(final Collection<String> patterns, final boolean excludes) { 394 if (patterns == null || patterns.isEmpty()) { 395 return new String[0]; 396 } 397 // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. 398 final var normalized = new LinkedHashSet<String>(patterns.size()); 399 for (String pattern : patterns) { 400 if (pattern != null && !pattern.isEmpty()) { 401 if (pattern.indexOf(':') <= MAVEN_SYNTAX_THRESHOLD) { 402 pattern = pattern.replace(File.separatorChar, '/'); 403 if (pattern.endsWith("/")) { 404 pattern += "**"; 405 } 406 // Following are okay only when "**" means "0 or more directories". 407 while (pattern.endsWith("/**/**")) { 408 pattern = pattern.substring(0, pattern.length() - 3); 409 } 410 while (pattern.startsWith("**/**/")) { 411 pattern = pattern.substring(3); 412 } 413 pattern = pattern.replace("/**/**/", "/**/"); 414 pattern = pattern.replace("\\", "\\\\") 415 .replace("[", "\\[") 416 .replace("]", "\\]") 417 .replace("{", "\\{") 418 .replace("}", "\\}"); 419 normalized.add(DEFAULT_SYNTAX + pattern); 420 /* 421 * If the pattern starts or ends with "**", Java GLOB expects a directory level at 422 * that location while Maven seems to consider that "**" can mean "no directory". 423 * Add another pattern for reproducing this effect. 424 */ 425 addPatternsWithOneDirRemoved(normalized, pattern, 0); 426 } else { 427 normalized.add(pattern); 428 } 429 } 430 } 431 return simplify(normalized, excludes); 432 } 433 434 /** 435 * Adds all variants of the given pattern with {@code **} removed. 436 * This is used for simulating the Maven behavior where {@code "**} may match zero directory. 437 * Tests suggest that we need an explicit GLOB pattern with no {@code "**"} for matching an absence of directory. 438 * 439 * @param patterns where to add the derived patterns 440 * @param pattern the pattern for which to add derived forms, without the "glob:" syntax prefix 441 * @param end should be 0 (reserved for recursive invocations of this method) 442 */ 443 private static void addPatternsWithOneDirRemoved(final Set<String> patterns, final String pattern, int end) { 444 final int length = pattern.length(); 445 int start; 446 while ((start = pattern.indexOf("**", end)) >= 0) { 447 end = start + 2; // 2 is the length of "**". 448 if (end < length) { 449 if (pattern.charAt(end) != '/') { 450 continue; 451 } 452 if (start == 0) { 453 end++; // Ommit the leading slash if there is nothing before it. 454 } 455 } 456 if (start > 0 && pattern.charAt(--start) != '/') { 457 continue; 458 } 459 String reduced = pattern.substring(0, start) + pattern.substring(end); 460 patterns.add(DEFAULT_SYNTAX + reduced); 461 addPatternsWithOneDirRemoved(patterns, reduced, start); 462 } 463 } 464 465 /** 466 * Applies some heuristic rules for simplifying the set of patterns, 467 * then returns the patterns as an array. 468 * 469 * @param patterns the patterns to simplify and return asarray 470 * @param excludes whether the patterns are exclude patterns 471 * @return the set content as an array, after simplification 472 */ 473 private static String[] simplify(Set<String> patterns, boolean excludes) { 474 /* 475 * If the "**" pattern is present, it makes all other patterns useless. 476 * In the case of include patterns, an empty set means to include everything. 477 */ 478 if (patterns.remove("**")) { 479 patterns.clear(); 480 if (excludes) { 481 patterns.add("**"); 482 } 483 } 484 return patterns.toArray(String[]::new); 485 } 486 487 /** 488 * Eventually adds the parent directory of the given patterns, without duplicated values. 489 * The patterns given to this method should have been normalized. 490 * 491 * @param patterns the normalized include or exclude patterns 492 * @param excludes whether the patterns are exclude patterns 493 * @return pattens of directories to include or exclude 494 */ 495 private static String[] directoryPatterns(final String[] patterns, final boolean excludes) { 496 // TODO: use `LinkedHashSet.newLinkedHashSet(int)` instead with JDK19. 497 final var directories = new LinkedHashSet<String>(patterns.length); 498 for (String pattern : patterns) { 499 if (pattern.startsWith(DEFAULT_SYNTAX)) { 500 if (excludes) { 501 if (pattern.endsWith("/**")) { 502 directories.add(pattern.substring(0, pattern.length() - 3)); 503 } 504 } else { 505 int s = pattern.indexOf(':'); 506 if (pattern.regionMatches(++s, "**/", 0, 3)) { 507 s = pattern.indexOf('/', s + 3); 508 if (s < 0) { 509 return new String[0]; // Pattern is "**", so we need to accept everything. 510 } 511 directories.add(pattern.substring(0, s)); 512 } 513 } 514 } 515 } 516 return simplify(directories, excludes); 517 } 518 519 /** 520 * Creates the path matchers for the given patterns. 521 * The syntax (usually {@value #DEFAULT_SYNTAX}) must be specified for each pattern. 522 */ 523 private static PathMatcher[] matchers(final FileSystem fs, final String[] patterns) { 524 final var matchers = new PathMatcher[patterns.length]; 525 for (int i = 0; i < patterns.length; i++) { 526 matchers[i] = fs.getPathMatcher(patterns[i]); 527 } 528 return matchers; 529 } 530 531 /** 532 * {@return a potentially simpler matcher equivalent to this matcher}. 533 */ 534 @SuppressWarnings("checkstyle:MissingSwitchDefault") 535 public PathMatcher simplify() { 536 if (excludes.length == 0) { 537 switch (includes.length) { 538 case 0: 539 return INCLUDES_ALL; 540 case 1: 541 return includes[0]; 542 } 543 } 544 return this; 545 } 546 547 /** 548 * Determines whether a path is selected. 549 * This is true if the given file matches an include pattern and no exclude pattern. 550 * 551 * @param path the pathname to test, must not be {@code null} 552 * @return {@code true} if the given path is selected, {@code false} otherwise 553 */ 554 @Override 555 public boolean matches(Path path) { 556 path = baseDirectory.relativize(path); 557 return (includes.length == 0 || isMatched(path, includes)) 558 && (excludes.length == 0 || !isMatched(path, excludes)); 559 } 560 561 /** 562 * {@return whether the given file matches according to one of the given matchers}. 563 */ 564 private static boolean isMatched(Path path, PathMatcher[] matchers) { 565 for (PathMatcher matcher : matchers) { 566 if (matcher.matches(path)) { 567 return true; 568 } 569 } 570 return false; 571 } 572 573 /** 574 * Determines whether a directory could contain selected paths. 575 * 576 * @param directory the directory pathname to test, must not be {@code null} 577 * @return {@code true} if the given directory might contain selected paths, {@code false} if the 578 * directory will definitively not contain selected paths 579 */ 580 public boolean couldHoldSelected(Path directory) { 581 if (baseDirectory.equals(directory)) { 582 return true; 583 } 584 directory = baseDirectory.relativize(directory); 585 return (dirIncludes.length == 0 || isMatched(directory, dirIncludes)) 586 && (dirExcludes.length == 0 || !isMatched(directory, dirExcludes)); 587 } 588 589 /** 590 * Appends the elements of the given array in the given buffer. 591 * This is a helper method for {@link #toString()} implementations. 592 * 593 * @param buffer the buffer to add the elements to 594 * @param label label identifying the array of elements to add 595 * @param patterns the elements to append, or {@code null} if none 596 */ 597 private static void append(StringBuilder buffer, String label, String[] patterns) { 598 buffer.append(label).append(": ["); 599 if (patterns != null) { 600 for (int i = 0; i < patterns.length; i++) { 601 if (i != 0) { 602 buffer.append(", "); 603 } 604 buffer.append(patterns[i]); 605 } 606 } 607 buffer.append(']'); 608 } 609 610 /** 611 * {@return a string representation for logging purposes}. 612 */ 613 @Override 614 public String toString() { 615 var buffer = new StringBuilder(); 616 append(buffer, "includes", includePatterns); 617 append(buffer.append(", "), "excludes", excludePatterns); 618 return buffer.toString(); 619 } 620 }