1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 package org.apache.maven.archetype.common.util; 20 21 import java.io.File; 22 import java.util.ArrayList; 23 import java.util.List; 24 25 import org.codehaus.plexus.util.SelectorUtils; 26 import org.codehaus.plexus.util.StringUtils; 27 28 /** 29 * <p>Class for scanning a directory for files/directories which match certain criteria.</p> 30 * 31 * <p>These criteria consist of selectors and patterns which have been specified. With the selectors 32 * you can select which files you want to have included. Files which are not selected are excluded. 33 * With patterns you can include or exclude files based on their filename.</p> 34 * 35 * <p>The idea is simple. A given directory is recursively scanned for all files and directories. 36 * Each file/directory is matched against a set of selectors, including special support for matching 37 * against filenames with include and and exclude patterns. Only files/directories which match at 38 * least one pattern of the include pattern list or other file selector, and don't match any pattern 39 * of the exclude pattern list or fail to match against a required selector will be placed in the 40 * list of files/directories found.</p> 41 * 42 * <p>When no list of include patterns is supplied, "**" will be used, which means that everything 43 * will be matched. When no list of exclude patterns is supplied, an empty list is used, such that 44 * nothing will be excluded. When no selectors are supplied, none are applied.</p> 45 * 46 * <p>The filename pattern matching is done as follows: The name to be matched is split up in path 47 * segments. A path segment is the name of a directory or file, which is bounded by <code> 48 * File.separator</code> ('/' under UNIX, '\' under Windows). For example, "abc/def/ghi/xyz.java" is 49 * split up in the segments "abc", "def","ghi" and "xyz.java". The same is done for the pattern 50 * against which should be matched.</p> 51 * 52 * <p>The segments of the name and the pattern are then matched against each other. When '**' is 53 * used for a path segment in the pattern, it matches zero or more path segments of the name.</p> 54 * 55 * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of 56 * the pattern and the string to match:<br> 57 * When a pattern starts with a <code>File.separator</code>, the string to match must also start 58 * with a <code>File.separator</code>. When a pattern does not start with a <code> 59 * File.separator</code>, the string to match may not start with a <code>File.separator</code>. When 60 * one of these rules is not obeyed, the string will not match.</p> 61 * 62 * <p>When a name path segment is matched against a pattern path segment, the following special 63 * characters can be used:<br> 64 * '*' matches zero or more characters<br> 65 * '?' matches one character.</p> 66 * 67 * <p>Examples:</p> 68 * 69 * <p>"**\*.class" matches all .class files/dirs in a directory tree.</p> 70 * 71 * <p>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and 72 * then ".java", in a directory called test.</p> 73 * 74 * <p>"**" matches everything in a directory tree.</p> 75 * 76 * <p>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent 77 * directory called test (e.g. "abc\test\def\ghi\XYZ123").</p> 78 * 79 * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p> 80 * 81 * <p>Example of usage:</p> 82 * 83 * <pre> 84 * String[] includes = {"**\\*.class"}; 85 * String[] excludes = {"modules\\*\\**"}; 86 * ds.setIncludes(includes); 87 * ds.setExcludes(excludes); 88 * ds.setBasedir(new File("test")); 89 * ds.setCaseSensitive(true); 90 * ds.scan(); 91 * 92 * System.out.println("FILES:"); 93 * String[] files = ds.getIncludedFiles(); 94 * for (int i = 0; i < files.length; i++) { 95 * System.out.println(files[i]); 96 * } 97 * </pre> 98 * 99 * <p>This will scan a directory called test for .class files, but excludes all files in all proper 100 * subdirectories of a directory called "modules"</p> 101 * 102 * <p>This class was stealed from rg.coudehaus.plexus.util.DirectoryScanner and adapted to search 103 * from a List<String></p> 104 * 105 * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a> 106 * @author Magesh Umasankar 107 * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a> 108 * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a> 109 */ 110 public class ListScanner { 111 /** 112 * Patterns which should be excluded by default. 113 * 114 * @see #addDefaultExcludes() 115 */ 116 public static final String[] DEFAULTEXCLUDES = { // Miscellaneous typical temporary files 117 "**/*~", 118 "**/#*#", 119 "**/.#*", 120 "**/%*%", 121 "**/._*", 122 123 // CVS 124 "**/CVS", 125 "**/CVS/**", 126 "**/.cvsignore", 127 128 // SCCS 129 "**/SCCS", 130 "**/SCCS/**", 131 132 // Visual SourceSafe 133 "**/vssver.scc", 134 135 // Subversion 136 "**/.svn", 137 "**/.svn/**", 138 139 // Arch 140 "**/.arch-ids", 141 "**/.arch-ids/**", 142 143 // Bazaar 144 "**/.bzr", 145 "**/.bzr/**", 146 147 // GIT 148 "**/.git", 149 "**/.git/**", 150 151 // Mercurial 152 "**/.hg", 153 "**/.hg/**", 154 155 // SurroundSCM 156 "**/.MySCMServerInfo", 157 158 // Mac 159 "**/.DS_Store" 160 }; 161 162 /** The base directory to be scanned. */ 163 protected String basedir; 164 165 /** Whether or not everything tested so far has been included. */ 166 protected boolean everythingIncluded = true; 167 168 /** The patterns for the files to be excluded. */ 169 protected String[] excludes; 170 171 /** The patterns for the files to be included. */ 172 protected String[] includes; 173 174 /** Whether or not the file system should be treated as a case sensitive one. */ 175 protected boolean isCaseSensitive = true; 176 177 /** Sole constructor. */ 178 public ListScanner() {} 179 180 public static String getDefaultExcludes() { 181 return StringUtils.join(DEFAULTEXCLUDES, ","); 182 } 183 184 /** 185 * Tests whether or not a string matches against a pattern. The pattern may contain two special 186 * characters:<br> 187 * '*' means zero or more characters<br> 188 * '?' means one and only one character 189 * 190 * @param pattern The pattern to match against. Must not be <code>null</code>. 191 * @param str The string which must be matched against the pattern. Must not be <code> 192 * null</code>. 193 * @return <code>true</code> if the string matches against the pattern, or <code>false</code> 194 * otherwise. 195 */ 196 public static boolean match(String pattern, String str) { 197 // default matches the SelectorUtils default 198 return match(pattern, str, true); 199 } 200 201 /** 202 * Tests whether or not a string matches against a pattern. The pattern may contain two special 203 * characters:<br> 204 * '*' means zero or more characters<br> 205 * '?' means one and only one character 206 * 207 * @param pattern The pattern to match against. Must not be <code>null</code>. 208 * @param str The string which must be matched against the pattern. Must not be 209 * <code>null</code>. 210 * @param isCaseSensitive Whether or not matching should be performed case sensitively. 211 * @return <code>true</code> if the string matches against the pattern, or <code>false</code> 212 * otherwise. 213 */ 214 protected static boolean match(String pattern, String str, boolean isCaseSensitive) { 215 return SelectorUtils.match(pattern, str, isCaseSensitive); 216 } 217 218 /** 219 * Tests whether or not a given path matches a given pattern. 220 * 221 * @param pattern The pattern to match against. Must not be <code>null</code>. 222 * @param str The path to match, as a String. Must not be <code>null</code>. 223 * @return <code>true</code> if the pattern matches against the string, or <code>false</code> 224 * otherwise. 225 */ 226 protected static boolean matchPath(String pattern, String str) { 227 // default matches the SelectorUtils default 228 return matchPath(pattern, str, true); 229 } 230 231 /** 232 * Tests whether or not a given path matches a given pattern. 233 * 234 * @param pattern The pattern to match against. Must not be <code>null</code>. 235 * @param str The path to match, as a String. Must not be <code>null</code>. 236 * @param isCaseSensitive Whether or not matching should be performed case sensitively. 237 * @return <code>true</code> if the pattern matches against the string, or <code>false</code> 238 * otherwise. 239 */ 240 protected static boolean matchPath(String pattern, String str, boolean isCaseSensitive) { 241 return SelectorUtils.matchPath( 242 PathUtils.convertPathForOS(pattern), PathUtils.convertPathForOS(str), isCaseSensitive); 243 } 244 245 /** 246 * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".<p> 247 * 248 * <p>This is not a general purpose test and should only be used if you can live with false 249 * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code> 250 * true</code>.</p> 251 * 252 * @param pattern The pattern to match against. Must not be <code>null</code>. 253 * @param str The path to match, as a String. Must not be <code>null</code>. 254 * @return whether or not a given path matches the start of a given pattern up to the first 255 * "**". 256 */ 257 protected static boolean matchPatternStart(String pattern, String str) { 258 // default matches SelectorUtils default 259 return matchPatternStart(pattern, str, true); 260 } 261 262 /** 263 * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p> 264 * 265 * <p>This is not a general purpose test and should only be used if you can live with false 266 * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code> 267 * true</code>.</p> 268 * 269 * @param pattern The pattern to match against. Must not be <code>null</code>. 270 * @param str The path to match, as a String. Must not be <code>null</code>. 271 * @param isCaseSensitive Whether or not matching should be performed case sensitively. 272 * @return whether or not a given path matches the start of a given pattern up to the first 273 * "**". 274 */ 275 protected static boolean matchPatternStart(String pattern, String str, boolean isCaseSensitive) { 276 return SelectorUtils.matchPatternStart( 277 PathUtils.convertPathForOS(pattern), PathUtils.convertPathForOS(str), isCaseSensitive); 278 } 279 280 /** Adds default exclusions to the current exclusions set. */ 281 public void addDefaultExcludes() { 282 int excludesLength = (excludes == null) ? 0 : excludes.length; 283 String[] newExcludes; 284 newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length]; 285 if (excludesLength > 0) { 286 System.arraycopy(excludes, 0, newExcludes, 0, excludesLength); 287 } 288 for (int i = 0; i < DEFAULTEXCLUDES.length; i++) { 289 newExcludes[i + excludesLength] = 290 DEFAULTEXCLUDES[i].replace('/', File.separatorChar).replace('\\', File.separatorChar); 291 } 292 excludes = newExcludes; 293 } 294 295 /** 296 * Returns the base directory to be scanned. This is the directory which is scanned recursively. 297 * 298 * @return the base directory to be scanned 299 */ 300 public String getBasedir() { 301 return basedir; 302 } 303 304 /** 305 * Sets the base directory to be scanned. This is the directory which is scanned recursively. 306 * This directory is normalized for multiple os's (all / and \\ are replaced with 307 * File.separatorChar 308 * 309 * @param basedir The base directory for scanning. Should not be <code>null</code>. 310 */ 311 public void setBasedir(String basedir) { 312 this.basedir = basedir; 313 } 314 315 /** 316 * Sets whether or not the file system should be regarded as case sensitive. 317 * 318 * @param isCaseSensitive whether or not the file system should be regarded as a case 319 * sensitive one 320 */ 321 public void setCaseSensitive(boolean isCaseSensitive) { 322 this.isCaseSensitive = isCaseSensitive; 323 } 324 325 /** 326 * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by <code> 327 * File.separatorChar</code>, so the separator used need not match <code> 328 * File.separatorChar</code>.</p> 329 * 330 * <p>When a pattern ends with a '/' or '\', "**" is appended.</p> 331 * 332 * @param excludesList A list of exclude patterns. May be <code>null</code>, indicating that no 333 * files should be excluded. If a non-<code>null</code> list is given, all 334 * elements must be non-<code>null</code>. 335 */ 336 public void setExcludes(List<String> excludesList) { 337 String[] excludes = excludesList.toArray(new String[0]); 338 if (excludes == null) { 339 this.excludes = null; 340 } else { 341 setExcludes(excludes); 342 } 343 } 344 345 public void setExcludes(String excludes) { 346 if (excludes == null) { 347 this.excludes = null; 348 } else { 349 setExcludes(StringUtils.split(excludes, ",")); 350 } 351 } 352 353 /** 354 * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by <code> 355 * File.separatorChar</code>, so the separator used need not match <code> 356 * File.separatorChar</code>.</p> 357 * 358 * <p>When a pattern ends with a '/' or '\', "**" is appended.</p> 359 * 360 * @param includesList A list of include patterns. May be <code>null</code>, indicating that all 361 * files should be included. If a non-<code>null</code> list is given, all 362 * elements must be non-<code>null</code>. 363 */ 364 public void setIncludes(List<String> includesList) { 365 String[] includes = includesList.toArray(new String[0]); 366 if (includes == null) { 367 this.includes = null; 368 } else { 369 setIncludes(includes); 370 } 371 } 372 373 public void setIncludes(String includes) { 374 if (includes == null) { 375 this.includes = null; 376 } else { 377 setIncludes(StringUtils.split(includes, ",")); 378 } 379 } 380 381 /** 382 * Scans the base directory for files which match at least one include pattern and don't match 383 * any exclude patterns. If there are selectors then the files must pass muster there, as well. 384 * 385 * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is 386 * <code>null</code>, doesn't exist, or isn't a directory). 387 */ 388 public List<String> scan(List<String> files) throws IllegalStateException { 389 // System.err.println("Scanning \nbasedir="+basedir + 390 // " \nincludes=" + java.util.Arrays.toString(includes) + 391 // " \nexcludes=" + java.util.Arrays.toString(excludes) + 392 // " \non files="+files); 393 if (basedir == null) { 394 throw new IllegalStateException("No basedir set"); 395 } 396 397 if (includes == null) { 398 // No includes supplied, so set it to 'matches all' 399 includes = new String[1]; 400 includes[0] = "**"; 401 } 402 if (excludes == null) { 403 excludes = new String[0]; 404 } 405 406 List<String> result = new ArrayList<>(); 407 408 for (String fileName : files) { 409 if (isIncluded(fileName) && !isExcluded(fileName)) { 410 result.add(fileName); 411 } 412 } 413 // System.err.println("Result "+result+"\n\n\n"); 414 return result; 415 } 416 417 /** 418 * Tests whether or not a name matches against at least one exclude pattern. 419 * 420 * @param name The name to match. Must not be <code>null</code>. 421 * @return <code>true</code> when the name matches against at least one exclude pattern, or 422 * <code>false</code> otherwise. 423 */ 424 protected boolean isExcluded(String name) { 425 return matchesPatterns(name, excludes); 426 } 427 428 /** 429 * Tests whether or not a name matches against at least one include pattern. 430 * 431 * @param name The name to match. Must not be <code>null</code>. 432 * @return <code>true</code> when the name matches against at least one include pattern, or 433 * <code>false</code> otherwise. 434 */ 435 protected boolean isIncluded(String name) { 436 return matchesPatterns(name, includes); 437 } 438 439 /** 440 * Tests whether or not a name matches against at least one include pattern. 441 * 442 * @param name The name to match. Must not be <code>null</code>. 443 * @param patterns The list of patterns to match. 444 * @return <code>true</code> when the name matches against at least one include pattern, or 445 * <code>false</code> otherwise. 446 */ 447 protected boolean matchesPatterns(String name, String[] patterns) { 448 // avoid extra object creation in the loop 449 String path = null; 450 451 String baseDir = getBasedir(); 452 if (!baseDir.isEmpty()) { 453 baseDir = baseDir.concat(File.separator); 454 } 455 456 for (int i = 0; i < patterns.length; i++) { 457 path = PathUtils.convertPathForOS(baseDir + patterns[i]); 458 // System.err.println("path="+path); 459 if (matchPath(path, name, isCaseSensitive)) { 460 return true; 461 } 462 } 463 return false; 464 } 465 466 private void setExcludes(String[] excludes) { 467 this.excludes = setPatterns(excludes); 468 } 469 470 private void setIncludes(String[] includes) { 471 this.includes = setPatterns(includes); 472 } 473 474 private String[] setPatterns(String[] patterns) { 475 String[] result = null; 476 if ((patterns != null) && (patterns.length > 0)) { 477 result = new String[patterns.length]; 478 for (int i = 0; i < patterns.length; i++) { 479 String pattern = patterns[i].trim(); 480 481 // don't normalize the pattern here, we internalize the normalization 482 // just normalize for comparison purposes 483 if (PathUtils.convertPathForOS(pattern).endsWith(File.separator)) { 484 pattern += "**"; 485 } 486 result[i] = pattern; 487 } 488 } 489 return result; 490 } 491 }