1 package org.apache.maven.archetype.common.util;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import org.codehaus.plexus.util.SelectorUtils;
23 import org.codehaus.plexus.util.StringUtils;
24
25 import java.io.File;
26 import java.util.ArrayList;
27 import java.util.List;
28
29 /**
30 * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
31 *
32 * <p>These criteria consist of selectors and patterns which have been specified. With the selectors
33 * you can select which files you want to have included. Files which are not selected are excluded.
34 * With patterns you can include or exclude files based on their filename.</p>
35 *
36 * <p>The idea is simple. A given directory is recursively scanned for all files and directories.
37 * Each file/directory is matched against a set of selectors, including special support for matching
38 * against filenames with include and and exclude patterns. Only files/directories which match at
39 * least one pattern of the include pattern list or other file selector, and don't match any pattern
40 * of the exclude pattern list or fail to match against a required selector will be placed in the
41 * list of files/directories found.</p>
42 *
43 * <p>When no list of include patterns is supplied, "**" will be used, which means that everything
44 * will be matched. When no list of exclude patterns is supplied, an empty list is used, such that
45 * nothing will be excluded. When no selectors are supplied, none are applied.</p>
46 *
47 * <p>The filename pattern matching is done as follows: The name to be matched is split up in path
48 * segments. A path segment is the name of a directory or file, which is bounded by <code>
49 * File.separator</code> ('/' under UNIX, '\' under Windows). For example, "abc/def/ghi/xyz.java" is
50 * split up in the segments "abc", "def","ghi" and "xyz.java". The same is done for the pattern
51 * against which should be matched.</p>
52 *
53 * <p>The segments of the name and the pattern are then matched against each other. When '**' is
54 * used for a path segment in the pattern, it matches zero or more path segments of the name.</p>
55 *
56 * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of
57 * the pattern and the string to match:<br>
58 * When a pattern starts with a <code>File.separator</code>, the string to match must also start
59 * with a <code>File.separator</code>. When a pattern does not start with a <code>
60 * File.separator</code>, the string to match may not start with a <code>File.separator</code>. When
61 * one of these rules is not obeyed, the string will not match.</p>
62 *
63 * <p>When a name path segment is matched against a pattern path segment, the following special
64 * characters can be used:<br>
65 * '*' matches zero or more characters<br>
66 * '?' matches one character.</p>
67 *
68 * <p>Examples:</p>
69 *
70 * <p>"**\*.class" matches all .class files/dirs in a directory tree.</p>
71 *
72 * <p>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and
73 * then ".java", in a directory called test.</p>
74 *
75 * <p>"**" matches everything in a directory tree.</p>
76 *
77 * <p>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent
78 * directory called test (e.g. "abc\test\def\ghi\XYZ123").</p>
79 *
80 * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
81 *
82 * <p>Example of usage:</p>
83 *
84 * <pre>
85 * String[] includes = {"**\\*.class"};
86 * String[] excludes = {"modules\\*\\**"};
87 * ds.setIncludes(includes);
88 * ds.setExcludes(excludes);
89 * ds.setBasedir(new File("test"));
90 * ds.setCaseSensitive(true);
91 * ds.scan();
92 *
93 * System.out.println("FILES:");
94 * String[] files = ds.getIncludedFiles();
95 * for (int i = 0; i < files.length; i++) {
96 * System.out.println(files[i]);
97 * }
98 * </pre>
99 *
100 * <p>This will scan a directory called test for .class files, but excludes all files in all proper
101 * subdirectories of a directory called "modules"</p>
102 *
103 * <p>This class was stealed from rg.coudehaus.plexus.util.DirectoryScanner and adapted to search
104 * from a List<String></p>
105 *
106 * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
107 * @author Magesh Umasankar
108 * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
109 * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
110 */
111 public class ListScanner
112 {
113 /**
114 * Patterns which should be excluded by default.
115 *
116 * @see #addDefaultExcludes()
117 */
118 public static final String[] DEFAULTEXCLUDES =
119 { // Miscellaneous typical temporary files
120 "**/*~", "**/#*#", "**/.#*", "**/%*%", "**/._*",
121
122 // CVS
123 "**/CVS", "**/CVS/**", "**/.cvsignore",
124
125 // SCCS
126 "**/SCCS", "**/SCCS/**",
127
128 // Visual SourceSafe
129 "**/vssver.scc",
130
131 // Subversion
132 "**/.svn", "**/.svn/**",
133
134 // Arch
135 "**/.arch-ids", "**/.arch-ids/**",
136
137 // Bazaar
138 "**/.bzr", "**/.bzr/**",
139
140 // GIT
141 "**/.git", "**/.git/**",
142
143 // Mercurial
144 "**/.hg", "**/.hg/**",
145
146 // SurroundSCM
147 "**/.MySCMServerInfo",
148
149 // Mac
150 "**/.DS_Store"
151 };
152
153 /** The base directory to be scanned. */
154 protected String basedir;
155
156 /** Whether or not everything tested so far has been included. */
157 protected boolean everythingIncluded = true;
158
159 /** The patterns for the files to be excluded. */
160 protected String[] excludes;
161
162 /** The patterns for the files to be included. */
163 protected String[] includes;
164
165 /** Whether or not the file system should be treated as a case sensitive one. */
166 protected boolean isCaseSensitive = true;
167
168 /** Sole constructor. */
169 public ListScanner()
170 {
171 }
172
173 public static String getDefaultExcludes()
174 {
175 return StringUtils.join( DEFAULTEXCLUDES, "," );
176 }
177
178 /**
179 * Tests whether or not a string matches against a pattern. The pattern may contain two special
180 * characters:<br>
181 * '*' means zero or more characters<br>
182 * '?' means one and only one character
183 *
184 * @param pattern The pattern to match against. Must not be <code>null</code>.
185 * @param str The string which must be matched against the pattern. Must not be <code>
186 * null</code>.
187 * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
188 * otherwise.
189 */
190 public static boolean match( String pattern, String str )
191 {
192 // default matches the SelectorUtils default
193 return match( pattern, str, true );
194 }
195
196 /**
197 * Tests whether or not a string matches against a pattern. The pattern may contain two special
198 * characters:<br>
199 * '*' means zero or more characters<br>
200 * '?' means one and only one character
201 *
202 * @param pattern The pattern to match against. Must not be <code>null</code>.
203 * @param str The string which must be matched against the pattern. Must not be
204 * <code>null</code>.
205 * @param isCaseSensitive Whether or not matching should be performed case sensitively.
206 * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
207 * otherwise.
208 */
209 protected static boolean match( String pattern, String str, boolean isCaseSensitive )
210 {
211 return SelectorUtils.match( pattern, str, isCaseSensitive );
212 }
213
214 /**
215 * Tests whether or not a given path matches a given pattern.
216 *
217 * @param pattern The pattern to match against. Must not be <code>null</code>.
218 * @param str The path to match, as a String. Must not be <code>null</code>.
219 * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
220 * otherwise.
221 */
222 protected static boolean matchPath( String pattern, String str )
223 {
224 // default matches the SelectorUtils default
225 return matchPath( pattern, str, true );
226 }
227
228 /**
229 * Tests whether or not a given path matches a given pattern.
230 *
231 * @param pattern The pattern to match against. Must not be <code>null</code>.
232 * @param str The path to match, as a String. Must not be <code>null</code>.
233 * @param isCaseSensitive Whether or not matching should be performed case sensitively.
234 * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
235 * otherwise.
236 */
237 protected static boolean matchPath( String pattern, String str, boolean isCaseSensitive )
238 {
239 return SelectorUtils.matchPath( PathUtils.convertPathForOS( pattern ), PathUtils.convertPathForOS( str ),
240 isCaseSensitive );
241 }
242
243 /**
244 * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".<p>
245 *
246 * <p>This is not a general purpose test and should only be used if you can live with false
247 * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
248 * true</code>.</p>
249 *
250 * @param pattern The pattern to match against. Must not be <code>null</code>.
251 * @param str The path to match, as a String. Must not be <code>null</code>.
252 * @return whether or not a given path matches the start of a given pattern up to the first
253 * "**".
254 */
255 protected static boolean matchPatternStart( String pattern, String str )
256 {
257 // default matches SelectorUtils default
258 return matchPatternStart( pattern, str, true );
259 }
260
261 /**
262 * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
263 *
264 * <p>This is not a general purpose test and should only be used if you can live with false
265 * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
266 * true</code>.</p>
267 *
268 * @param pattern The pattern to match against. Must not be <code>null</code>.
269 * @param str The path to match, as a String. Must not be <code>null</code>.
270 * @param isCaseSensitive Whether or not matching should be performed case sensitively.
271 * @return whether or not a given path matches the start of a given pattern up to the first
272 * "**".
273 */
274 protected static boolean matchPatternStart( String pattern, String str, boolean isCaseSensitive )
275 {
276 return SelectorUtils.matchPatternStart( PathUtils.convertPathForOS( pattern ),
277 PathUtils.convertPathForOS( str ), isCaseSensitive );
278 }
279
280 /** Adds default exclusions to the current exclusions set. */
281 public void addDefaultExcludes()
282 {
283 int excludesLength = ( excludes == null ) ? 0 : excludes.length;
284 String[] newExcludes;
285 newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
286 if ( excludesLength > 0 )
287 {
288 System.arraycopy( excludes, 0, newExcludes, 0, excludesLength );
289 }
290 for ( int i = 0; i < DEFAULTEXCLUDES.length; i++ )
291 {
292 newExcludes[i + excludesLength] =
293 DEFAULTEXCLUDES[i].replace( '/', File.separatorChar ).replace( '\\', File.separatorChar );
294 }
295 excludes = newExcludes;
296 }
297
298 /**
299 * Returns the base directory to be scanned. This is the directory which is scanned recursively.
300 *
301 * @return the base directory to be scanned
302 */
303 public String getBasedir()
304 {
305 return basedir;
306 }
307
308 /**
309 * Sets the base directory to be scanned. This is the directory which is scanned recursively.
310 * This directory is normalized for multiple os's (all / and \\ are replaced with
311 * File.separatorChar
312 *
313 * @param basedir The base directory for scanning. Should not be <code>null</code>.
314 */
315 public void setBasedir( String basedir )
316 {
317 this.basedir = basedir;
318 }
319
320 /**
321 * Sets whether or not the file system should be regarded as case sensitive.
322 *
323 * @param isCaseSensitive whether or not the file system should be regarded as a case
324 * sensitive one
325 */
326 public void setCaseSensitive( boolean isCaseSensitive )
327 {
328 this.isCaseSensitive = isCaseSensitive;
329 }
330
331 /**
332 * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by <code>
333 * File.separatorChar</code>, so the separator used need not match <code>
334 * File.separatorChar</code>.</p>
335 *
336 * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
337 *
338 * @param excludesList A list of exclude patterns. May be <code>null</code>, indicating that no
339 * files should be excluded. If a non-<code>null</code> list is given, all
340 * elements must be non-<code>null</code>.
341 */
342 public void setExcludes( List<String> excludesList )
343 {
344 String[] excludes = excludesList.toArray( new String[excludesList.size()] );
345 if ( excludes == null )
346 {
347 this.excludes = null;
348 }
349 else
350 {
351 setExcludes( excludes );
352 }
353 }
354
355 public void setExcludes( String excludes )
356 {
357 if ( excludes == null )
358 {
359 this.excludes = null;
360 }
361 else
362 {
363 setExcludes( StringUtils.split( excludes, "," ) );
364 }
365 }
366
367 /**
368 * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by <code>
369 * File.separatorChar</code>, so the separator used need not match <code>
370 * File.separatorChar</code>.</p>
371 *
372 * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
373 *
374 * @param includesList A list of include patterns. May be <code>null</code>, indicating that all
375 * files should be included. If a non-<code>null</code> list is given, all
376 * elements must be non-<code>null</code>.
377 */
378 public void setIncludes( List<String> includesList )
379 {
380 String[] includes = includesList.toArray( new String[includesList.size()] );
381 if ( includes == null )
382 {
383 this.includes = null;
384 }
385 else
386 {
387 setIncludes( includes );
388 }
389 }
390
391 public void setIncludes( String includes )
392 {
393 if ( includes == null )
394 {
395 this.includes = null;
396 }
397 else
398 {
399 setIncludes( StringUtils.split( includes, "," ) );
400 }
401 }
402
403 /**
404 * Scans the base directory for files which match at least one include pattern and don't match
405 * any exclude patterns. If there are selectors then the files must pass muster there, as well.
406 *
407 * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is
408 * <code>null</code>, doesn't exist, or isn't a directory).
409 */
410 public List<String> scan( List<String> files )
411 throws
412 IllegalStateException
413 {
414 // System.err.println("Scanning \nbasedir="+basedir +
415 // " \nincludes=" + java.util.Arrays.toString(includes) +
416 // " \nexcludes=" + java.util.Arrays.toString(excludes) +
417 // " \non files="+files);
418 if ( basedir == null )
419 {
420 throw new IllegalStateException( "No basedir set" );
421 }
422
423 if ( includes == null )
424 {
425 // No includes supplied, so set it to 'matches all'
426 includes = new String[1];
427 includes[0] = "**";
428 }
429 if ( excludes == null )
430 {
431 excludes = new String[0];
432 }
433
434 List<String> result = new ArrayList<>();
435
436 for ( String fileName : files )
437 {
438 if ( isIncluded( fileName ) && !isExcluded( fileName ) )
439 {
440 result.add( fileName );
441 }
442 }
443 // System.err.println("Result "+result+"\n\n\n");
444 return result;
445 }
446
447 /**
448 * Tests whether or not a name matches against at least one exclude pattern.
449 *
450 * @param name The name to match. Must not be <code>null</code>.
451 * @return <code>true</code> when the name matches against at least one exclude pattern, or
452 * <code>false</code> otherwise.
453 */
454 protected boolean isExcluded( String name )
455 {
456 return matchesPatterns( name, excludes );
457 }
458
459 /**
460 * Tests whether or not a name matches against at least one include pattern.
461 *
462 * @param name The name to match. Must not be <code>null</code>.
463 * @return <code>true</code> when the name matches against at least one include pattern, or
464 * <code>false</code> otherwise.
465 */
466 protected boolean isIncluded( String name )
467 {
468 return matchesPatterns( name, includes );
469 }
470
471 /**
472 * Tests whether or not a name matches against at least one include pattern.
473 *
474 * @param name The name to match. Must not be <code>null</code>.
475 * @param patterns The list of patterns to match.
476 * @return <code>true</code> when the name matches against at least one include pattern, or
477 * <code>false</code> otherwise.
478 */
479 protected boolean matchesPatterns( String name, String[] patterns )
480 {
481 // avoid extra object creation in the loop
482 String path = null;
483
484 String baseDir = getBasedir();
485 if ( baseDir.length() > 0 )
486 {
487 baseDir = baseDir.concat( File.separator );
488 }
489
490 for ( int i = 0; i < patterns.length; i++ )
491 {
492 path = PathUtils.convertPathForOS( baseDir + patterns[i] );
493 // System.err.println("path="+path);
494 if ( matchPath( path, name, isCaseSensitive ) )
495 {
496 return true;
497 }
498 }
499 return false;
500 }
501
502 private void setExcludes( String[] excludes )
503 {
504 this.excludes = setPatterns( excludes );
505 }
506
507 private void setIncludes( String[] includes )
508 {
509 this.includes = setPatterns( includes );
510 }
511
512 private String[] setPatterns( String[] patterns )
513 {
514 String[] result = null;
515 if ( ( patterns != null ) && ( patterns.length > 0 ) )
516 {
517 result = new String[patterns.length];
518 for ( int i = 0; i < patterns.length; i++ )
519 {
520 String pattern = patterns[i].trim();
521
522 // don't normalize the pattern here, we internalize the normalization
523 // just normalize for comparison purposes
524 if ( PathUtils.convertPathForOS( pattern ).endsWith( File.separator ) )
525 {
526 pattern += "**";
527 }
528 result[i] = pattern;
529 }
530 }
531 return result;
532 }
533 }