1 package org.codehaus.plexus.util;
2
3 /*
4 * The Apache Software License, Version 1.1
5 *
6 * Copyright (c) 2000-2003 The Apache Software Foundation. All rights
7 * reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. The end-user documentation included with the redistribution, if
22 * any, must include the following acknowledgement:
23 * "This product includes software developed by the
24 * Apache Software Foundation (http://www.codehaus.org/)."
25 * Alternately, this acknowledgement may appear in the software itself,
26 * if and wherever such third-party acknowledgements normally appear.
27 *
28 * 4. The names "Ant" and "Apache Software
29 * Foundation" must not be used to endorse or promote products derived
30 * from this software without prior written permission. For written
31 * permission, please contact codehaus@codehaus.org.
32 *
33 * 5. Products derived from this software may not be called "Apache"
34 * nor may "Apache" appear in their names without prior written
35 * permission of the Apache Group.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This software consists of voluntary contributions made by many
52 * individuals on behalf of the Apache Software Foundation. For more
53 * information on the Apache Software Foundation, please see
54 * <http://www.codehaus.org/>.
55 */
56
57 import java.io.File;
58 import java.io.IOException;
59 import java.util.ArrayList;
60 import java.util.Arrays;
61
62 /**
63 * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
64 *
65 * <p>These criteria consist of selectors and patterns which have been specified. With the selectors you can select which
66 * files you want to have included. Files which are not selected are excluded. With patterns you can include or exclude
67 * files based on their filename.</p>
68 *
69 * <p>The idea is simple. A given directory is recursively scanned for all files and directories. Each file/directory is
 * matched against a set of selectors, including special support for matching against filenames with include and
71 * exclude patterns. Only files/directories which match at least one pattern of the include pattern list or other file
72 * selector, and don't match any pattern of the exclude pattern list or fail to match against a required selector will
73 * be placed in the list of files/directories found.</p>
74 *
75 * <p>When no list of include patterns is supplied, "**" will be used, which means that everything will be matched. When no
76 * list of exclude patterns is supplied, an empty list is used, such that nothing will be excluded. When no selectors
77 * are supplied, none are applied.</p>
78 *
79 * <p>The filename pattern matching is done as follows: The name to be matched is split up in path segments. A path segment
80 * is the name of a directory or file, which is bounded by <code>File.separator</code> ('/' under UNIX, '\' under
81 * Windows). For example, "abc/def/ghi/xyz.java" is split up in the segments "abc", "def","ghi" and "xyz.java". The same
82 * is done for the pattern against which should be matched.</p>
83 *
84 * <p>The segments of the name and the pattern are then matched against each other. When '**' is used for a path segment in
85 * the pattern, it matches zero or more path segments of the name.</p>
86 *
87 * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of the pattern and the
88 * string to match:<br>
89 * When a pattern starts with a <code>File.separator</code>, the string to match must also start with a
90 * <code>File.separator</code>. When a pattern does not start with a <code>File.separator</code>, the string to match
91 * may not start with a <code>File.separator</code>. When one of these rules is not obeyed, the string will not match.</p>
92 *
93 * <p>When a name path segment is matched against a pattern path segment, the following special characters can be used:<br>
94 * '*' matches zero or more characters<br>
95 * '?' matches one character.</p>
96 *
97 * Examples:
98 * <ul>
99 * <li>"**\*.class" matches all .class files/dirs in a directory tree.</li>
100 * <li>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and then ".java", in a
101 * directory called test.</li>
102 * <li>"**" matches everything in a directory tree.</li>
103 * <li>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent directory called test
104 * (e.g. "abc\test\def\ghi\XYZ123").</li>
105 * </ul>
106 *
107 * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
108 * Example of usage:
109 * <pre>
110 * String[] includes = { "**\\*.class" };
111 * String[] excludes = { "modules\\*\\**" };
112 * ds.setIncludes( includes );
113 * ds.setExcludes( excludes );
114 * ds.setBasedir( new File( "test" ) );
115 * ds.setCaseSensitive( true );
116 * ds.scan();
117 *
118 * System.out.println( "FILES:" );
119 * String[] files = ds.getIncludedFiles();
 * for ( int i = 0; i &lt; files.length; i++ )
121 * {
122 * System.out.println( files[i] );
123 * }
124 * </pre>
125 *
126 * <p>This will scan a directory called test for .class files, but excludes all files in all proper subdirectories of a
127 * directory called "modules"</p>
128 *
129 * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
130 * @author Magesh Umasankar
131 * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
132 * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
133 */
134 public class DirectoryScanner
135 extends AbstractScanner
136 {
137
138 private static final String[] EMPTY_STRING_ARRAY = new String[0];
139
140 /**
141 * The base directory to be scanned.
142 */
143 protected File basedir;
144
145 /**
146 * The files which matched at least one include and no excludes and were selected.
147 */
148 protected ArrayList<String> filesIncluded;
149
150 /**
151 * The files which did not match any includes or selectors.
152 */
153 protected ArrayList<String> filesNotIncluded;
154
155 /**
156 * The files which matched at least one include and at least one exclude.
157 */
158 protected ArrayList<String> filesExcluded;
159
160 /**
161 * The directories which matched at least one include and no excludes and were selected.
162 */
163 protected ArrayList<String> dirsIncluded;
164
165 /**
166 * The directories which were found and did not match any includes.
167 */
168 protected ArrayList<String> dirsNotIncluded;
169
170 /**
171 * The directories which matched at least one include and at least one exclude.
172 */
173 protected ArrayList<String> dirsExcluded;
174
175 /**
176 * The files which matched at least one include and no excludes and which a selector discarded.
177 */
178 protected ArrayList<String> filesDeselected;
179
180 /**
181 * The directories which matched at least one include and no excludes but which a selector discarded.
182 */
183 protected ArrayList<String> dirsDeselected;
184
185 /**
186 * Whether or not our results were built by a slow scan.
187 */
188 protected boolean haveSlowResults = false;
189
190 /**
191 * Whether or not symbolic links should be followed.
192 *
193 * @since Ant 1.5
194 */
195 private boolean followSymlinks = true;
196
197 /**
198 * Whether or not everything tested so far has been included.
199 */
200 protected boolean everythingIncluded = true;
201
202 private final char[][] tokenizedEmpty = MatchPattern.tokenizePathToCharArray( "", File.separator );
203
/**
 * Creates a scanner. No base directory is set yet; call <code>setBasedir</code> before scanning.
 */
public DirectoryScanner()
{
    super();
}
210
211 /**
212 * Sets the base directory to be scanned. This is the directory which is scanned recursively. All '/' and '\'
213 * characters are replaced by <code>File.separatorChar</code>, so the separator used need not match
214 * <code>File.separatorChar</code>.
215 *
216 * @param basedir The base directory to scan. Must not be <code>null</code>.
217 */
218 public void setBasedir( String basedir )
219 {
220 setBasedir( new File( basedir.replace( '/', File.separatorChar ).replace( '\\', File.separatorChar ) ) );
221 }
222
223 /**
224 * Sets the base directory to be scanned. This is the directory which is scanned recursively.
225 *
226 * @param basedir The base directory for scanning. Should not be <code>null</code>.
227 */
228 public void setBasedir( File basedir )
229 {
230 this.basedir = basedir;
231 }
232
233 /**
234 * Returns the base directory to be scanned. This is the directory which is scanned recursively.
235 *
236 * @return the base directory to be scanned
237 */
238 @Override
239 public File getBasedir()
240 {
241 return basedir;
242 }
243
244 /**
245 * Sets whether or not symbolic links should be followed.
246 *
247 * @param followSymlinks whether or not symbolic links should be followed
248 */
249 public void setFollowSymlinks( boolean followSymlinks )
250 {
251 this.followSymlinks = followSymlinks;
252 }
253
254 /**
255 * Returns whether or not the scanner has included all the files or directories it has come across so far.
256 *
257 * @return <code>true</code> if all files and directories which have been found so far have been included.
258 */
259 public boolean isEverythingIncluded()
260 {
261 return everythingIncluded;
262 }
263
264 /**
265 * Scans the base directory for files which match at least one include pattern and don't match any exclude patterns.
266 * If there are selectors then the files must pass muster there, as well.
267 *
268 * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is <code>null</code>, doesn't
269 * exist, or isn't a directory).
270 */
271 @Override
272 public void scan()
273 throws IllegalStateException
274 {
275 if ( basedir == null )
276 {
277 throw new IllegalStateException( "No basedir set" );
278 }
279 if ( !basedir.exists() )
280 {
281 throw new IllegalStateException( "basedir " + basedir + " does not exist" );
282 }
283 if ( !basedir.isDirectory() )
284 {
285 throw new IllegalStateException( "basedir " + basedir + " is not a directory" );
286 }
287
288 setupDefaultFilters();
289 setupMatchPatterns();
290
291 filesIncluded = new ArrayList<String>();
292 filesNotIncluded = new ArrayList<String>();
293 filesExcluded = new ArrayList<String>();
294 filesDeselected = new ArrayList<String>();
295 dirsIncluded = new ArrayList<String>();
296 dirsNotIncluded = new ArrayList<String>();
297 dirsExcluded = new ArrayList<String>();
298 dirsDeselected = new ArrayList<String>();
299
300 if ( isIncluded( "", tokenizedEmpty ) )
301 {
302
303 if ( !isExcluded( "", tokenizedEmpty ) )
304 {
305 if ( isSelected( "", basedir ) )
306 {
307 dirsIncluded.add( "" );
308 }
309 else
310 {
311 dirsDeselected.add( "" );
312 }
313 }
314 else
315 {
316 dirsExcluded.add( "" );
317 }
318 }
319 else
320 {
321 dirsNotIncluded.add( "" );
322 }
323 scandir( basedir, "", true );
324 }
325
326 /**
327 * <p>Top level invocation for a slow scan. A slow scan builds up a full list of excluded/included files/directories,
328 * whereas a fast scan will only have full results for included files, as it ignores directories which can't
329 * possibly hold any included files/directories.</p>
330 *
331 * <p>Returns immediately if a slow scan has already been completed.</p>
332 */
333 protected void slowScan()
334 {
335 if ( haveSlowResults )
336 {
337 return;
338 }
339
340 String[] excl = dirsExcluded.toArray( EMPTY_STRING_ARRAY );
341 String[] notIncl = dirsNotIncluded.toArray( EMPTY_STRING_ARRAY );
342
343 for ( String anExcl : excl )
344 {
345 if ( !couldHoldIncluded( anExcl ) )
346 {
347 scandir( new File( basedir, anExcl ), anExcl + File.separator, false );
348 }
349 }
350
351 for ( String aNotIncl : notIncl )
352 {
353 if ( !couldHoldIncluded( aNotIncl ) )
354 {
355 scandir( new File( basedir, aNotIncl ), aNotIncl + File.separator, false );
356 }
357 }
358
359 haveSlowResults = true;
360 }
361
362 /**
363 * Scans the given directory for files and directories. Found files and directories are placed in their respective
364 * collections, based on the matching of includes, excludes, and the selectors. When a directory is found, it is
365 * scanned recursively.
366 *
367 * @param dir The directory to scan. Must not be <code>null</code>.
368 * @param vpath The path relative to the base directory (needed to prevent problems with an absolute path when using
369 * dir). Must not be <code>null</code>.
370 * @param fast Whether or not this call is part of a fast scan.
371 * @see #filesIncluded
372 * @see #filesNotIncluded
373 * @see #filesExcluded
374 * @see #dirsIncluded
375 * @see #dirsNotIncluded
376 * @see #dirsExcluded
377 * @see #slowScan
378 */
379 protected void scandir( File dir, String vpath, boolean fast )
380 {
381 String[] newfiles = dir.list();
382
383 if ( newfiles == null )
384 {
385 /*
386 * two reasons are mentioned in the API docs for File.list (1) dir is not a directory. This is impossible as
387 * we wouldn't get here in this case. (2) an IO error occurred (why doesn't it throw an exception then???)
388 */
389
390 /*
391 * [jdcasey] (2) is apparently happening to me, as this is killing one of my tests... this is affecting the
392 * assembly plugin, fwiw. I will initialize the newfiles array as zero-length for now. NOTE: I can't find
393 * the problematic code, as it appears to come from a native method in UnixFileSystem...
394 */
395 /*
396 * [bentmann] A null array will also be returned from list() on NTFS when dir refers to a soft link or
397 * junction point whose target is not existent.
398 */
399 newfiles = EMPTY_STRING_ARRAY;
400
401 // throw new IOException( "IO error scanning directory " + dir.getAbsolutePath() );
402 }
403
404 if ( !followSymlinks )
405 {
406 try
407 {
408 if ( isParentSymbolicLink( dir, null ) )
409 {
410 for ( String newfile : newfiles )
411 {
412 String name = vpath + newfile;
413 File file = new File( dir, newfile );
414 if ( file.isDirectory() )
415 {
416 dirsExcluded.add( name );
417 }
418 else
419 {
420 filesExcluded.add( name );
421 }
422 }
423 return;
424 }
425 }
426 catch ( IOException ioe )
427 {
428 String msg = "IOException caught while checking for links!";
429 // will be caught and redirected to Ant's logging system
430 System.err.println( msg );
431 }
432 }
433
434 if ( filenameComparator != null )
435 {
436 Arrays.sort( newfiles, filenameComparator );
437 }
438
439 for ( String newfile : newfiles )
440 {
441 String name = vpath + newfile;
442 char[][] tokenizedName = MatchPattern.tokenizePathToCharArray( name, File.separator );
443 File file = new File( dir, newfile );
444 if ( file.isDirectory() )
445 {
446
447 if ( isIncluded( name, tokenizedName ) )
448 {
449 if ( !isExcluded( name, tokenizedName ) )
450 {
451 if ( isSelected( name, file ) )
452 {
453 dirsIncluded.add( name );
454 if ( fast )
455 {
456 scandir( file, name + File.separator, fast );
457 }
458 }
459 else
460 {
461 everythingIncluded = false;
462 dirsDeselected.add( name );
463 if ( fast && couldHoldIncluded( name ) )
464 {
465 scandir( file, name + File.separator, fast );
466 }
467 }
468
469 }
470 else
471 {
472 everythingIncluded = false;
473 dirsExcluded.add( name );
474 if ( fast && couldHoldIncluded( name ) )
475 {
476 scandir( file, name + File.separator, fast );
477 }
478 }
479 }
480 else
481 {
482 everythingIncluded = false;
483 dirsNotIncluded.add( name );
484 if ( fast && couldHoldIncluded( name ) )
485 {
486 scandir( file, name + File.separator, fast );
487 }
488 }
489 if ( !fast )
490 {
491 scandir( file, name + File.separator, fast );
492 }
493 }
494 else if ( file.isFile() )
495 {
496 if ( isIncluded( name, tokenizedName ) )
497 {
498 if ( !isExcluded( name, tokenizedName ) )
499 {
500 if ( isSelected( name, file ) )
501 {
502 filesIncluded.add( name );
503 }
504 else
505 {
506 everythingIncluded = false;
507 filesDeselected.add( name );
508 }
509 }
510 else
511 {
512 everythingIncluded = false;
513 filesExcluded.add( name );
514 }
515 }
516 else
517 {
518 everythingIncluded = false;
519 filesNotIncluded.add( name );
520 }
521 }
522 }
523 }
524
525 /**
526 * Tests whether a name should be selected.
527 *
528 * @param name the filename to check for selecting
529 * @param file the java.io.File object for this filename
530 * @return <code>false</code> when the selectors says that the file should not be selected, <code>true</code>
531 * otherwise.
532 */
533 protected boolean isSelected( String name, File file )
534 {
535 return true;
536 }
537
538 /**
539 * Returns the names of the files which matched at least one of the include patterns and none of the exclude
540 * patterns. The names are relative to the base directory.
541 *
542 * @return the names of the files which matched at least one of the include patterns and none of the exclude
543 * patterns.
544 */
545 @Override
546 public String[] getIncludedFiles()
547 {
548 return filesIncluded.toArray( EMPTY_STRING_ARRAY );
549 }
550
551 /**
552 * Returns the names of the files which matched none of the include patterns. The names are relative to the base
553 * directory. This involves performing a slow scan if one has not already been completed.
554 *
555 * @return the names of the files which matched none of the include patterns.
556 * @see #slowScan
557 */
558 public String[] getNotIncludedFiles()
559 {
560 slowScan();
561 return filesNotIncluded.toArray( EMPTY_STRING_ARRAY );
562 }
563
564 /**
565 * Returns the names of the files which matched at least one of the include patterns and at least one of the exclude
566 * patterns. The names are relative to the base directory. This involves performing a slow scan if one has not
567 * already been completed.
568 *
569 * @return the names of the files which matched at least one of the include patterns and at at least one of the
570 * exclude patterns.
571 * @see #slowScan
572 */
573 public String[] getExcludedFiles()
574 {
575 slowScan();
576 return filesExcluded.toArray( EMPTY_STRING_ARRAY );
577 }
578
579 /**
580 * <p>Returns the names of the files which were selected out and therefore not ultimately included.</p>
581 *
582 * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
583 * completed.</p>
584 *
585 * @return the names of the files which were deselected.
586 * @see #slowScan
587 */
588 public String[] getDeselectedFiles()
589 {
590 slowScan();
591 return filesDeselected.toArray( EMPTY_STRING_ARRAY );
592 }
593
594 /**
595 * Returns the names of the directories which matched at least one of the include patterns and none of the exclude
596 * patterns. The names are relative to the base directory.
597 *
598 * @return the names of the directories which matched at least one of the include patterns and none of the exclude
599 * patterns.
600 */
601 @Override
602 public String[] getIncludedDirectories()
603 {
604 return dirsIncluded.toArray( EMPTY_STRING_ARRAY );
605 }
606
607 /**
608 * Returns the names of the directories which matched none of the include patterns. The names are relative to the
609 * base directory. This involves performing a slow scan if one has not already been completed.
610 *
611 * @return the names of the directories which matched none of the include patterns.
612 * @see #slowScan
613 */
614 public String[] getNotIncludedDirectories()
615 {
616 slowScan();
617 return dirsNotIncluded.toArray( EMPTY_STRING_ARRAY );
618 }
619
620 /**
621 * Returns the names of the directories which matched at least one of the include patterns and at least one of the
622 * exclude patterns. The names are relative to the base directory. This involves performing a slow scan if one has
623 * not already been completed.
624 *
625 * @return the names of the directories which matched at least one of the include patterns and at least one of the
626 * exclude patterns.
627 * @see #slowScan
628 */
629 public String[] getExcludedDirectories()
630 {
631 slowScan();
632 return dirsExcluded.toArray( EMPTY_STRING_ARRAY );
633 }
634
635 /**
636 * <p>Returns the names of the directories which were selected out and therefore not ultimately included.</p>
637 *
638 * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
639 * completed.</p>
640 *
641 * @return the names of the directories which were deselected.
642 * @see #slowScan
643 */
644 public String[] getDeselectedDirectories()
645 {
646 slowScan();
647 return dirsDeselected.toArray( EMPTY_STRING_ARRAY );
648 }
649
650 /**
651 * <p>Checks whether a given file is a symbolic link.</p>
652 *
653 * <p>It doesn't really test for symbolic links but whether the canonical and absolute paths of the file are identical
654 * - this may lead to false positives on some platforms.
655 * </p>
656 *
657 * @param parent the parent directory of the file to test
658 * @param name the name of the file to test.
659 * @return true if it's a symbolic link
660 * @throws java.io.IOException .
661 * @since Ant 1.5
662 */
663 public boolean isSymbolicLink( File parent, String name )
664 throws IOException
665 {
666 return NioFiles.isSymbolicLink( new File( parent, name ) );
667 }
668
669 /**
670 * <p>Checks whether the parent of this file is a symbolic link.</p>
671 *
672 * <p>For java versions prior to 7 It doesn't really test for symbolic links but whether the canonical and absolute
673 * paths of the file are identical - this may lead to false positives on some platforms.</p>
674 *
675 * @param parent the parent directory of the file to test
676 * @param name the name of the file to test.
677 * @return true if it's a symbolic link
678 * @throws java.io.IOException .
679 * @since Ant 1.5
680 */
681 public boolean isParentSymbolicLink( File parent, String name )
682 throws IOException
683 {
684 return NioFiles.isSymbolicLink( parent );
685 }
686 }