View Javadoc
1   package org.codehaus.plexus.util;
2   
3   /*
4    * The Apache Software License, Version 1.1
5    *
6    * Copyright (c) 2000-2003 The Apache Software Foundation.  All rights
7    * reserved.
8    *
9    * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   *
13   * 1. Redistributions of source code must retain the above copyright
14   *    notice, this list of conditions and the following disclaimer.
15   *
16   * 2. Redistributions in binary form must reproduce the above copyright
17   *    notice, this list of conditions and the following disclaimer in
18   *    the documentation and/or other materials provided with the
19   *    distribution.
20   *
21   * 3. The end-user documentation included with the redistribution, if
22   *    any, must include the following acknowledgement:
23   *       "This product includes software developed by the
24   *        Apache Software Foundation (http://www.codehaus.org/)."
25   *    Alternately, this acknowledgement may appear in the software itself,
26   *    if and wherever such third-party acknowledgements normally appear.
27   *
28   * 4. The names "Ant" and "Apache Software
29   *    Foundation" must not be used to endorse or promote products derived
30   *    from this software without prior written permission. For written
31   *    permission, please contact codehaus@codehaus.org.
32   *
33   * 5. Products derived from this software may not be called "Apache"
34   *    nor may "Apache" appear in their names without prior written
35   *    permission of the Apache Group.
36   *
37   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48   * SUCH DAMAGE.
49   * ====================================================================
50   *
51   * This software consists of voluntary contributions made by many
52   * individuals on behalf of the Apache Software Foundation.  For more
53   * information on the Apache Software Foundation, please see
54   * <http://www.codehaus.org/>.
55   */
56  
57  import java.io.File;
58  import java.io.IOException;
59  import java.util.ArrayList;
60  import java.util.Arrays;
61  
62  /**
63   * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
64   * 
65   * <p>These criteria consist of selectors and patterns which have been specified. With the selectors you can select which
66   * files you want to have included. Files which are not selected are excluded. With patterns you can include or exclude
67   * files based on their filename.</p>
68   * 
69   * <p>The idea is simple. A given directory is recursively scanned for all files and directories. Each file/directory is
70   * matched against a set of selectors, including special support for matching against filenames with include and and
71   * exclude patterns. Only files/directories which match at least one pattern of the include pattern list or other file
72   * selector, and don't match any pattern of the exclude pattern list or fail to match against a required selector will
73   * be placed in the list of files/directories found.</p>
74   * 
75   * <p>When no list of include patterns is supplied, "**" will be used, which means that everything will be matched. When no
76   * list of exclude patterns is supplied, an empty list is used, such that nothing will be excluded. When no selectors
77   * are supplied, none are applied.</p>
78   * 
79   * <p>The filename pattern matching is done as follows: The name to be matched is split up in path segments. A path segment
80   * is the name of a directory or file, which is bounded by <code>File.separator</code> ('/' under UNIX, '\' under
81   * Windows). For example, "abc/def/ghi/xyz.java" is split up in the segments "abc", "def","ghi" and "xyz.java". The same
82   * is done for the pattern against which should be matched.</p>
83   * 
84   * <p>The segments of the name and the pattern are then matched against each other. When '**' is used for a path segment in
85   * the pattern, it matches zero or more path segments of the name.</p>
86   * 
87   * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of the pattern and the
88   * string to match:<br>
89   * When a pattern starts with a <code>File.separator</code>, the string to match must also start with a
90   * <code>File.separator</code>. When a pattern does not start with a <code>File.separator</code>, the string to match
91   * may not start with a <code>File.separator</code>. When one of these rules is not obeyed, the string will not match.</p>
92   * 
93   * <p>When a name path segment is matched against a pattern path segment, the following special characters can be used:<br>
94   * '*' matches zero or more characters<br>
95   * '?' matches one character.</p>
96   * 
97   * Examples:
98   * <ul>
99   *   <li>"**\*.class" matches all .class files/dirs in a directory tree.</li>
100  *   <li>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and then ".java", in a
101  * directory called test.</li>
102  *   <li>"**" matches everything in a directory tree.</li>
103  *   <li>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent directory called test
104  * (e.g. "abc\test\def\ghi\XYZ123").</li>
105  * </ul>
106  * 
107  * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
108  * Example of usage:
109  * <pre>
110  * String[] includes = { "**\\*.class" };
111  * String[] excludes = { "modules\\*\\**" };
112  * ds.setIncludes( includes );
113  * ds.setExcludes( excludes );
114  * ds.setBasedir( new File( "test" ) );
115  * ds.setCaseSensitive( true );
116  * ds.scan();
117  *
118  * System.out.println( "FILES:" );
119  * String[] files = ds.getIncludedFiles();
120  * for ( int i = 0; i &lt; files.length; i++ )
121  * {
122  *     System.out.println( files[i] );
123  * }
124  * </pre>
125  * 
126  * <p>This will scan a directory called test for .class files, but excludes all files in all proper subdirectories of a
127  * directory called "modules"</p>
128  *
129  * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
130  * @author Magesh Umasankar
131  * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
132  * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
133  */
134 public class DirectoryScanner
135     extends AbstractScanner
136 {
137 
138     private static final String[] EMPTY_STRING_ARRAY = new String[0];
139 
140     /**
141      * The base directory to be scanned.
142      */
143     protected File basedir;
144 
145     /**
146      * The files which matched at least one include and no excludes and were selected.
147      */
148     protected ArrayList<String> filesIncluded;
149 
150     /**
151      * The files which did not match any includes or selectors.
152      */
153     protected ArrayList<String> filesNotIncluded;
154 
155     /**
156      * The files which matched at least one include and at least one exclude.
157      */
158     protected ArrayList<String> filesExcluded;
159 
160     /**
161      * The directories which matched at least one include and no excludes and were selected.
162      */
163     protected ArrayList<String> dirsIncluded;
164 
165     /**
166      * The directories which were found and did not match any includes.
167      */
168     protected ArrayList<String> dirsNotIncluded;
169 
170     /**
171      * The directories which matched at least one include and at least one exclude.
172      */
173     protected ArrayList<String> dirsExcluded;
174 
175     /**
176      * The files which matched at least one include and no excludes and which a selector discarded.
177      */
178     protected ArrayList<String> filesDeselected;
179 
180     /**
181      * The directories which matched at least one include and no excludes but which a selector discarded.
182      */
183     protected ArrayList<String> dirsDeselected;
184 
185     /**
186      * Whether or not our results were built by a slow scan.
187      */
188     protected boolean haveSlowResults = false;
189 
190     /**
191      * Whether or not symbolic links should be followed.
192      *
193      * @since Ant 1.5
194      */
195     private boolean followSymlinks = true;
196 
197     /**
198      * Whether or not everything tested so far has been included.
199      */
200     protected boolean everythingIncluded = true;
201 
202     private final char[][] tokenizedEmpty = MatchPattern.tokenizePathToCharArray( "", File.separator );
203 
204     /**
205      * Sole constructor.
206      */
207     public DirectoryScanner()
208     {
209     }
210 
211     /**
212      * Sets the base directory to be scanned. This is the directory which is scanned recursively. All '/' and '\'
213      * characters are replaced by <code>File.separatorChar</code>, so the separator used need not match
214      * <code>File.separatorChar</code>.
215      *
216      * @param basedir The base directory to scan. Must not be <code>null</code>.
217      */
218     public void setBasedir( String basedir )
219     {
220         setBasedir( new File( basedir.replace( '/', File.separatorChar ).replace( '\\', File.separatorChar ) ) );
221     }
222 
223     /**
224      * Sets the base directory to be scanned. This is the directory which is scanned recursively.
225      *
226      * @param basedir The base directory for scanning. Should not be <code>null</code>.
227      */
228     public void setBasedir( File basedir )
229     {
230         this.basedir = basedir;
231     }
232 
233     /**
234      * Returns the base directory to be scanned. This is the directory which is scanned recursively.
235      *
236      * @return the base directory to be scanned
237      */
238     @Override
239     public File getBasedir()
240     {
241         return basedir;
242     }
243 
244     /**
245      * Sets whether or not symbolic links should be followed.
246      *
247      * @param followSymlinks whether or not symbolic links should be followed
248      */
249     public void setFollowSymlinks( boolean followSymlinks )
250     {
251         this.followSymlinks = followSymlinks;
252     }
253 
254     /**
255      * Returns whether or not the scanner has included all the files or directories it has come across so far.
256      *
257      * @return <code>true</code> if all files and directories which have been found so far have been included.
258      */
259     public boolean isEverythingIncluded()
260     {
261         return everythingIncluded;
262     }
263 
264     /**
265      * Scans the base directory for files which match at least one include pattern and don't match any exclude patterns.
266      * If there are selectors then the files must pass muster there, as well.
267      *
268      * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is <code>null</code>, doesn't
269      *             exist, or isn't a directory).
270      */
271     @Override
272     public void scan()
273         throws IllegalStateException
274     {
275         if ( basedir == null )
276         {
277             throw new IllegalStateException( "No basedir set" );
278         }
279         if ( !basedir.exists() )
280         {
281             throw new IllegalStateException( "basedir " + basedir + " does not exist" );
282         }
283         if ( !basedir.isDirectory() )
284         {
285             throw new IllegalStateException( "basedir " + basedir + " is not a directory" );
286         }
287 
288         setupDefaultFilters();
289         setupMatchPatterns();
290 
291         filesIncluded = new ArrayList<String>();
292         filesNotIncluded = new ArrayList<String>();
293         filesExcluded = new ArrayList<String>();
294         filesDeselected = new ArrayList<String>();
295         dirsIncluded = new ArrayList<String>();
296         dirsNotIncluded = new ArrayList<String>();
297         dirsExcluded = new ArrayList<String>();
298         dirsDeselected = new ArrayList<String>();
299 
300         if ( isIncluded( "", tokenizedEmpty ) )
301         {
302 
303             if ( !isExcluded( "", tokenizedEmpty ) )
304             {
305                 if ( isSelected( "", basedir ) )
306                 {
307                     dirsIncluded.add( "" );
308                 }
309                 else
310                 {
311                     dirsDeselected.add( "" );
312                 }
313             }
314             else
315             {
316                 dirsExcluded.add( "" );
317             }
318         }
319         else
320         {
321             dirsNotIncluded.add( "" );
322         }
323         scandir( basedir, "", true );
324     }
325 
326     /**
327      * <p>Top level invocation for a slow scan. A slow scan builds up a full list of excluded/included files/directories,
328      * whereas a fast scan will only have full results for included files, as it ignores directories which can't
329      * possibly hold any included files/directories.</p>
330      * 
331      * <p>Returns immediately if a slow scan has already been completed.</p>
332      */
333     protected void slowScan()
334     {
335         if ( haveSlowResults )
336         {
337             return;
338         }
339 
340         String[] excl = dirsExcluded.toArray( EMPTY_STRING_ARRAY );
341         String[] notIncl = dirsNotIncluded.toArray( EMPTY_STRING_ARRAY );
342 
343         for ( String anExcl : excl )
344         {
345             if ( !couldHoldIncluded( anExcl ) )
346             {
347                 scandir( new File( basedir, anExcl ), anExcl + File.separator, false );
348             }
349         }
350 
351         for ( String aNotIncl : notIncl )
352         {
353             if ( !couldHoldIncluded( aNotIncl ) )
354             {
355                 scandir( new File( basedir, aNotIncl ), aNotIncl + File.separator, false );
356             }
357         }
358 
359         haveSlowResults = true;
360     }
361 
362     /**
363      * Scans the given directory for files and directories. Found files and directories are placed in their respective
364      * collections, based on the matching of includes, excludes, and the selectors. When a directory is found, it is
365      * scanned recursively.
366      *
367      * @param dir The directory to scan. Must not be <code>null</code>.
368      * @param vpath The path relative to the base directory (needed to prevent problems with an absolute path when using
369      *            dir). Must not be <code>null</code>.
370      * @param fast Whether or not this call is part of a fast scan.
371      * @see #filesIncluded
372      * @see #filesNotIncluded
373      * @see #filesExcluded
374      * @see #dirsIncluded
375      * @see #dirsNotIncluded
376      * @see #dirsExcluded
377      * @see #slowScan
378      */
379     protected void scandir( File dir, String vpath, boolean fast )
380     {
381         String[] newfiles = dir.list();
382 
383         if ( newfiles == null )
384         {
385             /*
386              * two reasons are mentioned in the API docs for File.list (1) dir is not a directory. This is impossible as
387              * we wouldn't get here in this case. (2) an IO error occurred (why doesn't it throw an exception then???)
388              */
389 
390             /*
391              * [jdcasey] (2) is apparently happening to me, as this is killing one of my tests... this is affecting the
392              * assembly plugin, fwiw. I will initialize the newfiles array as zero-length for now. NOTE: I can't find
393              * the problematic code, as it appears to come from a native method in UnixFileSystem...
394              */
395             /*
396              * [bentmann] A null array will also be returned from list() on NTFS when dir refers to a soft link or
397              * junction point whose target is not existent.
398              */
399             newfiles = EMPTY_STRING_ARRAY;
400 
401             // throw new IOException( "IO error scanning directory " + dir.getAbsolutePath() );
402         }
403 
404         if ( !followSymlinks )
405         {
406             try
407             {
408                 if ( isParentSymbolicLink( dir, null ) )
409                 {
410                     for ( String newfile : newfiles )
411                     {
412                         String name = vpath + newfile;
413                         File file = new File( dir, newfile );
414                         if ( file.isDirectory() )
415                         {
416                             dirsExcluded.add( name );
417                         }
418                         else
419                         {
420                             filesExcluded.add( name );
421                         }
422                     }
423                     return;
424                 }
425             }
426             catch ( IOException ioe )
427             {
428                 String msg = "IOException caught while checking for links!";
429                 // will be caught and redirected to Ant's logging system
430                 System.err.println( msg );
431             }
432         }
433 
434         if ( filenameComparator != null )
435         {
436             Arrays.sort( newfiles, filenameComparator );
437         }
438 
439         for ( String newfile : newfiles )
440         {
441             String name = vpath + newfile;
442             char[][] tokenizedName = MatchPattern.tokenizePathToCharArray( name, File.separator );
443             File file = new File( dir, newfile );
444             if ( file.isDirectory() )
445             {
446 
447                 if ( isIncluded( name, tokenizedName ) )
448                 {
449                     if ( !isExcluded( name, tokenizedName ) )
450                     {
451                         if ( isSelected( name, file ) )
452                         {
453                             dirsIncluded.add( name );
454                             if ( fast )
455                             {
456                                 scandir( file, name + File.separator, fast );
457                             }
458                         }
459                         else
460                         {
461                             everythingIncluded = false;
462                             dirsDeselected.add( name );
463                             if ( fast && couldHoldIncluded( name ) )
464                             {
465                                 scandir( file, name + File.separator, fast );
466                             }
467                         }
468 
469                     }
470                     else
471                     {
472                         everythingIncluded = false;
473                         dirsExcluded.add( name );
474                         if ( fast && couldHoldIncluded( name ) )
475                         {
476                             scandir( file, name + File.separator, fast );
477                         }
478                     }
479                 }
480                 else
481                 {
482                     everythingIncluded = false;
483                     dirsNotIncluded.add( name );
484                     if ( fast && couldHoldIncluded( name ) )
485                     {
486                         scandir( file, name + File.separator, fast );
487                     }
488                 }
489                 if ( !fast )
490                 {
491                     scandir( file, name + File.separator, fast );
492                 }
493             }
494             else if ( file.isFile() )
495             {
496                 if ( isIncluded( name, tokenizedName ) )
497                 {
498                     if ( !isExcluded( name, tokenizedName ) )
499                     {
500                         if ( isSelected( name, file ) )
501                         {
502                             filesIncluded.add( name );
503                         }
504                         else
505                         {
506                             everythingIncluded = false;
507                             filesDeselected.add( name );
508                         }
509                     }
510                     else
511                     {
512                         everythingIncluded = false;
513                         filesExcluded.add( name );
514                     }
515                 }
516                 else
517                 {
518                     everythingIncluded = false;
519                     filesNotIncluded.add( name );
520                 }
521             }
522         }
523     }
524 
525     /**
526      * Tests whether a name should be selected.
527      *
528      * @param name the filename to check for selecting
529      * @param file the java.io.File object for this filename
530      * @return <code>false</code> when the selectors says that the file should not be selected, <code>true</code>
531      *         otherwise.
532      */
533     protected boolean isSelected( String name, File file )
534     {
535         return true;
536     }
537 
538     /**
539      * Returns the names of the files which matched at least one of the include patterns and none of the exclude
540      * patterns. The names are relative to the base directory.
541      *
542      * @return the names of the files which matched at least one of the include patterns and none of the exclude
543      *         patterns.
544      */
545     @Override
546     public String[] getIncludedFiles()
547     {
548         return filesIncluded.toArray( EMPTY_STRING_ARRAY );
549     }
550 
551     /**
552      * Returns the names of the files which matched none of the include patterns. The names are relative to the base
553      * directory. This involves performing a slow scan if one has not already been completed.
554      *
555      * @return the names of the files which matched none of the include patterns.
556      * @see #slowScan
557      */
558     public String[] getNotIncludedFiles()
559     {
560         slowScan();
561         return filesNotIncluded.toArray( EMPTY_STRING_ARRAY );
562     }
563 
564     /**
565      * Returns the names of the files which matched at least one of the include patterns and at least one of the exclude
566      * patterns. The names are relative to the base directory. This involves performing a slow scan if one has not
567      * already been completed.
568      *
569      * @return the names of the files which matched at least one of the include patterns and at at least one of the
570      *         exclude patterns.
571      * @see #slowScan
572      */
573     public String[] getExcludedFiles()
574     {
575         slowScan();
576         return filesExcluded.toArray( EMPTY_STRING_ARRAY );
577     }
578 
579     /**
580      * <p>Returns the names of the files which were selected out and therefore not ultimately included.</p>
581      * 
582      * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
583      * completed.</p>
584      *
585      * @return the names of the files which were deselected.
586      * @see #slowScan
587      */
588     public String[] getDeselectedFiles()
589     {
590         slowScan();
591         return filesDeselected.toArray( EMPTY_STRING_ARRAY );
592     }
593 
594     /**
595      * Returns the names of the directories which matched at least one of the include patterns and none of the exclude
596      * patterns. The names are relative to the base directory.
597      *
598      * @return the names of the directories which matched at least one of the include patterns and none of the exclude
599      *         patterns.
600      */
601     @Override
602     public String[] getIncludedDirectories()
603     {
604         return dirsIncluded.toArray( EMPTY_STRING_ARRAY );
605     }
606 
607     /**
608      * Returns the names of the directories which matched none of the include patterns. The names are relative to the
609      * base directory. This involves performing a slow scan if one has not already been completed.
610      *
611      * @return the names of the directories which matched none of the include patterns.
612      * @see #slowScan
613      */
614     public String[] getNotIncludedDirectories()
615     {
616         slowScan();
617         return dirsNotIncluded.toArray( EMPTY_STRING_ARRAY );
618     }
619 
620     /**
621      * Returns the names of the directories which matched at least one of the include patterns and at least one of the
622      * exclude patterns. The names are relative to the base directory. This involves performing a slow scan if one has
623      * not already been completed.
624      *
625      * @return the names of the directories which matched at least one of the include patterns and at least one of the
626      *         exclude patterns.
627      * @see #slowScan
628      */
629     public String[] getExcludedDirectories()
630     {
631         slowScan();
632         return dirsExcluded.toArray( EMPTY_STRING_ARRAY );
633     }
634 
635     /**
636      * <p>Returns the names of the directories which were selected out and therefore not ultimately included.</p>
637      * 
638      * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
639      * completed.</p>
640      *
641      * @return the names of the directories which were deselected.
642      * @see #slowScan
643      */
644     public String[] getDeselectedDirectories()
645     {
646         slowScan();
647         return dirsDeselected.toArray( EMPTY_STRING_ARRAY );
648     }
649 
650     /**
651      * <p>Checks whether a given file is a symbolic link.</p>
652      * 
653      * <p>It doesn't really test for symbolic links but whether the canonical and absolute paths of the file are identical
654      * - this may lead to false positives on some platforms.
655      * </p>
656      *
657      * @param parent the parent directory of the file to test
658      * @param name the name of the file to test.
659      * @return true if it's a symbolic link
660      * @throws java.io.IOException .
661      * @since Ant 1.5
662      */
663     public boolean isSymbolicLink( File parent, String name )
664         throws IOException
665     {
666         return NioFiles.isSymbolicLink( new File( parent, name ) );
667     }
668 
669     /**
670      * <p>Checks whether the parent of this file is a symbolic link.</p>
671      * 
672      * <p>For java versions prior to 7 It doesn't really test for symbolic links but whether the canonical and absolute
673      * paths of the file are identical - this may lead to false positives on some platforms.</p>
674      *
675      * @param parent the parent directory of the file to test
676      * @param name the name of the file to test.
677      * @return true if it's a symbolic link
678      * @throws java.io.IOException .
679      * @since Ant 1.5
680      */
681     public boolean isParentSymbolicLink( File parent, String name )
682         throws IOException
683     {
684         return NioFiles.isSymbolicLink( parent );
685     }
686 }