View Javadoc
1   package org.apache.maven.archetype.common.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *   http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing,
15   * software distributed under the License is distributed on an
16   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17   * KIND, either express or implied.  See the License for the
18   * specific language governing permissions and limitations
19   * under the License.
20   */
21  
22  import org.codehaus.plexus.util.SelectorUtils;
23  import org.codehaus.plexus.util.StringUtils;
24  
25  import java.io.File;
26  import java.util.ArrayList;
27  import java.util.List;
28  
29  /**
30   * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
31   * 
32   * <p>These criteria consist of selectors and patterns which have been specified. With the selectors
33   * you can select which files you want to have included. Files which are not selected are excluded.
34   * With patterns you can include or exclude files based on their filename.</p>
35   *
36   * <p>The idea is simple. A given directory is recursively scanned for all files and directories.
37   * Each file/directory is matched against a set of selectors, including special support for matching
38   * against filenames with include and and exclude patterns. Only files/directories which match at
39   * least one pattern of the include pattern list or other file selector, and don't match any pattern
40   * of the exclude pattern list or fail to match against a required selector will be placed in the
41   * list of files/directories found.</p>
42   *
43   * <p>When no list of include patterns is supplied, "**" will be used, which means that everything
44   * will be matched. When no list of exclude patterns is supplied, an empty list is used, such that
45   * nothing will be excluded. When no selectors are supplied, none are applied.</p>
46   *
47   * <p>The filename pattern matching is done as follows: The name to be matched is split up in path
48   * segments. A path segment is the name of a directory or file, which is bounded by <code>
49   * File.separator</code> ('/' under UNIX, '\' under Windows). For example, "abc/def/ghi/xyz.java" is
50   * split up in the segments "abc", "def","ghi" and "xyz.java". The same is done for the pattern
51   * against which should be matched.</p>
52   *
53   * <p>The segments of the name and the pattern are then matched against each other. When '**' is
54   * used for a path segment in the pattern, it matches zero or more path segments of the name.</p>
55   *
56   * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of
57   * the pattern and the string to match:<br>
58   * When a pattern starts with a <code>File.separator</code>, the string to match must also start
59   * with a <code>File.separator</code>. When a pattern does not start with a <code>
60   * File.separator</code>, the string to match may not start with a <code>File.separator</code>. When
61   * one of these rules is not obeyed, the string will not match.</p>
62   *
63   * <p>When a name path segment is matched against a pattern path segment, the following special
64   * characters can be used:<br>
65   * '*' matches zero or more characters<br>
66   * '?' matches one character.</p>
67   *
68   * <p>Examples:</p>
69   *
70   * <p>"**\*.class" matches all .class files/dirs in a directory tree.</p>
71   *
72   * <p>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and
73   * then ".java", in a directory called test.</p>
74   *
75   * <p>"**" matches everything in a directory tree.</p>
76   *
77   * <p>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent
78   * directory called test (e.g. "abc\test\def\ghi\XYZ123").</p>
79   *
80   * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
81   *
82   * <p>Example of usage:</p>
83   *
84   * <pre>
85   * String[] includes = {"**\\*.class"};
86   * String[] excludes = {"modules\\*\\**"};
87   * ds.setIncludes(includes);
88   * ds.setExcludes(excludes);
89   * ds.setBasedir(new File("test"));
90   * ds.setCaseSensitive(true);
91   * ds.scan();
92   * 
93   * System.out.println("FILES:");
94   * String[] files = ds.getIncludedFiles();
95   * for (int i = 0; i &lt; files.length; i++) {
96   * System.out.println(files[i]);
97   * }
98   * </pre>
99   *
100  * <p>This will scan a directory called test for .class files, but excludes all files in all proper
101  * subdirectories of a directory called "modules"</p>
102  *
103  * <p>This class was stealed from rg.coudehaus.plexus.util.DirectoryScanner and adapted to search
104  * from a List&lt;String&gt;</p>
105  *
106  * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
107  * @author Magesh Umasankar
108  * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
109  * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
110  */
111 public class ListScanner
112 {
113     /**
114      * Patterns which should be excluded by default.
115      *
116      * @see #addDefaultExcludes()
117      */
118     public static final String[] DEFAULTEXCLUDES =
119         { // Miscellaneous typical temporary files
120             "**/*~", "**/#*#", "**/.#*", "**/%*%", "**/._*",
121 
122             // CVS
123             "**/CVS", "**/CVS/**", "**/.cvsignore",
124 
125             // SCCS
126             "**/SCCS", "**/SCCS/**",
127 
128             // Visual SourceSafe
129             "**/vssver.scc",
130 
131             // Subversion
132             "**/.svn", "**/.svn/**",
133 
134             // Arch
135             "**/.arch-ids", "**/.arch-ids/**",
136 
137             // Bazaar
138             "**/.bzr", "**/.bzr/**",
139 
140             // GIT
141             "**/.git", "**/.git/**",
142 
143             // Mercurial
144             "**/.hg",  "**/.hg/**",
145 
146             // SurroundSCM
147             "**/.MySCMServerInfo",
148 
149             // Mac
150             "**/.DS_Store"
151         };
152 
153     /** The base directory to be scanned. */
154     protected String basedir;
155 
156     /** Whether or not everything tested so far has been included. */
157     protected boolean everythingIncluded = true;
158 
159     /** The patterns for the files to be excluded. */
160     protected String[] excludes;
161 
162     /** The patterns for the files to be included. */
163     protected String[] includes;
164 
165     /** Whether or not the file system should be treated as a case sensitive one. */
166     protected boolean isCaseSensitive = true;
167 
168     /** Sole constructor. */
169     public ListScanner()
170     {
171     }
172 
173     public static String getDefaultExcludes()
174     {
175         return StringUtils.join( DEFAULTEXCLUDES, "," );
176     }
177 
178     /**
179      * Tests whether or not a string matches against a pattern. The pattern may contain two special
180      * characters:<br>
181      * '*' means zero or more characters<br>
182      * '?' means one and only one character
183      *
184      * @param pattern The pattern to match against. Must not be <code>null</code>.
185      * @param str     The string which must be matched against the pattern. Must not be <code>
186      *                null</code>.
187      * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
188      *         otherwise.
189      */
190     public static boolean match( String pattern, String str )
191     {
192         // default matches the SelectorUtils default
193         return match( pattern, str, true );
194     }
195 
196     /**
197      * Tests whether or not a string matches against a pattern. The pattern may contain two special
198      * characters:<br>
199      * '*' means zero or more characters<br>
200      * '?' means one and only one character
201      *
202      * @param pattern         The pattern to match against. Must not be <code>null</code>.
203      * @param str             The string which must be matched against the pattern. Must not be
204      *                        <code>null</code>.
205      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
206      * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
207      *         otherwise.
208      */
209     protected static boolean match( String pattern, String str, boolean isCaseSensitive )
210     {
211         return SelectorUtils.match( pattern, str, isCaseSensitive );
212     }
213 
214     /**
215      * Tests whether or not a given path matches a given pattern.
216      *
217      * @param pattern The pattern to match against. Must not be <code>null</code>.
218      * @param str     The path to match, as a String. Must not be <code>null</code>.
219      * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
220      *         otherwise.
221      */
222     protected static boolean matchPath( String pattern, String str )
223     {
224         // default matches the SelectorUtils default
225         return matchPath( pattern, str, true );
226     }
227 
228     /**
229      * Tests whether or not a given path matches a given pattern.
230      *
231      * @param pattern         The pattern to match against. Must not be <code>null</code>.
232      * @param str             The path to match, as a String. Must not be <code>null</code>.
233      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
234      * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
235      *         otherwise.
236      */
237     protected static boolean matchPath( String pattern, String str, boolean isCaseSensitive )
238     {
239         return SelectorUtils.matchPath( PathUtils.convertPathForOS( pattern ), PathUtils.convertPathForOS( str ),
240                                         isCaseSensitive );
241     }
242 
243     /**
244      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".<p>
245      * 
246      * <p>This is not a general purpose test and should only be used if you can live with false
247      * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
248      * true</code>.</p>
249      *
250      * @param pattern The pattern to match against. Must not be <code>null</code>.
251      * @param str     The path to match, as a String. Must not be <code>null</code>.
252      * @return whether or not a given path matches the start of a given pattern up to the first
253      *         "**".
254      */
255     protected static boolean matchPatternStart( String pattern, String str )
256     {
257         // default matches SelectorUtils default
258         return matchPatternStart( pattern, str, true );
259     }
260 
261     /**
262      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
263      * 
264      * <p>This is not a general purpose test and should only be used if you can live with false
265      * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
266      * true</code>.</p>
267      *
268      * @param pattern         The pattern to match against. Must not be <code>null</code>.
269      * @param str             The path to match, as a String. Must not be <code>null</code>.
270      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
271      * @return whether or not a given path matches the start of a given pattern up to the first
272      *         "**".
273      */
274     protected static boolean matchPatternStart( String pattern, String str, boolean isCaseSensitive )
275     {
276         return SelectorUtils.matchPatternStart( PathUtils.convertPathForOS( pattern ),
277                                                 PathUtils.convertPathForOS( str ), isCaseSensitive );
278     }
279 
280     /** Adds default exclusions to the current exclusions set. */
281     public void addDefaultExcludes()
282     {
283         int excludesLength = ( excludes == null ) ? 0 : excludes.length;
284         String[] newExcludes;
285         newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
286         if ( excludesLength > 0 )
287         {
288             System.arraycopy( excludes, 0, newExcludes, 0, excludesLength );
289         }
290         for ( int i = 0; i < DEFAULTEXCLUDES.length; i++ )
291         {
292             newExcludes[i + excludesLength] =
293                 DEFAULTEXCLUDES[i].replace( '/', File.separatorChar ).replace( '\\', File.separatorChar );
294         }
295         excludes = newExcludes;
296     }
297 
298     /**
299      * Returns the base directory to be scanned. This is the directory which is scanned recursively.
300      *
301      * @return the base directory to be scanned
302      */
303     public String getBasedir()
304     {
305         return basedir;
306     }
307 
308     /**
309      * Sets the base directory to be scanned. This is the directory which is scanned recursively.
310      * This directory is normalized for multiple os's (all / and \\ are replaced with
311      * File.separatorChar
312      *
313      * @param basedir The base directory for scanning. Should not be <code>null</code>.
314      */
315     public void setBasedir( String basedir )
316     {
317         this.basedir = basedir;
318     }
319 
320     /**
321      * Sets whether or not the file system should be regarded as case sensitive.
322      *
323      * @param isCaseSensitive whether or not the file system should be regarded as a case
324      *                        sensitive one
325      */
326     public void setCaseSensitive( boolean isCaseSensitive )
327     {
328         this.isCaseSensitive = isCaseSensitive;
329     }
330 
331     /**
332      * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by <code>
333      * File.separatorChar</code>, so the separator used need not match <code>
334      * File.separatorChar</code>.</p>
335      * 
336      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
337      *
338      * @param excludesList A list of exclude patterns. May be <code>null</code>, indicating that no
339      *                 files should be excluded. If a non-<code>null</code> list is given, all
340      *                 elements must be non-<code>null</code>.
341      */
342     public void setExcludes( List<String> excludesList )
343     {
344         String[] excludes = excludesList.toArray( new String[excludesList.size()] );
345         if ( excludes == null )
346         {
347             this.excludes = null;
348         }
349         else
350         {
351             setExcludes( excludes );
352         }
353     }
354 
355     public void setExcludes( String excludes )
356     {
357         if ( excludes == null )
358         {
359             this.excludes = null;
360         }
361         else
362         {
363             setExcludes( StringUtils.split( excludes, "," ) );
364         }
365     }
366 
367     /**
368      * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by <code>
369      * File.separatorChar</code>, so the separator used need not match <code>
370      * File.separatorChar</code>.</p>
371      * 
372      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
373      *
374      * @param includesList A list of include patterns. May be <code>null</code>, indicating that all
375      *                 files should be included. If a non-<code>null</code> list is given, all
376      *                 elements must be non-<code>null</code>.
377      */
378     public void setIncludes( List<String> includesList )
379     {
380         String[] includes = includesList.toArray( new String[includesList.size()] );
381         if ( includes == null )
382         {
383             this.includes = null;
384         }
385         else
386         {
387             setIncludes( includes );
388         }
389     }
390 
391     public void setIncludes( String includes )
392     {
393         if ( includes == null )
394         {
395             this.includes = null;
396         }
397         else
398         {
399             setIncludes( StringUtils.split( includes, "," ) );
400         }
401     }
402 
403     /**
404      * Scans the base directory for files which match at least one include pattern and don't match
405      * any exclude patterns. If there are selectors then the files must pass muster there, as well.
406      *
407      * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is
408      *                               <code>null</code>, doesn't exist, or isn't a directory).
409      */
410     public List<String> scan( List<String> files )
411         throws
412         IllegalStateException
413     {
414 //        System.err.println("Scanning \nbasedir="+basedir +
415 //                " \nincludes=" + java.util.Arrays.toString(includes) +
416 //                " \nexcludes=" + java.util.Arrays.toString(excludes) +
417 //                " \non files="+files);
418         if ( basedir == null )
419         {
420             throw new IllegalStateException( "No basedir set" );
421         }
422 
423         if ( includes == null )
424         {
425             // No includes supplied, so set it to 'matches all'
426             includes = new String[1];
427             includes[0] = "**";
428         }
429         if ( excludes == null )
430         {
431             excludes = new String[0];
432         }
433 
434         List<String> result = new ArrayList<>();
435 
436         for ( String fileName : files )
437         {
438             if ( isIncluded( fileName ) && !isExcluded( fileName ) )
439             {
440                 result.add( fileName );
441             }
442         }
443 //        System.err.println("Result "+result+"\n\n\n");
444         return result;
445     }
446 
447     /**
448      * Tests whether or not a name matches against at least one exclude pattern.
449      *
450      * @param name The name to match. Must not be <code>null</code>.
451      * @return <code>true</code> when the name matches against at least one exclude pattern, or
452      *         <code>false</code> otherwise.
453      */
454     protected boolean isExcluded( String name )
455     {
456         return matchesPatterns( name, excludes );
457     }
458 
459     /**
460      * Tests whether or not a name matches against at least one include pattern.
461      *
462      * @param name The name to match. Must not be <code>null</code>.
463      * @return <code>true</code> when the name matches against at least one include pattern, or
464      *         <code>false</code> otherwise.
465      */
466     protected boolean isIncluded( String name )
467     {
468         return matchesPatterns( name, includes );
469     }
470 
471     /**
472      * Tests whether or not a name matches against at least one include pattern.
473      *
474      * @param name     The name to match. Must not be <code>null</code>.
475      * @param patterns The list of patterns to match.
476      * @return <code>true</code> when the name matches against at least one include pattern, or
477      *         <code>false</code> otherwise.
478      */
479     protected boolean matchesPatterns( String name, String[] patterns )
480     {
481         // avoid extra object creation in the loop
482         String path = null;
483 
484         String baseDir = getBasedir();
485         if ( baseDir.length() > 0 )
486         {
487             baseDir = baseDir.concat( File.separator );
488         }
489 
490         for ( int i = 0; i < patterns.length; i++ )
491         {
492             path = PathUtils.convertPathForOS( baseDir + patterns[i] );
493 //            System.err.println("path="+path);
494             if ( matchPath( path, name, isCaseSensitive ) )
495             {
496                 return true;
497             }
498         }
499         return false;
500     }
501 
502     private void setExcludes( String[] excludes )
503     {
504         this.excludes = setPatterns( excludes );
505     }
506 
507     private void setIncludes( String[] includes )
508     {
509         this.includes = setPatterns( includes );
510     }
511 
512     private String[] setPatterns( String[] patterns )
513     {
514         String[] result = null;
515         if ( ( patterns != null ) && ( patterns.length > 0 ) )
516         {
517             result = new String[patterns.length];
518             for ( int i = 0; i < patterns.length; i++ )
519             {
520                 String pattern = patterns[i].trim();
521 
522                 // don't normalize the pattern here, we internalize the normalization
523                 // just normalize for comparison purposes
524                 if ( PathUtils.convertPathForOS( pattern ).endsWith( File.separator ) )
525                 {
526                     pattern += "**";
527                 }
528                 result[i] = pattern;
529             }
530         }
531         return result;
532     }
533 }