View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *   http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.maven.archetype.common.util;
20  
21  import java.io.File;
22  import java.util.ArrayList;
23  import java.util.List;
24  
25  import org.codehaus.plexus.util.SelectorUtils;
26  import org.codehaus.plexus.util.StringUtils;
27  
28  /**
29   * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
30   *
31   * <p>These criteria consist of selectors and patterns which have been specified. With the selectors
32   * you can select which files you want to have included. Files which are not selected are excluded.
33   * With patterns you can include or exclude files based on their filename.</p>
34   *
35   * <p>The idea is simple. A given directory is recursively scanned for all files and directories.
36   * Each file/directory is matched against a set of selectors, including special support for matching
37   * against filenames with include and exclude patterns. Only files/directories which match at
38   * least one pattern of the include pattern list or other file selector, and don't match any pattern
39   * of the exclude pattern list or fail to match against a required selector will be placed in the
40   * list of files/directories found.</p>
41   *
42   * <p>When no list of include patterns is supplied, "**" will be used, which means that everything
43   * will be matched. When no list of exclude patterns is supplied, an empty list is used, such that
44   * nothing will be excluded. When no selectors are supplied, none are applied.</p>
45   *
46   * <p>The filename pattern matching is done as follows: The name to be matched is split up in path
47   * segments. A path segment is the name of a directory or file, which is bounded by <code>
48   * File.separator</code> ('/' under UNIX, '\' under Windows). For example, "abc/def/ghi/xyz.java" is
49   * split up in the segments "abc", "def","ghi" and "xyz.java". The same is done for the pattern
50   * against which should be matched.</p>
51   *
52   * <p>The segments of the name and the pattern are then matched against each other. When '**' is
53   * used for a path segment in the pattern, it matches zero or more path segments of the name.</p>
54   *
55   * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of
56   * the pattern and the string to match:<br>
57   * When a pattern starts with a <code>File.separator</code>, the string to match must also start
58   * with a <code>File.separator</code>. When a pattern does not start with a <code>
59   * File.separator</code>, the string to match may not start with a <code>File.separator</code>. When
60   * one of these rules is not obeyed, the string will not match.</p>
61   *
62   * <p>When a name path segment is matched against a pattern path segment, the following special
63   * characters can be used:<br>
64   * '*' matches zero or more characters<br>
65   * '?' matches one character.</p>
66   *
67   * <p>Examples:</p>
68   *
69   * <p>"**\*.class" matches all .class files/dirs in a directory tree.</p>
70   *
71   * <p>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and
72   * then ".java", in a directory called test.</p>
73   *
74   * <p>"**" matches everything in a directory tree.</p>
75   *
76   * <p>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent
77   * directory called test (e.g. "abc\test\def\ghi\XYZ123").</p>
78   *
79   * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
80   *
81   * <p>Example of usage:</p>
82   *
83   * <pre>
84   * String[] includes = {"**\\*.class"};
85   * String[] excludes = {"modules\\*\\**"};
86   * ds.setIncludes(includes);
87   * ds.setExcludes(excludes);
88   * ds.setBasedir(new File("test"));
89   * ds.setCaseSensitive(true);
90   * ds.scan();
91   *
92   * System.out.println("FILES:");
93   * String[] files = ds.getIncludedFiles();
94   * for (int i = 0; i &lt; files.length; i++) {
95   * System.out.println(files[i]);
96   * }
97   * </pre>
98   *
99   * <p>This will scan a directory called test for .class files, but excludes all files in all proper
100  * subdirectories of a directory called "modules"</p>
101  *
102  * <p>This class was taken from org.codehaus.plexus.util.DirectoryScanner and adapted to search
103  * from a List&lt;String&gt;</p>
104  *
105  * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
106  * @author Magesh Umasankar
107  * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
108  * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
109  */
110 public class ListScanner {
111     /**
112      * Patterns which should be excluded by default.
113      *
114      * @see #addDefaultExcludes()
115      */
116     public static final String[] DEFAULTEXCLUDES = { // Miscellaneous typical temporary files
117         "**/*~",
118         "**/#*#",
119         "**/.#*",
120         "**/%*%",
121         "**/._*",
122 
123         // CVS
124         "**/CVS",
125         "**/CVS/**",
126         "**/.cvsignore",
127 
128         // SCCS
129         "**/SCCS",
130         "**/SCCS/**",
131 
132         // Visual SourceSafe
133         "**/vssver.scc",
134 
135         // Subversion
136         "**/.svn",
137         "**/.svn/**",
138 
139         // Arch
140         "**/.arch-ids",
141         "**/.arch-ids/**",
142 
143         // Bazaar
144         "**/.bzr",
145         "**/.bzr/**",
146 
147         // GIT
148         "**/.git",
149         "**/.git/**",
150 
151         // Mercurial
152         "**/.hg",
153         "**/.hg/**",
154 
155         // SurroundSCM
156         "**/.MySCMServerInfo",
157 
158         // Mac
159         "**/.DS_Store"
160     };
161 
162     /** The base directory to be scanned. */
163     protected String basedir;
164 
165     /** Whether or not everything tested so far has been included. */
166     protected boolean everythingIncluded = true;
167 
168     /** The patterns for the files to be excluded. */
169     protected String[] excludes;
170 
171     /** The patterns for the files to be included. */
172     protected String[] includes;
173 
174     /** Whether or not the file system should be treated as a case sensitive one. */
175     protected boolean isCaseSensitive = true;
176 
177     /** Sole constructor. */
178     public ListScanner() {}
179 
180     public static String getDefaultExcludes() {
181         return StringUtils.join(DEFAULTEXCLUDES, ",");
182     }
183 
184     /**
185      * Tests whether or not a string matches against a pattern. The pattern may contain two special
186      * characters:<br>
187      * '*' means zero or more characters<br>
188      * '?' means one and only one character
189      *
190      * @param pattern The pattern to match against. Must not be <code>null</code>.
191      * @param str     The string which must be matched against the pattern. Must not be <code>
192      *                null</code>.
193      * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
194      *         otherwise.
195      */
196     public static boolean match(String pattern, String str) {
197         // default matches the SelectorUtils default
198         return match(pattern, str, true);
199     }
200 
201     /**
202      * Tests whether or not a string matches against a pattern. The pattern may contain two special
203      * characters:<br>
204      * '*' means zero or more characters<br>
205      * '?' means one and only one character
206      *
207      * @param pattern         The pattern to match against. Must not be <code>null</code>.
208      * @param str             The string which must be matched against the pattern. Must not be
209      *                        <code>null</code>.
210      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
211      * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
212      *         otherwise.
213      */
214     protected static boolean match(String pattern, String str, boolean isCaseSensitive) {
215         return SelectorUtils.match(pattern, str, isCaseSensitive);
216     }
217 
218     /**
219      * Tests whether or not a given path matches a given pattern.
220      *
221      * @param pattern The pattern to match against. Must not be <code>null</code>.
222      * @param str     The path to match, as a String. Must not be <code>null</code>.
223      * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
224      *         otherwise.
225      */
226     protected static boolean matchPath(String pattern, String str) {
227         // default matches the SelectorUtils default
228         return matchPath(pattern, str, true);
229     }
230 
231     /**
232      * Tests whether or not a given path matches a given pattern.
233      *
234      * @param pattern         The pattern to match against. Must not be <code>null</code>.
235      * @param str             The path to match, as a String. Must not be <code>null</code>.
236      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
237      * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
238      *         otherwise.
239      */
240     protected static boolean matchPath(String pattern, String str, boolean isCaseSensitive) {
241         return SelectorUtils.matchPath(
242                 PathUtils.convertPathForOS(pattern), PathUtils.convertPathForOS(str), isCaseSensitive);
243     }
244 
245     /**
246      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".<p>
247      *
248      * <p>This is not a general purpose test and should only be used if you can live with false
249      * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
250      * true</code>.</p>
251      *
252      * @param pattern The pattern to match against. Must not be <code>null</code>.
253      * @param str     The path to match, as a String. Must not be <code>null</code>.
254      * @return whether or not a given path matches the start of a given pattern up to the first
255      *         "**".
256      */
257     protected static boolean matchPatternStart(String pattern, String str) {
258         // default matches SelectorUtils default
259         return matchPatternStart(pattern, str, true);
260     }
261 
262     /**
263      * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
264      *
265      * <p>This is not a general purpose test and should only be used if you can live with false
266      * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
267      * true</code>.</p>
268      *
269      * @param pattern         The pattern to match against. Must not be <code>null</code>.
270      * @param str             The path to match, as a String. Must not be <code>null</code>.
271      * @param isCaseSensitive Whether or not matching should be performed case sensitively.
272      * @return whether or not a given path matches the start of a given pattern up to the first
273      *         "**".
274      */
275     protected static boolean matchPatternStart(String pattern, String str, boolean isCaseSensitive) {
276         return SelectorUtils.matchPatternStart(
277                 PathUtils.convertPathForOS(pattern), PathUtils.convertPathForOS(str), isCaseSensitive);
278     }
279 
280     /** Adds default exclusions to the current exclusions set. */
281     public void addDefaultExcludes() {
282         int excludesLength = (excludes == null) ? 0 : excludes.length;
283         String[] newExcludes;
284         newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
285         if (excludesLength > 0) {
286             System.arraycopy(excludes, 0, newExcludes, 0, excludesLength);
287         }
288         for (int i = 0; i < DEFAULTEXCLUDES.length; i++) {
289             newExcludes[i + excludesLength] =
290                     DEFAULTEXCLUDES[i].replace('/', File.separatorChar).replace('\\', File.separatorChar);
291         }
292         excludes = newExcludes;
293     }
294 
295     /**
296      * Returns the base directory to be scanned. This is the directory which is scanned recursively.
297      *
298      * @return the base directory to be scanned
299      */
300     public String getBasedir() {
301         return basedir;
302     }
303 
304     /**
305      * Sets the base directory to be scanned. This is the directory which is scanned recursively.
306      * This directory is normalized for multiple os's (all / and \\ are replaced with
307      * File.separatorChar
308      *
309      * @param basedir The base directory for scanning. Should not be <code>null</code>.
310      */
311     public void setBasedir(String basedir) {
312         this.basedir = basedir;
313     }
314 
315     /**
316      * Sets whether or not the file system should be regarded as case sensitive.
317      *
318      * @param isCaseSensitive whether or not the file system should be regarded as a case
319      *                        sensitive one
320      */
321     public void setCaseSensitive(boolean isCaseSensitive) {
322         this.isCaseSensitive = isCaseSensitive;
323     }
324 
325     /**
326      * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by <code>
327      * File.separatorChar</code>, so the separator used need not match <code>
328      * File.separatorChar</code>.</p>
329      *
330      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
331      *
332      * @param excludesList A list of exclude patterns. May be <code>null</code>, indicating that no
333      *                 files should be excluded. If a non-<code>null</code> list is given, all
334      *                 elements must be non-<code>null</code>.
335      */
336     public void setExcludes(List<String> excludesList) {
337         String[] excludes = excludesList.toArray(new String[0]);
338         if (excludes == null) {
339             this.excludes = null;
340         } else {
341             setExcludes(excludes);
342         }
343     }
344 
345     public void setExcludes(String excludes) {
346         if (excludes == null) {
347             this.excludes = null;
348         } else {
349             setExcludes(StringUtils.split(excludes, ","));
350         }
351     }
352 
353     /**
354      * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by <code>
355      * File.separatorChar</code>, so the separator used need not match <code>
356      * File.separatorChar</code>.</p>
357      *
358      * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
359      *
360      * @param includesList A list of include patterns. May be <code>null</code>, indicating that all
361      *                 files should be included. If a non-<code>null</code> list is given, all
362      *                 elements must be non-<code>null</code>.
363      */
364     public void setIncludes(List<String> includesList) {
365         String[] includes = includesList.toArray(new String[0]);
366         if (includes == null) {
367             this.includes = null;
368         } else {
369             setIncludes(includes);
370         }
371     }
372 
373     public void setIncludes(String includes) {
374         if (includes == null) {
375             this.includes = null;
376         } else {
377             setIncludes(StringUtils.split(includes, ","));
378         }
379     }
380 
381     /**
382      * Scans the base directory for files which match at least one include pattern and don't match
383      * any exclude patterns. If there are selectors then the files must pass muster there, as well.
384      *
385      * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is
386      *                               <code>null</code>, doesn't exist, or isn't a directory).
387      */
388     public List<String> scan(List<String> files) throws IllegalStateException {
389         //        System.err.println("Scanning \nbasedir="+basedir +
390         //                " \nincludes=" + java.util.Arrays.toString(includes) +
391         //                " \nexcludes=" + java.util.Arrays.toString(excludes) +
392         //                " \non files="+files);
393         if (basedir == null) {
394             throw new IllegalStateException("No basedir set");
395         }
396 
397         if (includes == null) {
398             // No includes supplied, so set it to 'matches all'
399             includes = new String[1];
400             includes[0] = "**";
401         }
402         if (excludes == null) {
403             excludes = new String[0];
404         }
405 
406         List<String> result = new ArrayList<>();
407 
408         for (String fileName : files) {
409             if (isIncluded(fileName) && !isExcluded(fileName)) {
410                 result.add(fileName);
411             }
412         }
413         //        System.err.println("Result "+result+"\n\n\n");
414         return result;
415     }
416 
417     /**
418      * Tests whether or not a name matches against at least one exclude pattern.
419      *
420      * @param name The name to match. Must not be <code>null</code>.
421      * @return <code>true</code> when the name matches against at least one exclude pattern, or
422      *         <code>false</code> otherwise.
423      */
424     protected boolean isExcluded(String name) {
425         return matchesPatterns(name, excludes);
426     }
427 
428     /**
429      * Tests whether or not a name matches against at least one include pattern.
430      *
431      * @param name The name to match. Must not be <code>null</code>.
432      * @return <code>true</code> when the name matches against at least one include pattern, or
433      *         <code>false</code> otherwise.
434      */
435     protected boolean isIncluded(String name) {
436         return matchesPatterns(name, includes);
437     }
438 
439     /**
440      * Tests whether or not a name matches against at least one include pattern.
441      *
442      * @param name     The name to match. Must not be <code>null</code>.
443      * @param patterns The list of patterns to match.
444      * @return <code>true</code> when the name matches against at least one include pattern, or
445      *         <code>false</code> otherwise.
446      */
447     protected boolean matchesPatterns(String name, String[] patterns) {
448         // avoid extra object creation in the loop
449         String path = null;
450 
451         String baseDir = getBasedir();
452         if (!baseDir.isEmpty()) {
453             baseDir = baseDir.concat(File.separator);
454         }
455 
456         for (int i = 0; i < patterns.length; i++) {
457             path = PathUtils.convertPathForOS(baseDir + patterns[i]);
458             //            System.err.println("path="+path);
459             if (matchPath(path, name, isCaseSensitive)) {
460                 return true;
461             }
462         }
463         return false;
464     }
465 
466     private void setExcludes(String[] excludes) {
467         this.excludes = setPatterns(excludes);
468     }
469 
470     private void setIncludes(String[] includes) {
471         this.includes = setPatterns(includes);
472     }
473 
474     private String[] setPatterns(String[] patterns) {
475         String[] result = null;
476         if ((patterns != null) && (patterns.length > 0)) {
477             result = new String[patterns.length];
478             for (int i = 0; i < patterns.length; i++) {
479                 String pattern = patterns[i].trim();
480 
481                 // don't normalize the pattern here, we internalize the normalization
482                 // just normalize for comparison purposes
483                 if (PathUtils.convertPathForOS(pattern).endsWith(File.separator)) {
484                     pattern += "**";
485                 }
486                 result[i] = pattern;
487             }
488         }
489         return result;
490     }
491 }