1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.maven.archetype.common.util;
20
21 import java.io.File;
22 import java.util.ArrayList;
23 import java.util.List;
24
25 import org.codehaus.plexus.util.SelectorUtils;
26 import org.codehaus.plexus.util.StringUtils;
27
28 /**
29 * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
30 *
31 * <p>These criteria consist of selectors and patterns which have been specified. With the selectors
32 * you can select which files you want to have included. Files which are not selected are excluded.
33 * With patterns you can include or exclude files based on their filename.</p>
34 *
35 * <p>The idea is simple. A given directory is recursively scanned for all files and directories.
36 * Each file/directory is matched against a set of selectors, including special support for matching
37 * against filenames with include and and exclude patterns. Only files/directories which match at
38 * least one pattern of the include pattern list or other file selector, and don't match any pattern
39 * of the exclude pattern list or fail to match against a required selector will be placed in the
40 * list of files/directories found.</p>
41 *
42 * <p>When no list of include patterns is supplied, "**" will be used, which means that everything
43 * will be matched. When no list of exclude patterns is supplied, an empty list is used, such that
44 * nothing will be excluded. When no selectors are supplied, none are applied.</p>
45 *
46 * <p>The filename pattern matching is done as follows: The name to be matched is split up in path
47 * segments. A path segment is the name of a directory or file, which is bounded by <code>
48 * File.separator</code> ('/' under UNIX, '\' under Windows). For example, "abc/def/ghi/xyz.java" is
49 * split up in the segments "abc", "def","ghi" and "xyz.java". The same is done for the pattern
50 * against which should be matched.</p>
51 *
52 * <p>The segments of the name and the pattern are then matched against each other. When '**' is
53 * used for a path segment in the pattern, it matches zero or more path segments of the name.</p>
54 *
55 * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of
56 * the pattern and the string to match:<br>
57 * When a pattern starts with a <code>File.separator</code>, the string to match must also start
58 * with a <code>File.separator</code>. When a pattern does not start with a <code>
59 * File.separator</code>, the string to match may not start with a <code>File.separator</code>. When
60 * one of these rules is not obeyed, the string will not match.</p>
61 *
62 * <p>When a name path segment is matched against a pattern path segment, the following special
63 * characters can be used:<br>
64 * '*' matches zero or more characters<br>
65 * '?' matches one character.</p>
66 *
67 * <p>Examples:</p>
68 *
69 * <p>"**\*.class" matches all .class files/dirs in a directory tree.</p>
70 *
71 * <p>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and
72 * then ".java", in a directory called test.</p>
73 *
74 * <p>"**" matches everything in a directory tree.</p>
75 *
76 * <p>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent
77 * directory called test (e.g. "abc\test\def\ghi\XYZ123").</p>
78 *
79 * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
80 *
81 * <p>Example of usage:</p>
82 *
83 * <pre>
84 * String[] includes = {"**\\*.class"};
85 * String[] excludes = {"modules\\*\\**"};
86 * ds.setIncludes(includes);
87 * ds.setExcludes(excludes);
88 * ds.setBasedir(new File("test"));
89 * ds.setCaseSensitive(true);
90 * ds.scan();
91 *
92 * System.out.println("FILES:");
93 * String[] files = ds.getIncludedFiles();
94 * for (int i = 0; i < files.length; i++) {
95 * System.out.println(files[i]);
96 * }
97 * </pre>
98 *
99 * <p>This will scan a directory called test for .class files, but excludes all files in all proper
100 * subdirectories of a directory called "modules"</p>
101 *
102 * <p>This class was stealed from rg.coudehaus.plexus.util.DirectoryScanner and adapted to search
103 * from a List<String></p>
104 *
105 * @author Arnout J. Kuiper <a href="mailto:ajkuiper@wxs.nl">ajkuiper@wxs.nl</a>
106 * @author Magesh Umasankar
107 * @author <a href="mailto:bruce@callenish.com">Bruce Atherton</a>
108 * @author <a href="mailto:levylambert@tiscali-dsl.de">Antoine Levy-Lambert</a>
109 */
110 public class ListScanner {
111 /**
112 * Patterns which should be excluded by default.
113 *
114 * @see #addDefaultExcludes()
115 */
116 public static final String[] DEFAULTEXCLUDES = { // Miscellaneous typical temporary files
117 "**/*~",
118 "**/#*#",
119 "**/.#*",
120 "**/%*%",
121 "**/._*",
122
123 // CVS
124 "**/CVS",
125 "**/CVS/**",
126 "**/.cvsignore",
127
128 // SCCS
129 "**/SCCS",
130 "**/SCCS/**",
131
132 // Visual SourceSafe
133 "**/vssver.scc",
134
135 // Subversion
136 "**/.svn",
137 "**/.svn/**",
138
139 // Arch
140 "**/.arch-ids",
141 "**/.arch-ids/**",
142
143 // Bazaar
144 "**/.bzr",
145 "**/.bzr/**",
146
147 // GIT
148 "**/.git",
149 "**/.git/**",
150
151 // Mercurial
152 "**/.hg",
153 "**/.hg/**",
154
155 // SurroundSCM
156 "**/.MySCMServerInfo",
157
158 // Mac
159 "**/.DS_Store"
160 };
161
162 /** The base directory to be scanned. */
163 protected String basedir;
164
165 /** Whether or not everything tested so far has been included. */
166 protected boolean everythingIncluded = true;
167
168 /** The patterns for the files to be excluded. */
169 protected String[] excludes;
170
171 /** The patterns for the files to be included. */
172 protected String[] includes;
173
174 /** Whether or not the file system should be treated as a case sensitive one. */
175 protected boolean isCaseSensitive = true;
176
177 /** Sole constructor. */
178 public ListScanner() {}
179
180 public static String getDefaultExcludes() {
181 return StringUtils.join(DEFAULTEXCLUDES, ",");
182 }
183
184 /**
185 * Tests whether or not a string matches against a pattern. The pattern may contain two special
186 * characters:<br>
187 * '*' means zero or more characters<br>
188 * '?' means one and only one character
189 *
190 * @param pattern The pattern to match against. Must not be <code>null</code>.
191 * @param str The string which must be matched against the pattern. Must not be <code>
192 * null</code>.
193 * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
194 * otherwise.
195 */
196 public static boolean match(String pattern, String str) {
197 // default matches the SelectorUtils default
198 return match(pattern, str, true);
199 }
200
201 /**
202 * Tests whether or not a string matches against a pattern. The pattern may contain two special
203 * characters:<br>
204 * '*' means zero or more characters<br>
205 * '?' means one and only one character
206 *
207 * @param pattern The pattern to match against. Must not be <code>null</code>.
208 * @param str The string which must be matched against the pattern. Must not be
209 * <code>null</code>.
210 * @param isCaseSensitive Whether or not matching should be performed case sensitively.
211 * @return <code>true</code> if the string matches against the pattern, or <code>false</code>
212 * otherwise.
213 */
214 protected static boolean match(String pattern, String str, boolean isCaseSensitive) {
215 return SelectorUtils.match(pattern, str, isCaseSensitive);
216 }
217
218 /**
219 * Tests whether or not a given path matches a given pattern.
220 *
221 * @param pattern The pattern to match against. Must not be <code>null</code>.
222 * @param str The path to match, as a String. Must not be <code>null</code>.
223 * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
224 * otherwise.
225 */
226 protected static boolean matchPath(String pattern, String str) {
227 // default matches the SelectorUtils default
228 return matchPath(pattern, str, true);
229 }
230
231 /**
232 * Tests whether or not a given path matches a given pattern.
233 *
234 * @param pattern The pattern to match against. Must not be <code>null</code>.
235 * @param str The path to match, as a String. Must not be <code>null</code>.
236 * @param isCaseSensitive Whether or not matching should be performed case sensitively.
237 * @return <code>true</code> if the pattern matches against the string, or <code>false</code>
238 * otherwise.
239 */
240 protected static boolean matchPath(String pattern, String str, boolean isCaseSensitive) {
241 return SelectorUtils.matchPath(
242 PathUtils.convertPathForOS(pattern), PathUtils.convertPathForOS(str), isCaseSensitive);
243 }
244
245 /**
246 * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".<p>
247 *
248 * <p>This is not a general purpose test and should only be used if you can live with false
249 * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
250 * true</code>.</p>
251 *
252 * @param pattern The pattern to match against. Must not be <code>null</code>.
253 * @param str The path to match, as a String. Must not be <code>null</code>.
254 * @return whether or not a given path matches the start of a given pattern up to the first
255 * "**".
256 */
257 protected static boolean matchPatternStart(String pattern, String str) {
258 // default matches SelectorUtils default
259 return matchPatternStart(pattern, str, true);
260 }
261
262 /**
263 * <p>Tests whether or not a given path matches the start of a given pattern up to the first "**".</p>
264 *
265 * <p>This is not a general purpose test and should only be used if you can live with false
266 * positives. For example, <code>pattern=**\a</code> and <code>str=b</code> will yield <code>
267 * true</code>.</p>
268 *
269 * @param pattern The pattern to match against. Must not be <code>null</code>.
270 * @param str The path to match, as a String. Must not be <code>null</code>.
271 * @param isCaseSensitive Whether or not matching should be performed case sensitively.
272 * @return whether or not a given path matches the start of a given pattern up to the first
273 * "**".
274 */
275 protected static boolean matchPatternStart(String pattern, String str, boolean isCaseSensitive) {
276 return SelectorUtils.matchPatternStart(
277 PathUtils.convertPathForOS(pattern), PathUtils.convertPathForOS(str), isCaseSensitive);
278 }
279
280 /** Adds default exclusions to the current exclusions set. */
281 public void addDefaultExcludes() {
282 int excludesLength = (excludes == null) ? 0 : excludes.length;
283 String[] newExcludes;
284 newExcludes = new String[excludesLength + DEFAULTEXCLUDES.length];
285 if (excludesLength > 0) {
286 System.arraycopy(excludes, 0, newExcludes, 0, excludesLength);
287 }
288 for (int i = 0; i < DEFAULTEXCLUDES.length; i++) {
289 newExcludes[i + excludesLength] =
290 DEFAULTEXCLUDES[i].replace('/', File.separatorChar).replace('\\', File.separatorChar);
291 }
292 excludes = newExcludes;
293 }
294
295 /**
296 * Returns the base directory to be scanned. This is the directory which is scanned recursively.
297 *
298 * @return the base directory to be scanned
299 */
300 public String getBasedir() {
301 return basedir;
302 }
303
304 /**
305 * Sets the base directory to be scanned. This is the directory which is scanned recursively.
306 * This directory is normalized for multiple os's (all / and \\ are replaced with
307 * File.separatorChar
308 *
309 * @param basedir The base directory for scanning. Should not be <code>null</code>.
310 */
311 public void setBasedir(String basedir) {
312 this.basedir = basedir;
313 }
314
315 /**
316 * Sets whether or not the file system should be regarded as case sensitive.
317 *
318 * @param isCaseSensitive whether or not the file system should be regarded as a case
319 * sensitive one
320 */
321 public void setCaseSensitive(boolean isCaseSensitive) {
322 this.isCaseSensitive = isCaseSensitive;
323 }
324
325 /**
326 * <p>Sets the list of exclude patterns to use. All '/' and '\' characters are replaced by <code>
327 * File.separatorChar</code>, so the separator used need not match <code>
328 * File.separatorChar</code>.</p>
329 *
330 * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
331 *
332 * @param excludesList A list of exclude patterns. May be <code>null</code>, indicating that no
333 * files should be excluded. If a non-<code>null</code> list is given, all
334 * elements must be non-<code>null</code>.
335 */
336 public void setExcludes(List<String> excludesList) {
337 String[] excludes = excludesList.toArray(new String[0]);
338 if (excludes == null) {
339 this.excludes = null;
340 } else {
341 setExcludes(excludes);
342 }
343 }
344
345 public void setExcludes(String excludes) {
346 if (excludes == null) {
347 this.excludes = null;
348 } else {
349 setExcludes(StringUtils.split(excludes, ","));
350 }
351 }
352
353 /**
354 * <p>Sets the list of include patterns to use. All '/' and '\' characters are replaced by <code>
355 * File.separatorChar</code>, so the separator used need not match <code>
356 * File.separatorChar</code>.</p>
357 *
358 * <p>When a pattern ends with a '/' or '\', "**" is appended.</p>
359 *
360 * @param includesList A list of include patterns. May be <code>null</code>, indicating that all
361 * files should be included. If a non-<code>null</code> list is given, all
362 * elements must be non-<code>null</code>.
363 */
364 public void setIncludes(List<String> includesList) {
365 String[] includes = includesList.toArray(new String[0]);
366 if (includes == null) {
367 this.includes = null;
368 } else {
369 setIncludes(includes);
370 }
371 }
372
373 public void setIncludes(String includes) {
374 if (includes == null) {
375 this.includes = null;
376 } else {
377 setIncludes(StringUtils.split(includes, ","));
378 }
379 }
380
381 /**
382 * Scans the base directory for files which match at least one include pattern and don't match
383 * any exclude patterns. If there are selectors then the files must pass muster there, as well.
384 *
385 * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is
386 * <code>null</code>, doesn't exist, or isn't a directory).
387 */
388 public List<String> scan(List<String> files) throws IllegalStateException {
389 if (basedir == null) {
390 throw new IllegalStateException("No basedir set");
391 }
392
393 if (includes == null) {
394 // No includes supplied, so set it to 'matches all'
395 includes = new String[1];
396 includes[0] = "**";
397 }
398 if (excludes == null) {
399 excludes = new String[0];
400 }
401
402 List<String> result = new ArrayList<>();
403
404 for (String fileName : files) {
405 if (isIncluded(fileName) && !isExcluded(fileName)) {
406 result.add(fileName);
407 }
408 }
409 return result;
410 }
411
412 /**
413 * Tests whether a name matches at least one exclude pattern.
414 *
415 * @param name The name to match. Must not be <code>null</code>
416 * @return <code>true</code> when the name matches against at least one exclude pattern, or
417 * <code>false</code> otherwise
418 */
419 protected boolean isExcluded(String name) {
420 return matchesPatterns(name, excludes);
421 }
422
423 /**
424 * Tests whether or not a name matches against at least one include pattern.
425 *
426 * @param name The name to match. Must not be <code>null</code>.
427 * @return <code>true</code> when the name matches against at least one include pattern, or
428 * <code>false</code> otherwise.
429 */
430 protected boolean isIncluded(String name) {
431 return matchesPatterns(name, includes);
432 }
433
434 /**
435 * Tests whether or not a name matches against at least one include pattern.
436 *
437 * @param name The name to match. Must not be <code>null</code>.
438 * @param patterns The list of patterns to match.
439 * @return <code>true</code> when the name matches against at least one include pattern, or
440 * <code>false</code> otherwise.
441 */
442 protected boolean matchesPatterns(String name, String[] patterns) {
443 // avoid extra object creation in the loop
444 String path = null;
445
446 String baseDir = getBasedir();
447 if (!baseDir.isEmpty()) {
448 baseDir = baseDir.concat(File.separator);
449 }
450
451 for (int i = 0; i < patterns.length; i++) {
452 path = PathUtils.convertPathForOS(baseDir + patterns[i]);
453 if (matchPath(path, name, isCaseSensitive)) {
454 return true;
455 }
456 }
457 return false;
458 }
459
460 private void setExcludes(String[] excludes) {
461 this.excludes = setPatterns(excludes);
462 }
463
464 private void setIncludes(String[] includes) {
465 this.includes = setPatterns(includes);
466 }
467
468 private String[] setPatterns(String[] patterns) {
469 String[] result = null;
470 if ((patterns != null) && (patterns.length > 0)) {
471 result = new String[patterns.length];
472 for (int i = 0; i < patterns.length; i++) {
473 String pattern = patterns[i].trim();
474
475 // don't normalize the pattern here, we internalize the normalization
476 // just normalize for comparison purposes
477 if (PathUtils.convertPathForOS(pattern).endsWith(File.separator)) {
478 pattern += "**";
479 }
480 result[i] = pattern;
481 }
482 }
483 return result;
484 }
485 }