1 package org.codehaus.plexus.util;
2
3 /*
4 * The Apache Software License, Version 1.1
5 *
6 * Copyright (c) 2000-2003 The Apache Software Foundation. All rights
7 * reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in
18 * the documentation and/or other materials provided with the
19 * distribution.
20 *
21 * 3. The end-user documentation included with the redistribution, if
22 * any, must include the following acknowledgement:
23 * "This product includes software developed by the
 *       Apache Software Foundation (http://www.codehaus.org/)."
25 * Alternately, this acknowledgement may appear in the software itself,
26 * if and wherever such third-party acknowledgements normally appear.
27 *
28 * 4. The names "Ant" and "Apache Software
29 * Foundation" must not be used to endorse or promote products derived
30 * from this software without prior written permission. For written
31 * permission, please contact [email protected].
32 *
33 * 5. Products derived from this software may not be called "Apache"
34 * nor may "Apache" appear in their names without prior written
35 * permission of the Apache Group.
36 *
37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
38 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
39 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
40 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
41 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
43 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
44 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
45 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
46 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
47 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
48 * SUCH DAMAGE.
49 * ====================================================================
50 *
51 * This software consists of voluntary contributions made by many
52 * individuals on behalf of the Apache Software Foundation. For more
53 * information on the Apache Software Foundation, please see
 * <http://www.codehaus.org/>.
55 */
56
57 import java.io.File;
58 import java.io.IOException;
59 import java.util.ArrayList;
60 import java.util.Arrays;
61
62 /**
63 * <p>Class for scanning a directory for files/directories which match certain criteria.</p>
64 *
65 * <p>These criteria consist of selectors and patterns which have been specified. With the selectors you can select which
66 * files you want to have included. Files which are not selected are excluded. With patterns you can include or exclude
67 * files based on their filename.</p>
68 *
69 * <p>The idea is simple. A given directory is recursively scanned for all files and directories. Each file/directory is
 * matched against a set of selectors, including special support for matching against filenames with include and
71 * exclude patterns. Only files/directories which match at least one pattern of the include pattern list or other file
72 * selector, and don't match any pattern of the exclude pattern list or fail to match against a required selector will
73 * be placed in the list of files/directories found.</p>
74 *
75 * <p>When no list of include patterns is supplied, "**" will be used, which means that everything will be matched. When no
76 * list of exclude patterns is supplied, an empty list is used, such that nothing will be excluded. When no selectors
77 * are supplied, none are applied.</p>
78 *
79 * <p>The filename pattern matching is done as follows: The name to be matched is split up in path segments. A path segment
80 * is the name of a directory or file, which is bounded by <code>File.separator</code> ('/' under UNIX, '\' under
81 * Windows). For example, "abc/def/ghi/xyz.java" is split up in the segments "abc", "def","ghi" and "xyz.java". The same
82 * is done for the pattern against which should be matched.</p>
83 *
84 * <p>The segments of the name and the pattern are then matched against each other. When '**' is used for a path segment in
85 * the pattern, it matches zero or more path segments of the name.</p>
86 *
87 * <p>There is a special case regarding the use of <code>File.separator</code>s at the beginning of the pattern and the
88 * string to match:<br>
89 * When a pattern starts with a <code>File.separator</code>, the string to match must also start with a
90 * <code>File.separator</code>. When a pattern does not start with a <code>File.separator</code>, the string to match
91 * may not start with a <code>File.separator</code>. When one of these rules is not obeyed, the string will not match.</p>
92 *
93 * <p>When a name path segment is matched against a pattern path segment, the following special characters can be used:<br>
94 * '*' matches zero or more characters<br>
95 * '?' matches one character.</p>
96 *
97 * Examples:
98 * <ul>
99 * <li>"**\*.class" matches all .class files/dirs in a directory tree.</li>
100 * <li>"test\a??.java" matches all files/dirs which start with an 'a', then two more characters and then ".java", in a
101 * directory called test.</li>
102 * <li>"**" matches everything in a directory tree.</li>
103 * <li>"**\test\**\XYZ*" matches all files/dirs which start with "XYZ" and where there is a parent directory called test
104 * (e.g. "abc\test\def\ghi\XYZ123").</li>
105 * </ul>
106 *
107 * <p>Case sensitivity may be turned off if necessary. By default, it is turned on.</p>
108 * Example of usage:
109 * <pre>
 * DirectoryScanner ds = new DirectoryScanner();
 * String[] includes = { "**\\*.class" };
111 * String[] excludes = { "modules\\*\\**" };
112 * ds.setIncludes( includes );
113 * ds.setExcludes( excludes );
114 * ds.setBasedir( new File( "test" ) );
115 * ds.setCaseSensitive( true );
116 * ds.scan();
117 *
118 * System.out.println( "FILES:" );
119 * String[] files = ds.getIncludedFiles();
120 * for ( int i = 0; i < files.length; i++ )
121 * {
122 * System.out.println( files[i] );
123 * }
124 * </pre>
125 *
126 * <p>This will scan a directory called test for .class files, but excludes all files in all proper subdirectories of a
127 * directory called "modules"</p>
128 *
129 * @author Arnout J. Kuiper <a href="mailto:[email protected]">[email protected]</a>
130 * @author Magesh Umasankar
131 * @author <a href="mailto:[email protected]">Bruce Atherton</a>
132 * @author <a href="mailto:[email protected]">Antoine Levy-Lambert</a>
133 */
134 public class DirectoryScanner extends AbstractScanner {
135
136 private static final String[] EMPTY_STRING_ARRAY = new String[0];
137
138 /**
139 * The base directory to be scanned.
140 */
141 protected File basedir;
142
143 /**
144 * The files which matched at least one include and no excludes and were selected.
145 */
146 protected ArrayList<String> filesIncluded;
147
148 /**
149 * The files which did not match any includes or selectors.
150 */
151 protected ArrayList<String> filesNotIncluded;
152
153 /**
154 * The files which matched at least one include and at least one exclude.
155 */
156 protected ArrayList<String> filesExcluded;
157
158 /**
159 * The directories which matched at least one include and no excludes and were selected.
160 */
161 protected ArrayList<String> dirsIncluded;
162
163 /**
164 * The directories which were found and did not match any includes.
165 */
166 protected ArrayList<String> dirsNotIncluded;
167
168 /**
169 * The directories which matched at least one include and at least one exclude.
170 */
171 protected ArrayList<String> dirsExcluded;
172
173 /**
174 * The files which matched at least one include and no excludes and which a selector discarded.
175 */
176 protected ArrayList<String> filesDeselected;
177
178 /**
179 * The directories which matched at least one include and no excludes but which a selector discarded.
180 */
181 protected ArrayList<String> dirsDeselected;
182
183 /**
184 * Whether or not our results were built by a slow scan.
185 */
186 protected boolean haveSlowResults = false;
187
188 /**
189 * Whether or not symbolic links should be followed.
190 *
191 * @since Ant 1.5
192 */
193 private boolean followSymlinks = true;
194
195 /**
196 * Whether or not everything tested so far has been included.
197 */
198 protected boolean everythingIncluded = true;
199
200 private final char[][] tokenizedEmpty = MatchPattern.tokenizePathToCharArray("", File.separator);
201
202 /**
203 * Sole constructor.
204 */
205 public DirectoryScanner() {}
206
207 /**
208 * Sets the base directory to be scanned. This is the directory which is scanned recursively. All '/' and '\'
209 * characters are replaced by <code>File.separatorChar</code>, so the separator used need not match
210 * <code>File.separatorChar</code>.
211 *
212 * @param basedir The base directory to scan. Must not be <code>null</code>.
213 */
214 public void setBasedir(String basedir) {
215 setBasedir(new File(basedir.replace('/', File.separatorChar).replace('\\', File.separatorChar)));
216 }
217
218 /**
219 * Sets the base directory to be scanned. This is the directory which is scanned recursively.
220 *
221 * @param basedir The base directory for scanning. Should not be <code>null</code>.
222 */
223 public void setBasedir(File basedir) {
224 this.basedir = basedir;
225 }
226
227 /**
228 * Returns the base directory to be scanned. This is the directory which is scanned recursively.
229 *
230 * @return the base directory to be scanned
231 */
232 @Override
233 public File getBasedir() {
234 return basedir;
235 }
236
237 /**
238 * Sets whether or not symbolic links should be followed.
239 *
240 * @param followSymlinks whether or not symbolic links should be followed
241 */
242 public void setFollowSymlinks(boolean followSymlinks) {
243 this.followSymlinks = followSymlinks;
244 }
245
246 /**
247 * Returns whether or not the scanner has included all the files or directories it has come across so far.
248 *
249 * @return <code>true</code> if all files and directories which have been found so far have been included.
250 */
251 public boolean isEverythingIncluded() {
252 return everythingIncluded;
253 }
254
255 /**
256 * Scans the base directory for files which match at least one include pattern and don't match any exclude patterns.
257 * If there are selectors then the files must pass muster there, as well.
258 *
259 * @throws IllegalStateException if the base directory was set incorrectly (i.e. if it is <code>null</code>, doesn't
260 * exist, or isn't a directory).
261 */
262 @Override
263 public void scan() throws IllegalStateException {
264 if (basedir == null) {
265 throw new IllegalStateException("No basedir set");
266 }
267 if (!basedir.exists()) {
268 throw new IllegalStateException("basedir " + basedir + " does not exist");
269 }
270 if (!basedir.isDirectory()) {
271 throw new IllegalStateException("basedir " + basedir + " is not a directory");
272 }
273
274 setupDefaultFilters();
275 setupMatchPatterns();
276
277 filesIncluded = new ArrayList<String>();
278 filesNotIncluded = new ArrayList<String>();
279 filesExcluded = new ArrayList<String>();
280 filesDeselected = new ArrayList<String>();
281 dirsIncluded = new ArrayList<String>();
282 dirsNotIncluded = new ArrayList<String>();
283 dirsExcluded = new ArrayList<String>();
284 dirsDeselected = new ArrayList<String>();
285
286 if (isIncluded("", tokenizedEmpty)) {
287
288 if (!isExcluded("", tokenizedEmpty)) {
289 if (isSelected("", basedir)) {
290 dirsIncluded.add("");
291 } else {
292 dirsDeselected.add("");
293 }
294 } else {
295 dirsExcluded.add("");
296 }
297 } else {
298 dirsNotIncluded.add("");
299 }
300 scandir(basedir, "", true);
301 }
302
303 /**
304 * <p>Top level invocation for a slow scan. A slow scan builds up a full list of excluded/included files/directories,
305 * whereas a fast scan will only have full results for included files, as it ignores directories which can't
306 * possibly hold any included files/directories.</p>
307 *
308 * <p>Returns immediately if a slow scan has already been completed.</p>
309 */
310 protected void slowScan() {
311 if (haveSlowResults) {
312 return;
313 }
314
315 String[] excl = dirsExcluded.toArray(EMPTY_STRING_ARRAY);
316 String[] notIncl = dirsNotIncluded.toArray(EMPTY_STRING_ARRAY);
317
318 for (String anExcl : excl) {
319 if (!couldHoldIncluded(anExcl)) {
320 scandir(new File(basedir, anExcl), anExcl + File.separator, false);
321 }
322 }
323
324 for (String aNotIncl : notIncl) {
325 if (!couldHoldIncluded(aNotIncl)) {
326 scandir(new File(basedir, aNotIncl), aNotIncl + File.separator, false);
327 }
328 }
329
330 haveSlowResults = true;
331 }
332
333 /**
334 * Scans the given directory for files and directories. Found files and directories are placed in their respective
335 * collections, based on the matching of includes, excludes, and the selectors. When a directory is found, it is
336 * scanned recursively.
337 *
338 * @param dir The directory to scan. Must not be <code>null</code>.
339 * @param vpath The path relative to the base directory (needed to prevent problems with an absolute path when using
340 * dir). Must not be <code>null</code>.
341 * @param fast Whether or not this call is part of a fast scan.
342 * @see #filesIncluded
343 * @see #filesNotIncluded
344 * @see #filesExcluded
345 * @see #dirsIncluded
346 * @see #dirsNotIncluded
347 * @see #dirsExcluded
348 * @see #slowScan
349 */
350 protected void scandir(File dir, String vpath, boolean fast) {
351 String[] newfiles = dir.list();
352
353 if (newfiles == null) {
354 /*
355 * two reasons are mentioned in the API docs for File.list (1) dir is not a directory. This is impossible as
356 * we wouldn't get here in this case. (2) an IO error occurred (why doesn't it throw an exception then???)
357 */
358
359 /*
360 * [jdcasey] (2) is apparently happening to me, as this is killing one of my tests... this is affecting the
361 * assembly plugin, fwiw. I will initialize the newfiles array as zero-length for now. NOTE: I can't find
362 * the problematic code, as it appears to come from a native method in UnixFileSystem...
363 */
364 /*
365 * [bentmann] A null array will also be returned from list() on NTFS when dir refers to a soft link or
366 * junction point whose target is not existent.
367 */
368 newfiles = EMPTY_STRING_ARRAY;
369
370 // throw new IOException( "IO error scanning directory " + dir.getAbsolutePath() );
371 }
372
373 if (!followSymlinks) {
374 try {
375 if (isParentSymbolicLink(dir, null)) {
376 for (String newfile : newfiles) {
377 String name = vpath + newfile;
378 File file = new File(dir, newfile);
379 if (file.isDirectory()) {
380 dirsExcluded.add(name);
381 } else {
382 filesExcluded.add(name);
383 }
384 }
385 return;
386 }
387 } catch (IOException ioe) {
388 String msg = "IOException caught while checking for links!";
389 // will be caught and redirected to Ant's logging system
390 System.err.println(msg);
391 }
392 }
393
394 if (filenameComparator != null) {
395 Arrays.sort(newfiles, filenameComparator);
396 }
397
398 for (String newfile : newfiles) {
399 String name = vpath + newfile;
400 char[][] tokenizedName = MatchPattern.tokenizePathToCharArray(name, File.separator);
401 File file = new File(dir, newfile);
402 if (file.isDirectory()) {
403
404 if (isIncluded(name, tokenizedName)) {
405 if (!isExcluded(name, tokenizedName)) {
406 if (isSelected(name, file)) {
407 dirsIncluded.add(name);
408 if (fast) {
409 scandir(file, name + File.separator, fast);
410 }
411 } else {
412 everythingIncluded = false;
413 dirsDeselected.add(name);
414 if (fast && couldHoldIncluded(name)) {
415 scandir(file, name + File.separator, fast);
416 }
417 }
418
419 } else {
420 everythingIncluded = false;
421 dirsExcluded.add(name);
422 if (fast && couldHoldIncluded(name)) {
423 scandir(file, name + File.separator, fast);
424 }
425 }
426 } else {
427 everythingIncluded = false;
428 dirsNotIncluded.add(name);
429 if (fast && couldHoldIncluded(name)) {
430 scandir(file, name + File.separator, fast);
431 }
432 }
433 if (!fast) {
434 scandir(file, name + File.separator, fast);
435 }
436 } else if (file.isFile()) {
437 if (isIncluded(name, tokenizedName)) {
438 if (!isExcluded(name, tokenizedName)) {
439 if (isSelected(name, file)) {
440 filesIncluded.add(name);
441 } else {
442 everythingIncluded = false;
443 filesDeselected.add(name);
444 }
445 } else {
446 everythingIncluded = false;
447 filesExcluded.add(name);
448 }
449 } else {
450 everythingIncluded = false;
451 filesNotIncluded.add(name);
452 }
453 }
454 }
455 }
456
457 /**
458 * Tests whether a name should be selected.
459 *
460 * @param name the filename to check for selecting
461 * @param file the java.io.File object for this filename
462 * @return <code>false</code> when the selectors says that the file should not be selected, <code>true</code>
463 * otherwise.
464 */
465 protected boolean isSelected(String name, File file) {
466 return true;
467 }
468
469 /**
470 * Returns the names of the files which matched at least one of the include patterns and none of the exclude
471 * patterns. The names are relative to the base directory.
472 *
473 * @return the names of the files which matched at least one of the include patterns and none of the exclude
474 * patterns.
475 */
476 @Override
477 public String[] getIncludedFiles() {
478 return filesIncluded.toArray(EMPTY_STRING_ARRAY);
479 }
480
481 /**
482 * Returns the names of the files which matched none of the include patterns. The names are relative to the base
483 * directory. This involves performing a slow scan if one has not already been completed.
484 *
485 * @return the names of the files which matched none of the include patterns.
486 * @see #slowScan
487 */
488 public String[] getNotIncludedFiles() {
489 slowScan();
490 return filesNotIncluded.toArray(EMPTY_STRING_ARRAY);
491 }
492
493 /**
494 * Returns the names of the files which matched at least one of the include patterns and at least one of the exclude
495 * patterns. The names are relative to the base directory. This involves performing a slow scan if one has not
496 * already been completed.
497 *
498 * @return the names of the files which matched at least one of the include patterns and at at least one of the
499 * exclude patterns.
500 * @see #slowScan
501 */
502 public String[] getExcludedFiles() {
503 slowScan();
504 return filesExcluded.toArray(EMPTY_STRING_ARRAY);
505 }
506
507 /**
508 * <p>Returns the names of the files which were selected out and therefore not ultimately included.</p>
509 *
510 * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
511 * completed.</p>
512 *
513 * @return the names of the files which were deselected.
514 * @see #slowScan
515 */
516 public String[] getDeselectedFiles() {
517 slowScan();
518 return filesDeselected.toArray(EMPTY_STRING_ARRAY);
519 }
520
521 /**
522 * Returns the names of the directories which matched at least one of the include patterns and none of the exclude
523 * patterns. The names are relative to the base directory.
524 *
525 * @return the names of the directories which matched at least one of the include patterns and none of the exclude
526 * patterns.
527 */
528 @Override
529 public String[] getIncludedDirectories() {
530 return dirsIncluded.toArray(EMPTY_STRING_ARRAY);
531 }
532
533 /**
534 * Returns the names of the directories which matched none of the include patterns. The names are relative to the
535 * base directory. This involves performing a slow scan if one has not already been completed.
536 *
537 * @return the names of the directories which matched none of the include patterns.
538 * @see #slowScan
539 */
540 public String[] getNotIncludedDirectories() {
541 slowScan();
542 return dirsNotIncluded.toArray(EMPTY_STRING_ARRAY);
543 }
544
545 /**
546 * Returns the names of the directories which matched at least one of the include patterns and at least one of the
547 * exclude patterns. The names are relative to the base directory. This involves performing a slow scan if one has
548 * not already been completed.
549 *
550 * @return the names of the directories which matched at least one of the include patterns and at least one of the
551 * exclude patterns.
552 * @see #slowScan
553 */
554 public String[] getExcludedDirectories() {
555 slowScan();
556 return dirsExcluded.toArray(EMPTY_STRING_ARRAY);
557 }
558
559 /**
560 * <p>Returns the names of the directories which were selected out and therefore not ultimately included.</p>
561 *
562 * <p>The names are relative to the base directory. This involves performing a slow scan if one has not already been
563 * completed.</p>
564 *
565 * @return the names of the directories which were deselected.
566 * @see #slowScan
567 */
568 public String[] getDeselectedDirectories() {
569 slowScan();
570 return dirsDeselected.toArray(EMPTY_STRING_ARRAY);
571 }
572
573 /**
574 * <p>Checks whether a given file is a symbolic link.</p>
575 *
576 * <p>It doesn't really test for symbolic links but whether the canonical and absolute paths of the file are identical
577 * - this may lead to false positives on some platforms.
578 * </p>
579 *
580 * @param parent the parent directory of the file to test
581 * @param name the name of the file to test.
582 * @return true if it's a symbolic link
583 * @throws java.io.IOException .
584 * @since Ant 1.5
585 */
586 public boolean isSymbolicLink(File parent, String name) throws IOException {
587 return NioFiles.isSymbolicLink(new File(parent, name));
588 }
589
590 /**
591 * <p>Checks whether the parent of this file is a symbolic link.</p>
592 *
593 * <p>For java versions prior to 7 It doesn't really test for symbolic links but whether the canonical and absolute
594 * paths of the file are identical - this may lead to false positives on some platforms.</p>
595 *
596 * @param parent the parent directory of the file to test
597 * @param name the name of the file to test.
598 * @return true if it's a symbolic link
599 * @throws java.io.IOException .
600 * @since Ant 1.5
601 */
602 public boolean isParentSymbolicLink(File parent, String name) throws IOException {
603 return NioFiles.isSymbolicLink(parent);
604 }
605 }