001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io;
018    
019    import java.io.File;
020    import java.util.ArrayList;
021    import java.util.Collection;
022    import java.util.Iterator;
023    import java.util.Stack;
024    
025    /**
026     * General filename and filepath manipulation utilities.
027     * <p>
028     * When dealing with filenames you can hit problems when moving from a Windows
029     * based development machine to a Unix based production machine.
030     * This class aims to help avoid those problems.
031     * <p>
032     * <b>NOTE</b>: You may be able to avoid using this class entirely simply by
033     * using JDK {@link java.io.File File} objects and the two argument constructor
034     * {@link java.io.File#File(java.io.File, java.lang.String) File(File,String)}.
035     * <p>
036     * Most methods on this class are designed to work the same on both Unix and Windows.
037     * Those that don't include 'System', 'Unix' or 'Windows' in their name.
038     * <p>
039     * Most methods recognise both separators (forward and back), and both
040     * sets of prefixes. See the javadoc of each method for details.
041     * <p>
042     * This class defines six components within a filename
043     * (example C:\dev\project\file.txt):
044     * <ul>
045     * <li>the prefix - C:\</li>
046     * <li>the path - dev\project\</li>
047     * <li>the full path - C:\dev\project\</li>
048     * <li>the name - file.txt</li>
049     * <li>the base name - file</li>
050     * <li>the extension - txt</li>
051     * </ul>
052     * Note that this class works best if directory filenames end with a separator.
053     * If you omit the last separator, it is impossible to determine if the filename
054     * corresponds to a file or a directory. As a result, we have chosen to say
055     * it corresponds to a file.
056     * <p>
057     * This class only supports Unix and Windows style names.
058     * Prefixes are matched as follows:
059     * <pre>
060     * Windows:
061     * a\b\c.txt           --> ""          --> relative
062     * \a\b\c.txt          --> "\"         --> current drive absolute
063     * C:a\b\c.txt         --> "C:"        --> drive relative
064     * C:\a\b\c.txt        --> "C:\"       --> absolute
065     * \\server\a\b\c.txt  --> "\\server\" --> UNC
066     *
067     * Unix:
068     * a/b/c.txt           --> ""          --> relative
069     * /a/b/c.txt          --> "/"         --> absolute
070     * ~/a/b/c.txt         --> "~/"        --> current user
071     * ~                   --> "~/"        --> current user (slash added)
072     * ~user/a/b/c.txt     --> "~user/"    --> named user
073     * ~user               --> "~user/"    --> named user (slash added)
074     * </pre>
075     * Both prefix styles are matched always, irrespective of the machine that you are
076     * currently running on.
077     * <p>
078     * Origin of code: Excalibur, Alexandria, Tomcat, Commons-Utils.
079     *
080     * @author <a href="mailto:burton@relativity.yi.org">Kevin A. Burton</A>
081     * @author <a href="mailto:sanders@apache.org">Scott Sanders</a>
082     * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
083     * @author <a href="mailto:Christoph.Reck@dlr.de">Christoph.Reck</a>
084     * @author <a href="mailto:peter@apache.org">Peter Donald</a>
085     * @author <a href="mailto:jefft@apache.org">Jeff Turner</a>
086     * @author Matthew Hawthorne
087     * @author Martin Cooper
088     * @author <a href="mailto:jeremias@apache.org">Jeremias Maerki</a>
089     * @author Stephen Colebourne
090     * @version $Id: FilenameUtils.java 609870 2008-01-08 04:46:26Z niallp $
091     * @since Commons IO 1.1
092     */
093    public class FilenameUtils {
094    
095        /**
096         * The extension separator character.
097         * @since Commons IO 1.4
098         */
099        public static final char EXTENSION_SEPARATOR = '.';
100    
101        /**
102         * The extension separator String.
103         * @since Commons IO 1.4
104         */
105        public static final String EXTENSION_SEPARATOR_STR = (new Character(EXTENSION_SEPARATOR)).toString();
106    
107        /**
108         * The Unix separator character.
109         */
110        private static final char UNIX_SEPARATOR = '/';
111    
112        /**
113         * The Windows separator character.
114         */
115        private static final char WINDOWS_SEPARATOR = '\\';
116    
117        /**
118         * The system separator character.
119         */
120        private static final char SYSTEM_SEPARATOR = File.separatorChar;
121    
122        /**
123         * The separator character that is the opposite of the system separator.
124         */
125        private static final char OTHER_SEPARATOR;
126        static {
127            if (isSystemWindows()) {
128                OTHER_SEPARATOR = UNIX_SEPARATOR;
129            } else {
130                OTHER_SEPARATOR = WINDOWS_SEPARATOR;
131            }
132        }
133    
134        /**
135         * Instances should NOT be constructed in standard programming.
136         */
137        public FilenameUtils() {
138            super();
139        }
140    
141        //-----------------------------------------------------------------------
142        /**
143         * Determines if Windows file system is in use.
144         * 
145         * @return true if the system is Windows
146         */
147        static boolean isSystemWindows() {
148            return SYSTEM_SEPARATOR == WINDOWS_SEPARATOR;
149        }
150    
151        //-----------------------------------------------------------------------
152        /**
153         * Checks if the character is a separator.
154         * 
155         * @param ch  the character to check
156         * @return true if it is a separator character
157         */
158        private static boolean isSeparator(char ch) {
159            return (ch == UNIX_SEPARATOR) || (ch == WINDOWS_SEPARATOR);
160        }
161    
162        //-----------------------------------------------------------------------
163        /**
164         * Normalizes a path, removing double and single dot path steps.
165         * <p>
166         * This method normalizes a path to a standard format.
167         * The input may contain separators in either Unix or Windows format.
168         * The output will contain separators in the format of the system.
169         * <p>
170         * A trailing slash will be retained.
171         * A double slash will be merged to a single slash (but UNC names are handled).
172         * A single dot path segment will be removed.
173         * A double dot will cause that path segment and the one before to be removed.
174         * If the double dot has no parent path segment to work with, <code>null</code>
175         * is returned.
176         * <p>
177         * The output will be the same on both Unix and Windows except
178         * for the separator character.
179         * <pre>
180         * /foo//               -->   /foo/
181         * /foo/./              -->   /foo/
182         * /foo/../bar          -->   /bar
183         * /foo/../bar/         -->   /bar/
184         * /foo/../bar/../baz   -->   /baz
185         * //foo//./bar         -->   /foo/bar
186         * /../                 -->   null
187         * ../foo               -->   null
188         * foo/bar/..           -->   foo/
189         * foo/../../bar        -->   null
190         * foo/../bar           -->   bar
191         * //server/foo/../bar  -->   //server/bar
192         * //server/../bar      -->   null
193         * C:\foo\..\bar        -->   C:\bar
194         * C:\..\bar            -->   null
195         * ~/foo/../bar/        -->   ~/bar/
196         * ~/../bar             -->   null
197         * </pre>
198         * (Note the file separator returned will be correct for Windows/Unix)
199         *
200         * @param filename  the filename to normalize, null returns null
201         * @return the normalized filename, or null if invalid
202         */
203        public static String normalize(String filename) {
204            return doNormalize(filename, true);
205        }
206    
207        //-----------------------------------------------------------------------
208        /**
209         * Normalizes a path, removing double and single dot path steps,
210         * and removing any final directory separator.
211         * <p>
212         * This method normalizes a path to a standard format.
213         * The input may contain separators in either Unix or Windows format.
214         * The output will contain separators in the format of the system.
215         * <p>
216         * A trailing slash will be removed.
217         * A double slash will be merged to a single slash (but UNC names are handled).
218         * A single dot path segment will be removed.
219         * A double dot will cause that path segment and the one before to be removed.
220         * If the double dot has no parent path segment to work with, <code>null</code>
221         * is returned.
222         * <p>
223         * The output will be the same on both Unix and Windows except
224         * for the separator character.
225         * <pre>
226         * /foo//               -->   /foo
227         * /foo/./              -->   /foo
228         * /foo/../bar          -->   /bar
229         * /foo/../bar/         -->   /bar
230         * /foo/../bar/../baz   -->   /baz
231         * //foo//./bar         -->   /foo/bar
232         * /../                 -->   null
233         * ../foo               -->   null
234         * foo/bar/..           -->   foo
235         * foo/../../bar        -->   null
236         * foo/../bar           -->   bar
237         * //server/foo/../bar  -->   //server/bar
238         * //server/../bar      -->   null
239         * C:\foo\..\bar        -->   C:\bar
240         * C:\..\bar            -->   null
241         * ~/foo/../bar/        -->   ~/bar
242         * ~/../bar             -->   null
243         * </pre>
244         * (Note the file separator returned will be correct for Windows/Unix)
245         *
246         * @param filename  the filename to normalize, null returns null
247         * @return the normalized filename, or null if invalid
248         */
249        public static String normalizeNoEndSeparator(String filename) {
250            return doNormalize(filename, false);
251        }
252    
253        /**
254         * Internal method to perform the normalization.
255         *
256         * @param filename  the filename
257         * @param keepSeparator  true to keep the final separator
258         * @return the normalized filename
259         */
260        private static String doNormalize(String filename, boolean keepSeparator) {
261            if (filename == null) {
262                return null;
263            }
264            int size = filename.length();
265            if (size == 0) {
266                return filename;
267            }
268            int prefix = getPrefixLength(filename);
269            if (prefix < 0) {
270                return null;
271            }
272            
273            char[] array = new char[size + 2];  // +1 for possible extra slash, +2 for arraycopy
274            filename.getChars(0, filename.length(), array, 0);
275            
276            // fix separators throughout
277            for (int i = 0; i < array.length; i++) {
278                if (array[i] == OTHER_SEPARATOR) {
279                    array[i] = SYSTEM_SEPARATOR;
280                }
281            }
282            
283            // add extra separator on the end to simplify code below
284            boolean lastIsDirectory = true;
285            if (array[size - 1] != SYSTEM_SEPARATOR) {
286                array[size++] = SYSTEM_SEPARATOR;
287                lastIsDirectory = false;
288            }
289            
290            // adjoining slashes
291            for (int i = prefix + 1; i < size; i++) {
292                if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == SYSTEM_SEPARATOR) {
293                    System.arraycopy(array, i, array, i - 1, size - i);
294                    size--;
295                    i--;
296                }
297            }
298            
299            // dot slash
300            for (int i = prefix + 1; i < size; i++) {
301                if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' &&
302                        (i == prefix + 1 || array[i - 2] == SYSTEM_SEPARATOR)) {
303                    if (i == size - 1) {
304                        lastIsDirectory = true;
305                    }
306                    System.arraycopy(array, i + 1, array, i - 1, size - i);
307                    size -=2;
308                    i--;
309                }
310            }
311            
312            // double dot slash
313            outer:
314            for (int i = prefix + 2; i < size; i++) {
315                if (array[i] == SYSTEM_SEPARATOR && array[i - 1] == '.' && array[i - 2] == '.' &&
316                        (i == prefix + 2 || array[i - 3] == SYSTEM_SEPARATOR)) {
317                    if (i == prefix + 2) {
318                        return null;
319                    }
320                    if (i == size - 1) {
321                        lastIsDirectory = true;
322                    }
323                    int j;
324                    for (j = i - 4 ; j >= prefix; j--) {
325                        if (array[j] == SYSTEM_SEPARATOR) {
326                            // remove b/../ from a/b/../c
327                            System.arraycopy(array, i + 1, array, j + 1, size - i);
328                            size -= (i - j);
329                            i = j + 1;
330                            continue outer;
331                        }
332                    }
333                    // remove a/../ from a/../c
334                    System.arraycopy(array, i + 1, array, prefix, size - i);
335                    size -= (i + 1 - prefix);
336                    i = prefix + 1;
337                }
338            }
339            
340            if (size <= 0) {  // should never be less than 0
341                return "";
342            }
343            if (size <= prefix) {  // should never be less than prefix
344                return new String(array, 0, size);
345            }
346            if (lastIsDirectory && keepSeparator) {
347                return new String(array, 0, size);  // keep trailing separator
348            }
349            return new String(array, 0, size - 1);  // lose trailing separator
350        }
351    
352        //-----------------------------------------------------------------------
353        /**
354         * Concatenates a filename to a base path using normal command line style rules.
355         * <p>
356         * The effect is equivalent to resultant directory after changing
357         * directory to the first argument, followed by changing directory to
358         * the second argument.
359         * <p>
360         * The first argument is the base path, the second is the path to concatenate.
361         * The returned path is always normalized via {@link #normalize(String)},
362         * thus <code>..</code> is handled.
363         * <p>
364         * If <code>pathToAdd</code> is absolute (has an absolute prefix), then
365         * it will be normalized and returned.
366         * Otherwise, the paths will be joined, normalized and returned.
367         * <p>
368         * The output will be the same on both Unix and Windows except
369         * for the separator character.
370         * <pre>
371         * /foo/ + bar          -->   /foo/bar
372         * /foo + bar           -->   /foo/bar
373         * /foo + /bar          -->   /bar
374         * /foo + C:/bar        -->   C:/bar
375         * /foo + C:bar         -->   C:bar (*)
376         * /foo/a/ + ../bar     -->   foo/bar
377         * /foo/ + ../../bar    -->   null
378         * /foo/ + /bar         -->   /bar
379         * /foo/.. + /bar       -->   /bar
380         * /foo + bar/c.txt     -->   /foo/bar/c.txt
381         * /foo/c.txt + bar     -->   /foo/c.txt/bar (!)
382         * </pre>
383         * (*) Note that the Windows relative drive prefix is unreliable when
384         * used with this method.
385         * (!) Note that the first parameter must be a path. If it ends with a name, then
386         * the name will be built into the concatenated path. If this might be a problem,
387         * use {@link #getFullPath(String)} on the base path argument.
388         *
389         * @param basePath  the base path to attach to, always treated as a path
390         * @param fullFilenameToAdd  the filename (or path) to attach to the base
391         * @return the concatenated path, or null if invalid
392         */
393        public static String concat(String basePath, String fullFilenameToAdd) {
394            int prefix = getPrefixLength(fullFilenameToAdd);
395            if (prefix < 0) {
396                return null;
397            }
398            if (prefix > 0) {
399                return normalize(fullFilenameToAdd);
400            }
401            if (basePath == null) {
402                return null;
403            }
404            int len = basePath.length();
405            if (len == 0) {
406                return normalize(fullFilenameToAdd);
407            }
408            char ch = basePath.charAt(len - 1);
409            if (isSeparator(ch)) {
410                return normalize(basePath + fullFilenameToAdd);
411            } else {
412                return normalize(basePath + '/' + fullFilenameToAdd);
413            }
414        }
415    
416        //-----------------------------------------------------------------------
417        /**
418         * Converts all separators to the Unix separator of forward slash.
419         * 
420         * @param path  the path to be changed, null ignored
421         * @return the updated path
422         */
423        public static String separatorsToUnix(String path) {
424            if (path == null || path.indexOf(WINDOWS_SEPARATOR) == -1) {
425                return path;
426            }
427            return path.replace(WINDOWS_SEPARATOR, UNIX_SEPARATOR);
428        }
429    
430        /**
431         * Converts all separators to the Windows separator of backslash.
432         * 
433         * @param path  the path to be changed, null ignored
434         * @return the updated path
435         */
436        public static String separatorsToWindows(String path) {
437            if (path == null || path.indexOf(UNIX_SEPARATOR) == -1) {
438                return path;
439            }
440            return path.replace(UNIX_SEPARATOR, WINDOWS_SEPARATOR);
441        }
442    
443        /**
444         * Converts all separators to the system separator.
445         * 
446         * @param path  the path to be changed, null ignored
447         * @return the updated path
448         */
449        public static String separatorsToSystem(String path) {
450            if (path == null) {
451                return null;
452            }
453            if (isSystemWindows()) {
454                return separatorsToWindows(path);
455            } else {
456                return separatorsToUnix(path);
457            }
458        }
459    
460        //-----------------------------------------------------------------------
461        /**
462         * Returns the length of the filename prefix, such as <code>C:/</code> or <code>~/</code>.
463         * <p>
464         * This method will handle a file in either Unix or Windows format.
465         * <p>
466         * The prefix length includes the first slash in the full filename
467         * if applicable. Thus, it is possible that the length returned is greater
468         * than the length of the input string.
469         * <pre>
470         * Windows:
471         * a\b\c.txt           --> ""          --> relative
472         * \a\b\c.txt          --> "\"         --> current drive absolute
473         * C:a\b\c.txt         --> "C:"        --> drive relative
474         * C:\a\b\c.txt        --> "C:\"       --> absolute
475         * \\server\a\b\c.txt  --> "\\server\" --> UNC
476         *
477         * Unix:
478         * a/b/c.txt           --> ""          --> relative
479         * /a/b/c.txt          --> "/"         --> absolute
480         * ~/a/b/c.txt         --> "~/"        --> current user
481         * ~                   --> "~/"        --> current user (slash added)
482         * ~user/a/b/c.txt     --> "~user/"    --> named user
483         * ~user               --> "~user/"    --> named user (slash added)
484         * </pre>
485         * <p>
486         * The output will be the same irrespective of the machine that the code is running on.
487         * ie. both Unix and Windows prefixes are matched regardless.
488         *
489         * @param filename  the filename to find the prefix in, null returns -1
490         * @return the length of the prefix, -1 if invalid or null
491         */
492        public static int getPrefixLength(String filename) {
493            if (filename == null) {
494                return -1;
495            }
496            int len = filename.length();
497            if (len == 0) {
498                return 0;
499            }
500            char ch0 = filename.charAt(0);
501            if (ch0 == ':') {
502                return -1;
503            }
504            if (len == 1) {
505                if (ch0 == '~') {
506                    return 2;  // return a length greater than the input
507                }
508                return (isSeparator(ch0) ? 1 : 0);
509            } else {
510                if (ch0 == '~') {
511                    int posUnix = filename.indexOf(UNIX_SEPARATOR, 1);
512                    int posWin = filename.indexOf(WINDOWS_SEPARATOR, 1);
513                    if (posUnix == -1 && posWin == -1) {
514                        return len + 1;  // return a length greater than the input
515                    }
516                    posUnix = (posUnix == -1 ? posWin : posUnix);
517                    posWin = (posWin == -1 ? posUnix : posWin);
518                    return Math.min(posUnix, posWin) + 1;
519                }
520                char ch1 = filename.charAt(1);
521                if (ch1 == ':') {
522                    ch0 = Character.toUpperCase(ch0);
523                    if (ch0 >= 'A' && ch0 <= 'Z') {
524                        if (len == 2 || isSeparator(filename.charAt(2)) == false) {
525                            return 2;
526                        }
527                        return 3;
528                    }
529                    return -1;
530                    
531                } else if (isSeparator(ch0) && isSeparator(ch1)) {
532                    int posUnix = filename.indexOf(UNIX_SEPARATOR, 2);
533                    int posWin = filename.indexOf(WINDOWS_SEPARATOR, 2);
534                    if ((posUnix == -1 && posWin == -1) || posUnix == 2 || posWin == 2) {
535                        return -1;
536                    }
537                    posUnix = (posUnix == -1 ? posWin : posUnix);
538                    posWin = (posWin == -1 ? posUnix : posWin);
539                    return Math.min(posUnix, posWin) + 1;
540                } else {
541                    return (isSeparator(ch0) ? 1 : 0);
542                }
543            }
544        }
545    
546        /**
547         * Returns the index of the last directory separator character.
548         * <p>
549         * This method will handle a file in either Unix or Windows format.
550         * The position of the last forward or backslash is returned.
551         * <p>
552         * The output will be the same irrespective of the machine that the code is running on.
553         * 
554         * @param filename  the filename to find the last path separator in, null returns -1
555         * @return the index of the last separator character, or -1 if there
556         * is no such character
557         */
558        public static int indexOfLastSeparator(String filename) {
559            if (filename == null) {
560                return -1;
561            }
562            int lastUnixPos = filename.lastIndexOf(UNIX_SEPARATOR);
563            int lastWindowsPos = filename.lastIndexOf(WINDOWS_SEPARATOR);
564            return Math.max(lastUnixPos, lastWindowsPos);
565        }
566    
567        /**
568         * Returns the index of the last extension separator character, which is a dot.
569         * <p>
570         * This method also checks that there is no directory separator after the last dot.
571         * To do this it uses {@link #indexOfLastSeparator(String)} which will
572         * handle a file in either Unix or Windows format.
573         * <p>
574         * The output will be the same irrespective of the machine that the code is running on.
575         * 
576         * @param filename  the filename to find the last path separator in, null returns -1
577         * @return the index of the last separator character, or -1 if there
578         * is no such character
579         */
580        public static int indexOfExtension(String filename) {
581            if (filename == null) {
582                return -1;
583            }
584            int extensionPos = filename.lastIndexOf(EXTENSION_SEPARATOR);
585            int lastSeparator = indexOfLastSeparator(filename);
586            return (lastSeparator > extensionPos ? -1 : extensionPos);
587        }
588    
589        //-----------------------------------------------------------------------
590        /**
591         * Gets the prefix from a full filename, such as <code>C:/</code>
592         * or <code>~/</code>.
593         * <p>
594         * This method will handle a file in either Unix or Windows format.
595         * The prefix includes the first slash in the full filename where applicable.
596         * <pre>
597         * Windows:
598         * a\b\c.txt           --> ""          --> relative
599         * \a\b\c.txt          --> "\"         --> current drive absolute
600         * C:a\b\c.txt         --> "C:"        --> drive relative
601         * C:\a\b\c.txt        --> "C:\"       --> absolute
602         * \\server\a\b\c.txt  --> "\\server\" --> UNC
603         *
604         * Unix:
605         * a/b/c.txt           --> ""          --> relative
606         * /a/b/c.txt          --> "/"         --> absolute
607         * ~/a/b/c.txt         --> "~/"        --> current user
608         * ~                   --> "~/"        --> current user (slash added)
609         * ~user/a/b/c.txt     --> "~user/"    --> named user
610         * ~user               --> "~user/"    --> named user (slash added)
611         * </pre>
612         * <p>
613         * The output will be the same irrespective of the machine that the code is running on.
614         * ie. both Unix and Windows prefixes are matched regardless.
615         *
616         * @param filename  the filename to query, null returns null
617         * @return the prefix of the file, null if invalid
618         */
619        public static String getPrefix(String filename) {
620            if (filename == null) {
621                return null;
622            }
623            int len = getPrefixLength(filename);
624            if (len < 0) {
625                return null;
626            }
627            if (len > filename.length()) {
628                return filename + UNIX_SEPARATOR;  // we know this only happens for unix
629            }
630            return filename.substring(0, len);
631        }
632    
633        /**
634         * Gets the path from a full filename, which excludes the prefix.
635         * <p>
636         * This method will handle a file in either Unix or Windows format.
637         * The method is entirely text based, and returns the text before and
638         * including the last forward or backslash.
639         * <pre>
640         * C:\a\b\c.txt --> a\b\
641         * ~/a/b/c.txt  --> a/b/
642         * a.txt        --> ""
643         * a/b/c        --> a/b/
644         * a/b/c/       --> a/b/c/
645         * </pre>
646         * <p>
647         * The output will be the same irrespective of the machine that the code is running on.
648         * <p>
649         * This method drops the prefix from the result.
650         * See {@link #getFullPath(String)} for the method that retains the prefix.
651         *
652         * @param filename  the filename to query, null returns null
653         * @return the path of the file, an empty string if none exists, null if invalid
654         */
655        public static String getPath(String filename) {
656            return doGetPath(filename, 1);
657        }
658    
659        /**
660         * Gets the path from a full filename, which excludes the prefix, and
661         * also excluding the final directory separator.
662         * <p>
663         * This method will handle a file in either Unix or Windows format.
664         * The method is entirely text based, and returns the text before the
665         * last forward or backslash.
666         * <pre>
667         * C:\a\b\c.txt --> a\b
668         * ~/a/b/c.txt  --> a/b
669         * a.txt        --> ""
670         * a/b/c        --> a/b
671         * a/b/c/       --> a/b/c
672         * </pre>
673         * <p>
674         * The output will be the same irrespective of the machine that the code is running on.
675         * <p>
676         * This method drops the prefix from the result.
677         * See {@link #getFullPathNoEndSeparator(String)} for the method that retains the prefix.
678         *
679         * @param filename  the filename to query, null returns null
680         * @return the path of the file, an empty string if none exists, null if invalid
681         */
682        public static String getPathNoEndSeparator(String filename) {
683            return doGetPath(filename, 0);
684        }
685    
686        /**
687         * Does the work of getting the path.
688         * 
689         * @param filename  the filename
690         * @param separatorAdd  0 to omit the end separator, 1 to return it
691         * @return the path
692         */
693        private static String doGetPath(String filename, int separatorAdd) {
694            if (filename == null) {
695                return null;
696            }
697            int prefix = getPrefixLength(filename);
698            if (prefix < 0) {
699                return null;
700            }
701            int index = indexOfLastSeparator(filename);
702            if (prefix >= filename.length() || index < 0) {
703                return "";
704            }
705            return filename.substring(prefix, index + separatorAdd);
706        }
707    
708        /**
709         * Gets the full path from a full filename, which is the prefix + path.
710         * <p>
711         * This method will handle a file in either Unix or Windows format.
712         * The method is entirely text based, and returns the text before and
713         * including the last forward or backslash.
714         * <pre>
715         * C:\a\b\c.txt --> C:\a\b\
716         * ~/a/b/c.txt  --> ~/a/b/
717         * a.txt        --> ""
718         * a/b/c        --> a/b/
719         * a/b/c/       --> a/b/c/
720         * C:           --> C:
721         * C:\          --> C:\
722         * ~            --> ~/
723         * ~/           --> ~/
724         * ~user        --> ~user/
725         * ~user/       --> ~user/
726         * </pre>
727         * <p>
728         * The output will be the same irrespective of the machine that the code is running on.
729         *
730         * @param filename  the filename to query, null returns null
731         * @return the path of the file, an empty string if none exists, null if invalid
732         */
733        public static String getFullPath(String filename) {
734            return doGetFullPath(filename, true);
735        }
736    
737        /**
738         * Gets the full path from a full filename, which is the prefix + path,
739         * and also excluding the final directory separator.
740         * <p>
741         * This method will handle a file in either Unix or Windows format.
742         * The method is entirely text based, and returns the text before the
743         * last forward or backslash.
744         * <pre>
745         * C:\a\b\c.txt --> C:\a\b
746         * ~/a/b/c.txt  --> ~/a/b
747         * a.txt        --> ""
748         * a/b/c        --> a/b
749         * a/b/c/       --> a/b/c
750         * C:           --> C:
751         * C:\          --> C:\
752         * ~            --> ~
753         * ~/           --> ~
754         * ~user        --> ~user
755         * ~user/       --> ~user
756         * </pre>
757         * <p>
758         * The output will be the same irrespective of the machine that the code is running on.
759         *
760         * @param filename  the filename to query, null returns null
761         * @return the path of the file, an empty string if none exists, null if invalid
762         */
763        public static String getFullPathNoEndSeparator(String filename) {
764            return doGetFullPath(filename, false);
765        }
766    
767        /**
768         * Does the work of getting the path.
769         * 
770         * @param filename  the filename
771         * @param includeSeparator  true to include the end separator
772         * @return the path
773         */
774        private static String doGetFullPath(String filename, boolean includeSeparator) {
775            if (filename == null) {
776                return null;
777            }
778            int prefix = getPrefixLength(filename);
779            if (prefix < 0) {
780                return null;
781            }
782            if (prefix >= filename.length()) {
783                if (includeSeparator) {
784                    return getPrefix(filename);  // add end slash if necessary
785                } else {
786                    return filename;
787                }
788            }
789            int index = indexOfLastSeparator(filename);
790            if (index < 0) {
791                return filename.substring(0, prefix);
792            }
793            int end = index + (includeSeparator ?  1 : 0);
794            return filename.substring(0, end);
795        }
796    
797        /**
798         * Gets the name minus the path from a full filename.
799         * <p>
800         * This method will handle a file in either Unix or Windows format.
801         * The text after the last forward or backslash is returned.
802         * <pre>
803         * a/b/c.txt --> c.txt
804         * a.txt     --> a.txt
805         * a/b/c     --> c
806         * a/b/c/    --> ""
807         * </pre>
808         * <p>
809         * The output will be the same irrespective of the machine that the code is running on.
810         *
811         * @param filename  the filename to query, null returns null
812         * @return the name of the file without the path, or an empty string if none exists
813         */
814        public static String getName(String filename) {
815            if (filename == null) {
816                return null;
817            }
818            int index = indexOfLastSeparator(filename);
819            return filename.substring(index + 1);
820        }
821    
822        /**
823         * Gets the base name, minus the full path and extension, from a full filename.
824         * <p>
825         * This method will handle a file in either Unix or Windows format.
826         * The text after the last forward or backslash and before the last dot is returned.
827         * <pre>
828         * a/b/c.txt --> c
829         * a.txt     --> a
830         * a/b/c     --> c
831         * a/b/c/    --> ""
832         * </pre>
833         * <p>
834         * The output will be the same irrespective of the machine that the code is running on.
835         *
836         * @param filename  the filename to query, null returns null
837         * @return the name of the file without the path, or an empty string if none exists
838         */
839        public static String getBaseName(String filename) {
840            return removeExtension(getName(filename));
841        }
842    
843        /**
844         * Gets the extension of a filename.
845         * <p>
846         * This method returns the textual part of the filename after the last dot.
847         * There must be no directory separator after the dot.
848         * <pre>
849         * foo.txt      --> "txt"
850         * a/b/c.jpg    --> "jpg"
851         * a/b.txt/c    --> ""
852         * a/b/c        --> ""
853         * </pre>
854         * <p>
855         * The output will be the same irrespective of the machine that the code is running on.
856         *
857         * @param filename the filename to retrieve the extension of.
858         * @return the extension of the file or an empty string if none exists.
859         */
860        public static String getExtension(String filename) {
861            if (filename == null) {
862                return null;
863            }
864            int index = indexOfExtension(filename);
865            if (index == -1) {
866                return "";
867            } else {
868                return filename.substring(index + 1);
869            }
870        }
871    
872        //-----------------------------------------------------------------------
873        /**
874         * Removes the extension from a filename.
875         * <p>
876         * This method returns the textual part of the filename before the last dot.
877         * There must be no directory separator after the dot.
878         * <pre>
879         * foo.txt    --> foo
880         * a\b\c.jpg  --> a\b\c
881         * a\b\c      --> a\b\c
882         * a.b\c      --> a.b\c
883         * </pre>
884         * <p>
885         * The output will be the same irrespective of the machine that the code is running on.
886         *
887         * @param filename  the filename to query, null returns null
888         * @return the filename minus the extension
889         */
890        public static String removeExtension(String filename) {
891            if (filename == null) {
892                return null;
893            }
894            int index = indexOfExtension(filename);
895            if (index == -1) {
896                return filename;
897            } else {
898                return filename.substring(0, index);
899            }
900        }
901    
902        //-----------------------------------------------------------------------
903        /**
904         * Checks whether two filenames are equal exactly.
905         * <p>
906         * No processing is performed on the filenames other than comparison,
907         * thus this is merely a null-safe case-sensitive equals.
908         *
909         * @param filename1  the first filename to query, may be null
910         * @param filename2  the second filename to query, may be null
911         * @return true if the filenames are equal, null equals null
912         * @see IOCase#SENSITIVE
913         */
914        public static boolean equals(String filename1, String filename2) {
915            return equals(filename1, filename2, false, IOCase.SENSITIVE);
916        }
917    
918        /**
919         * Checks whether two filenames are equal using the case rules of the system.
920         * <p>
921         * No processing is performed on the filenames other than comparison.
922         * The check is case-sensitive on Unix and case-insensitive on Windows.
923         *
924         * @param filename1  the first filename to query, may be null
925         * @param filename2  the second filename to query, may be null
926         * @return true if the filenames are equal, null equals null
927         * @see IOCase#SYSTEM
928         */
929        public static boolean equalsOnSystem(String filename1, String filename2) {
930            return equals(filename1, filename2, false, IOCase.SYSTEM);
931        }
932    
933        //-----------------------------------------------------------------------
934        /**
935         * Checks whether two filenames are equal after both have been normalized.
936         * <p>
937         * Both filenames are first passed to {@link #normalize(String)}.
938         * The check is then performed in a case-sensitive manner.
939         *
940         * @param filename1  the first filename to query, may be null
941         * @param filename2  the second filename to query, may be null
942         * @return true if the filenames are equal, null equals null
943         * @see IOCase#SENSITIVE
944         */
945        public static boolean equalsNormalized(String filename1, String filename2) {
946            return equals(filename1, filename2, true, IOCase.SENSITIVE);
947        }
948    
949        /**
950         * Checks whether two filenames are equal after both have been normalized
951         * and using the case rules of the system.
952         * <p>
953         * Both filenames are first passed to {@link #normalize(String)}.
954         * The check is then performed case-sensitive on Unix and
955         * case-insensitive on Windows.
956         *
957         * @param filename1  the first filename to query, may be null
958         * @param filename2  the second filename to query, may be null
959         * @return true if the filenames are equal, null equals null
960         * @see IOCase#SYSTEM
961         */
962        public static boolean equalsNormalizedOnSystem(String filename1, String filename2) {
963            return equals(filename1, filename2, true, IOCase.SYSTEM);
964        }
965    
966        /**
967         * Checks whether two filenames are equal, optionally normalizing and providing
968         * control over the case-sensitivity.
969         *
970         * @param filename1  the first filename to query, may be null
971         * @param filename2  the second filename to query, may be null
972         * @param normalized  whether to normalize the filenames
973         * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
974         * @return true if the filenames are equal, null equals null
975         * @since Commons IO 1.3
976         */
977        public static boolean equals(
978                String filename1, String filename2,
979                boolean normalized, IOCase caseSensitivity) {
980            
981            if (filename1 == null || filename2 == null) {
982                return filename1 == filename2;
983            }
984            if (normalized) {
985                filename1 = normalize(filename1);
986                filename2 = normalize(filename2);
987                if (filename1 == null || filename2 == null) {
988                    throw new NullPointerException(
989                        "Error normalizing one or both of the file names");
990                }
991            }
992            if (caseSensitivity == null) {
993                caseSensitivity = IOCase.SENSITIVE;
994            }
995            return caseSensitivity.checkEquals(filename1, filename2);
996        }
997    
998        //-----------------------------------------------------------------------
999        /**
1000         * Checks whether the extension of the filename is that specified.
1001         * <p>
1002         * This method obtains the extension as the textual part of the filename
1003         * after the last dot. There must be no directory separator after the dot.
1004         * The extension check is case-sensitive on all platforms.
1005         *
1006         * @param filename  the filename to query, null returns false
1007         * @param extension  the extension to check for, null or empty checks for no extension
1008         * @return true if the filename has the specified extension
1009         */
1010        public static boolean isExtension(String filename, String extension) {
1011            if (filename == null) {
1012                return false;
1013            }
1014            if (extension == null || extension.length() == 0) {
1015                return (indexOfExtension(filename) == -1);
1016            }
1017            String fileExt = getExtension(filename);
1018            return fileExt.equals(extension);
1019        }
1020    
1021        /**
1022         * Checks whether the extension of the filename is one of those specified.
1023         * <p>
1024         * This method obtains the extension as the textual part of the filename
1025         * after the last dot. There must be no directory separator after the dot.
1026         * The extension check is case-sensitive on all platforms.
1027         *
1028         * @param filename  the filename to query, null returns false
1029         * @param extensions  the extensions to check for, null checks for no extension
1030         * @return true if the filename is one of the extensions
1031         */
1032        public static boolean isExtension(String filename, String[] extensions) {
1033            if (filename == null) {
1034                return false;
1035            }
1036            if (extensions == null || extensions.length == 0) {
1037                return (indexOfExtension(filename) == -1);
1038            }
1039            String fileExt = getExtension(filename);
1040            for (int i = 0; i < extensions.length; i++) {
1041                if (fileExt.equals(extensions[i])) {
1042                    return true;
1043                }
1044            }
1045            return false;
1046        }
1047    
1048        /**
1049         * Checks whether the extension of the filename is one of those specified.
1050         * <p>
1051         * This method obtains the extension as the textual part of the filename
1052         * after the last dot. There must be no directory separator after the dot.
1053         * The extension check is case-sensitive on all platforms.
1054         *
1055         * @param filename  the filename to query, null returns false
1056         * @param extensions  the extensions to check for, null checks for no extension
1057         * @return true if the filename is one of the extensions
1058         */
1059        public static boolean isExtension(String filename, Collection extensions) {
1060            if (filename == null) {
1061                return false;
1062            }
1063            if (extensions == null || extensions.isEmpty()) {
1064                return (indexOfExtension(filename) == -1);
1065            }
1066            String fileExt = getExtension(filename);
1067            for (Iterator it = extensions.iterator(); it.hasNext();) {
1068                if (fileExt.equals(it.next())) {
1069                    return true;
1070                }
1071            }
1072            return false;
1073        }
1074    
1075        //-----------------------------------------------------------------------
1076        /**
1077         * Checks a filename to see if it matches the specified wildcard matcher,
1078         * always testing case-sensitive.
1079         * <p>
1080         * The wildcard matcher uses the characters '?' and '*' to represent a
1081         * single or multiple wildcard characters.
1082         * This is the same as often found on Dos/Unix command lines.
1083         * The check is case-sensitive always.
1084         * <pre>
1085         * wildcardMatch("c.txt", "*.txt")      --> true
1086         * wildcardMatch("c.txt", "*.jpg")      --> false
1087         * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1088         * wildcardMatch("c.txt", "*.???")      --> true
1089         * wildcardMatch("c.txt", "*.????")     --> false
1090         * </pre>
1091         * 
1092         * @param filename  the filename to match on
1093         * @param wildcardMatcher  the wildcard string to match against
1094         * @return true if the filename matches the wilcard string
1095         * @see IOCase#SENSITIVE
1096         */
1097        public static boolean wildcardMatch(String filename, String wildcardMatcher) {
1098            return wildcardMatch(filename, wildcardMatcher, IOCase.SENSITIVE);
1099        }
1100    
1101        /**
1102         * Checks a filename to see if it matches the specified wildcard matcher
1103         * using the case rules of the system.
1104         * <p>
1105         * The wildcard matcher uses the characters '?' and '*' to represent a
1106         * single or multiple wildcard characters.
1107         * This is the same as often found on Dos/Unix command lines.
1108         * The check is case-sensitive on Unix and case-insensitive on Windows.
1109         * <pre>
1110         * wildcardMatch("c.txt", "*.txt")      --> true
1111         * wildcardMatch("c.txt", "*.jpg")      --> false
1112         * wildcardMatch("a/b/c.txt", "a/b/*")  --> true
1113         * wildcardMatch("c.txt", "*.???")      --> true
1114         * wildcardMatch("c.txt", "*.????")     --> false
1115         * </pre>
1116         * 
1117         * @param filename  the filename to match on
1118         * @param wildcardMatcher  the wildcard string to match against
1119         * @return true if the filename matches the wilcard string
1120         * @see IOCase#SYSTEM
1121         */
1122        public static boolean wildcardMatchOnSystem(String filename, String wildcardMatcher) {
1123            return wildcardMatch(filename, wildcardMatcher, IOCase.SYSTEM);
1124        }
1125    
1126        /**
1127         * Checks a filename to see if it matches the specified wildcard matcher
1128         * allowing control over case-sensitivity.
1129         * <p>
1130         * The wildcard matcher uses the characters '?' and '*' to represent a
1131         * single or multiple wildcard characters.
1132         * 
1133         * @param filename  the filename to match on
1134         * @param wildcardMatcher  the wildcard string to match against
1135         * @param caseSensitivity  what case sensitivity rule to use, null means case-sensitive
1136         * @return true if the filename matches the wilcard string
1137         * @since Commons IO 1.3
1138         */
1139        public static boolean wildcardMatch(String filename, String wildcardMatcher, IOCase caseSensitivity) {
1140            if (filename == null && wildcardMatcher == null) {
1141                return true;
1142            }
1143            if (filename == null || wildcardMatcher == null) {
1144                return false;
1145            }
1146            if (caseSensitivity == null) {
1147                caseSensitivity = IOCase.SENSITIVE;
1148            }
1149            filename = caseSensitivity.convertCase(filename);
1150            wildcardMatcher = caseSensitivity.convertCase(wildcardMatcher);
1151            String[] wcs = splitOnTokens(wildcardMatcher);
1152            boolean anyChars = false;
1153            int textIdx = 0;
1154            int wcsIdx = 0;
1155            Stack backtrack = new Stack();
1156            
1157            // loop around a backtrack stack, to handle complex * matching
1158            do {
1159                if (backtrack.size() > 0) {
1160                    int[] array = (int[]) backtrack.pop();
1161                    wcsIdx = array[0];
1162                    textIdx = array[1];
1163                    anyChars = true;
1164                }
1165                
1166                // loop whilst tokens and text left to process
1167                while (wcsIdx < wcs.length) {
1168          
1169                    if (wcs[wcsIdx].equals("?")) {
1170                        // ? so move to next text char
1171                        textIdx++;
1172                        anyChars = false;
1173                        
1174                    } else if (wcs[wcsIdx].equals("*")) {
1175                        // set any chars status
1176                        anyChars = true;
1177                        if (wcsIdx == wcs.length - 1) {
1178                            textIdx = filename.length();
1179                        }
1180                        
1181                    } else {
1182                        // matching text token
1183                        if (anyChars) {
1184                            // any chars then try to locate text token
1185                            textIdx = filename.indexOf(wcs[wcsIdx], textIdx);
1186                            if (textIdx == -1) {
1187                                // token not found
1188                                break;
1189                            }
1190                            int repeat = filename.indexOf(wcs[wcsIdx], textIdx + 1);
1191                            if (repeat >= 0) {
1192                                backtrack.push(new int[] {wcsIdx, repeat});
1193                            }
1194                        } else {
1195                            // matching from current position
1196                            if (!filename.startsWith(wcs[wcsIdx], textIdx)) {
1197                                // couldnt match token
1198                                break;
1199                            }
1200                        }
1201          
1202                        // matched text token, move text index to end of matched token
1203                        textIdx += wcs[wcsIdx].length();
1204                        anyChars = false;
1205                    }
1206          
1207                    wcsIdx++;
1208                }
1209                
1210                // full match
1211                if (wcsIdx == wcs.length && textIdx == filename.length()) {
1212                    return true;
1213                }
1214                
1215            } while (backtrack.size() > 0);
1216      
1217            return false;
1218        }
1219    
1220        /**
1221         * Splits a string into a number of tokens.
1222         * 
1223         * @param text  the text to split
1224         * @return the tokens, never null
1225         */
1226        static String[] splitOnTokens(String text) {
1227            // used by wildcardMatch
1228            // package level so a unit test may run on this
1229            
1230            if (text.indexOf("?") == -1 && text.indexOf("*") == -1) {
1231                return new String[] { text };
1232            }
1233    
1234            char[] array = text.toCharArray();
1235            ArrayList list = new ArrayList();
1236            StringBuffer buffer = new StringBuffer();
1237            for (int i = 0; i < array.length; i++) {
1238                if (array[i] == '?' || array[i] == '*') {
1239                    if (buffer.length() != 0) {
1240                        list.add(buffer.toString());
1241                        buffer.setLength(0);
1242                    }
1243                    if (array[i] == '?') {
1244                        list.add("?");
1245                    } else if (list.size() == 0 ||
1246                            (i > 0 && list.get(list.size() - 1).equals("*") == false)) {
1247                        list.add("*");
1248                    }
1249                } else {
1250                    buffer.append(array[i]);
1251                }
1252            }
1253            if (buffer.length() != 0) {
1254                list.add(buffer.toString());
1255            }
1256    
1257            return (String[]) list.toArray( new String[ list.size() ] );
1258        }
1259    
1260    }