123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495 |
|
/*******************************************************************************
copyright: Copyright (c) 2006-2009 Max Samukha, Thomas Kühne,
Grzegorz Adam Hankiewicz
license: BSD style: $(LICENSE)
version: Dec 2006: Initial release
Jan 2009: Replaced normalize
author: Max Samukha, Thomas Kühne,
Grzegorz Adam Hankiewicz
*******************************************************************************/
module tango.util.PathUtil;
private import tango.core.Exception;
private extern (C) void memmove (void* dst, void* src, uint bytes);
version (Quiet){} else
pragma (msg, "revision: util.PathUtil has been merged into io.Path - please update your import statements");
/*******************************************************************************
Normalizes a path component.
. segments are removed
<segment>/.. are removed
On Windows, \ will be converted to / prior to normalization.
Multiple consecutive forward slashes are replaced with a single forward slash.
Note that any number of .. segments at the front is ignored,
unless it is an absolute path, in which case they are removed.
The input path is copied into either the provided buffer, or a heap
allocated array if no buffer was provided. Normalization modifies
this copy before returning the relevant slice.
Examples:
-----
normalize("/home/foo/./bar/../../john/doe"); // => "/home/john/doe"
-----
*******************************************************************************/
private enum {NodeStackLength = 64}
deprecated char[] normalize(char[] path, char[] buf = null)
{
// Whether the path is absolute
bool isAbsolute;
// Current position
size_t idx;
// Position to move
size_t moveTo;
// Starting positions of regular path segments are pushed on this stack
// to avoid backward scanning when .. segments are encountered.
size_t[NodeStackLength] nodeStack;
size_t nodeStackTop;
// Moves the path tail starting at the current position to moveTo.
// Then sets the current position to moveTo.
void move()
{
auto len = path.length - idx;
memmove(path.ptr + moveTo, path.ptr + idx, len);
path = path[0..moveTo + len];
idx = moveTo;
}
// Checks if the character at the current position is a separator.
// If true, normalizes the separator to '/' on Windows and advances the
// current position to the next character.
bool isSep(ref size_t i)
{
char c = path[i];
version (Windows)
{
if (c == '\\')
path[i] = '/';
else if (c != '/')
return false;
}
else
{
if (c != '/')
return false;
}
i++;
return true;
}
if (buf is null)
path = path.dup;
else
path = buf[0..path.length] = path;
version (Windows)
{
// Skip Windows drive specifiers
if (path.length >= 2 && path[1] == ':')
{
auto c = path[0];
if (c >= 'a' && c <= 'z')
{
path[0] = c - 32;
idx = 2;
}
else if (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z')
idx = 2;
}
}
if (idx == path.length)
return path;
moveTo = idx;
if (isSep(idx))
{
moveTo++; // preserve root separator.
isAbsolute = true;
}
while (idx < path.length)
{
// Skip duplicate separators
if (isSep(idx))
continue;
if (path[idx] == '.')
{
// leave the current position at the start of the segment
auto i = idx + 1;
if (i < path.length && path[i] == '.')
{
i++;
if (i == path.length || isSep(i))
{
// It is a '..' segment. If the stack is not empty, set
// moveTo and the current position
// to the start position of the last found regular segment.
if (nodeStackTop > 0)
moveTo = nodeStack[--nodeStackTop];
// If no regular segment start positions on the stack, drop the
// .. segment if it is absolute path
// or, otherwise, advance moveTo and the current position to
// the character after the '..' segment
else if (!isAbsolute)
{
if (moveTo != idx)
{
i -= idx - moveTo;
move();
}
moveTo = i;
}
idx = i;
continue;
}
}
// If it is '.' segment, skip it.
if (i == path.length || isSep(i))
{
idx = i;
continue;
}
}
// Remove excessive '/', '.' and/or '..' preceeding the segment.
if (moveTo != idx)
move();
// Push the start position of the regular segment on the stack
assert(nodeStackTop < NodeStackLength);
nodeStack[nodeStackTop++] = idx;
// Skip the regular segment and set moveTo to the position after the segment
// (including the trailing '/' if present)
for(; idx < path.length && !isSep(idx); idx++) {}
moveTo = idx;
}
if (moveTo != idx)
move();
return path;
}
debug (UnitTest)
{
unittest
{
assert (normalize ("") == "");
assert (normalize ("/home/../john/../.tango/.htaccess") == "/.tango/.htaccess");
assert (normalize ("/home/../john/../.tango/foo.conf") == "/.tango/foo.conf");
assert (normalize ("/home/john/.tango/foo.conf") == "/home/john/.tango/foo.conf");
assert (normalize ("/foo/bar/.htaccess") == "/foo/bar/.htaccess");
assert (normalize ("foo/bar/././.") == "foo/bar/");
assert (normalize ("././foo/././././bar") == "foo/bar");
assert (normalize ("/foo/../john") == "/john");
assert (normalize ("foo/../john") == "john");
assert (normalize ("foo/bar/..") == "foo/");
assert (normalize ("foo/bar/../john") == "foo/john");
assert (normalize ("foo/bar/doe/../../john") == "foo/john");
assert (normalize ("foo/bar/doe/../../john/../bar") == "foo/bar");
assert (normalize ("./foo/bar/doe") == "foo/bar/doe");
assert (normalize ("./foo/bar/doe/../../john/../bar") == "foo/bar");
assert (normalize ("./foo/bar/../../john/../bar") == "bar");
assert (normalize ("foo/bar/./doe/../../john") == "foo/john");
assert (normalize ("../../foo/bar") == "../../foo/bar");
assert (normalize ("../../../foo/bar") == "../../../foo/bar");
assert (normalize ("d/") == "d/");
assert (normalize ("/home/john/./foo/bar.txt") == "/home/john/foo/bar.txt");
assert (normalize ("/home//john") == "/home/john");
assert (normalize("/../../bar/") == "/bar/");
assert (normalize("/../../bar/../baz/./") == "/baz/");
assert (normalize("/../../bar/boo/../baz/.bar/.") == "/bar/baz/.bar/");
assert (normalize("../..///.///bar/..//..//baz/.//boo/..") == "../../../baz/");
assert (normalize("./bar/./..boo/./..bar././/") == "bar/..boo/..bar./");
assert (normalize("/bar/..") == "/");
assert (normalize("bar/") == "bar/");
assert (normalize(".../") == ".../");
assert (normalize("///../foo") == "/foo");
assert (normalize("./foo") == "foo");
auto buf = new char[100];
auto ret = normalize("foo/bar/./baz", buf);
assert (ret.ptr == buf.ptr);
assert (ret == "foo/bar/baz");
version (Windows) {
assert (normalize ("\\foo\\..\\john") == "/john");
assert (normalize ("foo\\..\\john") == "john");
assert (normalize ("foo\\bar\\..") == "foo/");
assert (normalize ("foo\\bar\\..\\john") == "foo/john");
assert (normalize ("foo\\bar\\doe\\..\\..\\john") == "foo/john");
assert (normalize ("foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
assert (normalize (".\\foo\\bar\\doe") == "foo/bar/doe");
assert (normalize (".\\foo\\bar\\doe\\..\\..\\john\\..\\bar") == "foo/bar");
assert (normalize (".\\foo\\bar\\..\\..\\john\\..\\bar") == "bar");
assert (normalize ("foo\\bar\\.\\doe\\..\\..\\john") == "foo/john");
assert (normalize ("..\\..\\foo\\bar") == "../../foo/bar");
assert (normalize ("..\\..\\..\\foo\\bar") == "../../../foo/bar");
assert (normalize(r"C:") == "C:");
assert (normalize(r"C") == "C");
assert (normalize(r"c:\") == "C:/");
assert (normalize(r"C:\..\.\..\..\") == "C:/");
assert (normalize(r"c:..\.\boo\") == "C:../boo/");
assert (normalize(r"C:..\..\boo\foo\..\.\..\..\bar") == "C:../../../bar");
assert (normalize(r"C:boo\..") == "C:");
}
}
}
/******************************************************************************
Matches a pattern against a filename.
Some characters of pattern have special a meaning (they are
<i>meta-characters</i>) and <b>can't</b> be escaped. These are:
<p><table>
<tr><td><b>*</b></td>
<td>Matches 0 or more instances of any character.</td></tr>
<tr><td><b>?</b></td>
<td>Matches exactly one instances of any character.</td></tr>
<tr><td><b>[</b><i>chars</i><b>]</b></td>
<td>Matches one instance of any character that appears
between the brackets.</td></tr>
<tr><td><b>[!</b><i>chars</i><b>]</b></td>
<td>Matches one instance of any character that does not appear
between the brackets after the exclamation mark.</td></tr>
</table><p>
Internally individual character comparisons are done calling
charMatch(), so its rules apply here too. Note that path
separators and dots don't stop a meta-character from matching
further portions of the filename.
Returns: true if pattern matches filename, false otherwise.
See_Also: charMatch().
Throws: Nothing.
Examples:
-----
version(Win32)
{
patternMatch("foo.bar", "*") // => true
patternMatch(r"foo/foo\bar", "f*b*r") // => true
patternMatch("foo.bar", "f?bar") // => false
patternMatch("Goo.bar", "[fg]???bar") // => true
patternMatch(r"d:\foo\bar", "d*foo?bar") // => true
}
version(Posix)
{
patternMatch("Go*.bar", "[fg]???bar") // => false
patternMatch("/foo*home/bar", "?foo*bar") // => true
patternMatch("foobar", "foo?bar") // => true
}
-----
******************************************************************************/
deprecated bool patternMatch(char[] filename, char[] pattern)
in
{
// Verify that pattern[] is valid
int i;
int inbracket = false;
for (i = 0; i < pattern.length; i++)
{
switch (pattern[i])
{
case '[':
assert(!inbracket);
inbracket = true;
break;
case ']':
assert(inbracket);
inbracket = false;
break;
default:
break;
}
}
}
body
{
int pi;
int ni;
char pc;
char nc;
int j;
int not;
int anymatch;
ni = 0;
for (pi = 0; pi < pattern.length; pi++)
{
pc = pattern[pi];
switch (pc)
{
case '*':
if (pi + 1 == pattern.length)
goto match;
for (j = ni; j < filename.length; j++)
{
if (patternMatch(filename[j .. filename.length],
pattern[pi + 1 .. pattern.length]))
goto match;
}
goto nomatch;
case '?':
if (ni == filename.length)
goto nomatch;
ni++;
break;
case '[':
if (ni == filename.length)
goto nomatch;
nc = filename[ni];
ni++;
not = 0;
pi++;
if (pattern[pi] == '!')
{
not = 1;
pi++;
}
anymatch = 0;
while (1)
{
pc = pattern[pi];
if (pc == ']')
break;
if (!anymatch && charMatch(nc, pc))
anymatch = 1;
pi++;
}
if (!(anymatch ^ not))
goto nomatch;
break;
default:
if (ni == filename.length)
goto nomatch;
nc = filename[ni];
if (!charMatch(pc, nc))
goto nomatch;
ni++;
break;
}
}
if (ni < filename.length)
goto nomatch;
match:
return true;
nomatch:
return false;
}
debug (UnitTest)
{
unittest
{
version (Win32)
assert(patternMatch("foo", "Foo"));
version (Posix)
assert(!patternMatch("foo", "Foo"));
assert(patternMatch("foo", "*"));
assert(patternMatch("foo.bar", "*"));
assert(patternMatch("foo.bar", "*.*"));
assert(patternMatch("foo.bar", "foo*"));
assert(patternMatch("foo.bar", "f*bar"));
assert(patternMatch("foo.bar", "f*b*r"));
assert(patternMatch("foo.bar", "f???bar"));
assert(patternMatch("foo.bar", "[fg]???bar"));
assert(patternMatch("foo.bar", "[!gh]*bar"));
assert(!patternMatch("foo", "bar"));
assert(!patternMatch("foo", "*.*"));
assert(!patternMatch("foo.bar", "f*baz"));
assert(!patternMatch("foo.bar", "f*b*x"));
assert(!patternMatch("foo.bar", "[gh]???bar"));
assert(!patternMatch("foo.bar", "[!fg]*bar"));
assert(!patternMatch("foo.bar", "[fg]???baz"));
}
}
/******************************************************************************
Matches filename characters.
Under Windows, the comparison is done ignoring case. Under Linux
an exact match is performed.
Returns: true if c1 matches c2, false otherwise.
Throws: Nothing.
Examples:
-----
version(Win32)
{
charMatch('a', 'b') // => false
charMatch('A', 'a') // => true
}
version(Posix)
{
charMatch('a', 'b') // => false
charMatch('A', 'a') // => false
}
-----
******************************************************************************/
private bool charMatch(char c1, char c2)
{
version (Win32)
{
if (c1 != c2)
{
return ((c1 >= 'a' && c1 <= 'z') ? c1 - ('a' - 'A') : c1) ==
((c2 >= 'a' && c2 <= 'z') ? c2 - ('a' - 'A') : c2);
}
return true;
}
version (Posix)
{
return c1 == c2;
}
}
|