From: Vincent Pit Date: Sun, 29 Jun 2008 15:43:38 +0000 (+0200) Subject: Importing Regexp-Wildcards-0.02.tar.gz X-Git-Tag: v0.02^0 X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FRegexp-Wildcards.git;a=commitdiff_plain;h=46111541589202352d6a6a665eb03fe24e3861a6 Importing Regexp-Wildcards-0.02.tar.gz --- diff --git a/Changes b/Changes index 77c55f6..98afe3e 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,10 @@ Revision history for Regexp-Wildcards +0.02 2007-06-16 09:15 UTC + + Fix : wc2re_unix should escape top-level commas. + + Fix : added missing samples/wc2re.pl + + Add : tests descriptions in t/12-brackets.t + 0.01 2007-06-14 First version, released on an unsuspecting world. diff --git a/MANIFEST b/MANIFEST index eddf17e..f3ff075 100644 --- a/MANIFEST +++ b/MANIFEST @@ -4,6 +4,7 @@ META.yml Makefile.PL README lib/Regexp/Wildcards.pm +samples/wc2re.pl t/00-load.t t/01-import.t t/10-jokers.t diff --git a/META.yml b/META.yml index b6c1e7f..f2ef79d 100644 --- a/META.yml +++ b/META.yml @@ -1,6 +1,6 @@ --- #YAML:1.0 name: Regexp-Wildcards -version: 0.01 +version: 0.02 abstract: Converts wildcards expressions to Perl regular expressions. license: perl generated_by: ExtUtils::MakeMaker version 6.32 diff --git a/README b/README index 7469605..037d0d2 100644 --- a/README +++ b/README @@ -1,21 +1,26 @@ NAME - Regexp::Wildcards - Converts wildcards to regexps. + Regexp::Wildcards - Converts wildcards expressions to Perl regular + expressions. VERSION - Version 0.01 + Version 0.02 SYNOPSIS use Regexp::Wildcards qw/wc2re/; my $re; - $re = wc2re 'a{b.,c}*' => 'unix'; - $re = wc2re 'a.,b*' => 'win32'; + $re = wc2re 'a{b.,c}*' => 'unix'; # Do it Unix style. + $re = wc2re 'a.,b*' => 'win32'; # Do it Windows style. + $re = wc2re '*{x,y}.' => 'jokers'; # Process the jokers & escape the rest. DESCRIPTION - This module converts wildcards expressions to Perl regular expressions. - It handles the "*" and "?" 
jokers, as well as Unix bracketed - alternatives "{,}", and uses the backslash ("\") as an escape character. - Wrappers are provided to mimic the behaviour of Windows and Unix shells. + In many situations, users may want to specify patterns to match but + don't need the full power of regexps. Wildcards are one such set + of simplified rules. This module converts wildcards expressions to Perl + regular expressions, so that you can use them for matching. It handles + the "*" and "?" jokers, as well as Unix bracketed alternatives "{,}", + and uses the backslash ("\") as an escape character. Wrappers are + provided to mimic the behaviour of Windows and Unix shells. EXPORT Four functions are exported only on request : "wc2re", "wc2re_unix", @@ -24,21 +29,42 @@ EXPORT FUNCTIONS "wc2re_unix" This function takes as its only argument the wildcard string to process, - and returns the corresponding regular expression (or "undef" if the - source is invalid) according to standard Unix wildcard rules. It - successively escapes all regexp special characters that don't hold any - meaning for wildcards, turns jokers into their regexp equivalents, and - changes bracketed blocks into alternations. If brackets are unbalanced, - it will try to substitute as many of them as possible, and then escape - the remaining "{" and "}". + and returns the corresponding regular expression according to standard + Unix wildcard rules. It successively escapes all unprotected regexp + special characters that don't hold any meaning for wildcards, turns + jokers into their regexp equivalents, and changes bracketed blocks into + "(?:|)" alternations. If brackets are unbalanced, it will try to + substitute as many of them as possible, and then escape the remaining + "{" and "}". Commas outside of any bracket-delimited block will also be + escaped. + + # This is a valid brackets expression which is correctly handled. 
+ print 'ok' if wc2re_unix('{a{b,c}d,e}') eq '(?:a(?:b|c)d|e)'; + + Unbalanced bracket expressions can always be rescued, but doing so may + completely change their meaning. For example : + + # The first comma is replaced, and the remaining brackets and comma are + # escaped. + print 'ok' if wc2re_unix('{a\\{b,c}d,e}') eq '(?:a\\{b|c)d\\,e\\}'; + + # All the brackets and commas are escaped. + print 'ok' if wc2re_unix('{a{b,c\\}d,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'; "wc2re_win32" Similar to the previous function, but for Windows wildcards. Bracketed blocks are no longer handled (which means that brackets will be escaped), but - you can still provide a comma-separated list of items. + you can provide a comma-separated list of items. + + # All the brackets are escaped, and commas are seen as list delimiters. + print 'ok' if wc2re_win32('{a{b,c}d,e}') eq '(?:\\{a\\{b|c\\}d|e\\})'; "wc2re_jokers" - This one only handles the "?" and "*" jokers. + This one only handles the "?" and "*" jokers. All other unquoted regexp + metacharacters will be escaped. + + # Everything is escaped. + print 'ok' if wc2re_jokers('{a{b,c}d,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'; "wc2re" A generic function that wraps around all the different rules. The first @@ -46,7 +72,12 @@ FUNCTIONS rules to apply, currently either "unix", "win32" or "jokers". If the type is undefined, it defaults to "unix". +DEPENDENCIES + Text::Balanced, which has been bundled with perl since version 5.7.3 + SEE ALSO + Some modules provide incomplete alternatives as helper functions : + Net::FTPServer has a method for that. Only jokers are translated, and escaping won't preserve them. diff --git a/lib/Regexp/Wildcards.pm b/lib/Regexp/Wildcards.pm index c7b49a6..62141e2 100644 --- a/lib/Regexp/Wildcards.pm +++ b/lib/Regexp/Wildcards.pm @@ -11,11 +11,11 @@ Regexp::Wildcards - Converts wildcards expressions to Perl regular expressions. 
=head1 VERSION -Version 0.01 +Version 0.02 =cut -our $VERSION = '0.01'; +our $VERSION = '0.02'; =head1 SYNOPSIS @@ -53,18 +53,19 @@ our %EXPORT_TAGS = ( all => [ @EXPORT_OK ] ); =head2 C<wc2re_unix> -This function takes as its only argument the wildcard string to process, and returns the corresponding regular expression (or C<undef> if the source is invalid) according to standard Unix wildcard rules. It successively escapes all unprotected regexp special characters that don't hold any meaning for wildcards, turns jokers into their regexp equivalents, and changes bracketed blocks into C<(?:|)> alternations. If brackets are unbalanced, it will try to substitute as many of them as possible, and then escape the remaining C<{> and C<}>. +This function takes as its only argument the wildcard string to process, and returns the corresponding regular expression according to standard Unix wildcard rules. It successively escapes all unprotected regexp special characters that don't hold any meaning for wildcards, turns jokers into their regexp equivalents, and changes bracketed blocks into C<(?:|)> alternations. If brackets are unbalanced, it will try to substitute as many of them as possible, and then escape the remaining C<{> and C<}>. Commas outside of any bracket-delimited block will also be escaped. -Unbalanced bracket expressions can always be rescued, but doing so may completely change their meaning. As a side effect, commas that first appear to be between brackets can be taken at the uppermost level, which invalidates the pattern. For example : + # This is a valid brackets expression which is correctly handled. + print 'ok' if wc2re_unix('{a{b,c}d,e}') eq '(?:a(?:b|c)d|e)'; - # The last orphaned } gets escaped, and the first comma is replaced. - # We also need to escape the comma because unix doesn't allow them out - # of brackets. 
- print 'ok' if wc2re_unix('{a\\{b,c}d\\,e}') eq '(?:a\\{b|c)d\\,e\\}'; +Unbalanced bracket expressions can always be rescued, but doing so may completely change their meaning. For example : - # All of the unprotected brackets are escaped, which means that we must - # escape all the commas. - print 'ok' if wc2re_unix('{a{b\\,c\\}d\\,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'; + # The first comma is replaced, and the remaining brackets and comma are + # escaped. + print 'ok' if wc2re_unix('{a\\{b,c}d,e}') eq '(?:a\\{b|c)d\\,e\\}'; + + # All the brackets and commas are escaped. + print 'ok' if wc2re_unix('{a{b,c\\}d,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'; =cut @@ -77,7 +78,10 @@ sub wc2re_unix { =head2 C<wc2re_win32> -Similar to the previous function, but for Windows wildcards. Bracketed blocks are no longer handled (which means that brackets will be escaped), but you can still provide a comma-separated list of items. +Similar to the previous function, but for Windows wildcards. Bracketed blocks are no longer handled (which means that brackets will be escaped), but you can provide a comma-separated list of items. + + # All the brackets are escaped, and commas are seen as list delimiters. + print 'ok' if wc2re_win32('{a{b,c}d,e}') eq '(?:\\{a\\{b|c\\}d|e\\})'; =cut @@ -94,7 +98,10 @@ sub wc2re_win32 { =head2 C<wc2re_jokers> -This one only handles the C<?> and C<*> jokers. All other unquoted regexp metacharacters will be quoted. +This one only handles the C<?> and C<*> jokers. All other unquoted regexp metacharacters will be escaped. + + # Everything is escaped. + print 'ok' if wc2re_jokers('{a{b,c}d,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'; =cut @@ -194,12 +201,10 @@ sub do_bracketed { my $rest = shift; my ($re, $bracket, $prefix) = (''); while (($bracket, $rest, $prefix) = extract $rest and $bracket) { - return undef if $prefix =~ /(? 
', wc2re($_ => $type), "\n" for @ARGV; diff --git a/t/11-commas.t b/t/11-commas.t index d616e9b..ba6703a 100644 --- a/t/11-commas.t +++ b/t/11-commas.t @@ -1,15 +1,19 @@ #!perl -T -use Test::More tests => 7; +use Test::More tests => 8; use Regexp::Wildcards qw/wc2re_unix wc2re_win32/; -ok((not defined wc2re_unix('a,b,c')), 'unix: no commas allowed out of brackets'); -ok(wc2re_unix('a\\,b\\\\\\,c') eq 'a\\,b\\\\\\,c', 'unix: no commas allowed out of brackets'); +ok(wc2re_unix('a,b,c') eq 'a\\,b\\,c', 'unix: commas outside of brackets 1'); +ok(wc2re_unix('a\\,b\\\\\\,c') eq 'a\\,b\\\\\\,c', + 'unix: commas outside of brackets 2'); +ok(wc2re_unix(',a,b,c\\\\,') eq '\\,a\\,b\\,c\\\\\\,', + 'unix: commas outside of brackets at begin/ed'); ok(wc2re_win32('a,b\\\\,c') eq '(?:a|b\\\\|c)', 'win32: commas'); ok(wc2re_win32('a\\,b\\\\,c') eq '(?:a\\,b\\\\|c)', 'win32: escaped commas 1'); ok(wc2re_win32('a\\,b\\\\\\,c') eq 'a\\,b\\\\\\,c', 'win32: escaped commas 2'); ok(wc2re_win32(',a,b\\\\,') eq '(?:|a|b\\\\|)', 'win32: commas at begin/end'); -ok(wc2re_win32('\\,a,b\\\\\\,') eq '(?:\\,a|b\\\\\\,)', 'win32: escaped commas at begin/end'); +ok(wc2re_win32('\\,a,b\\\\\\,') eq '(?:\\,a|b\\\\\\,)', + 'win32: escaped commas at begin/end'); diff --git a/t/12-brackets.t b/t/12-brackets.t index a8da49e..ff8cf93 100644 --- a/t/12-brackets.t +++ b/t/12-brackets.t @@ -1,40 +1,58 @@ #!perl -T -use Test::More tests => 28; +use Test::More tests => 26; use Regexp::Wildcards qw/wc2re_jokers wc2re_unix wc2re_win32/; -ok(wc2re_jokers('a{b\\\\,c\\\\}d') eq 'a\\{b\\\\\\,c\\\\\\}d'); - -ok(wc2re_win32('a{b\\\\,c\\\\}d') eq '(?:a\\{b\\\\|c\\\\\\}d)'); - -ok(wc2re_unix('{}') eq '(?:)'); -ok(wc2re_unix('{a}') eq '(?:a)'); -ok(wc2re_unix('{a,b}') eq '(?:a|b)'); -ok(wc2re_unix('{a,b,c}') eq '(?:a|b|c)'); - -ok(wc2re_unix('a{b,c}d') eq 'a(?:b|c)d'); -ok(wc2re_unix('a{b,c}d{e,,f}') eq 'a(?:b|c)d(?:e||f)'); -ok(wc2re_unix('a{b,c}d{e,,f}{g,h,}') eq 'a(?:b|c)d(?:e||f)(?:g|h|)'); - -ok(wc2re_unix('{a{b}}') 
eq '(?:a(?:b))'); -ok(wc2re_unix('{a,{b},c}') eq '(?:a|(?:b)|c)'); -ok(wc2re_unix('{a,{b{d}e},c}') eq '(?:a|(?:b(?:d)e)|c)'); -ok(wc2re_unix('{a,{b{d{}}e,f,,},c}') eq '(?:a|(?:b(?:d(?:))e|f||)|c)'); -ok(wc2re_unix('{a,{b{d{}}e,f,,},c}{,g{{}h,i}}') eq '(?:a|(?:b(?:d(?:))e|f||)|c)(?:|g(?:(?:)h|i))'); - -ok(wc2re_unix('\\{\\\\}') eq '\\{\\\\\\}'); -ok((not defined wc2re_unix('\\{a,b,c\\\\\\}'))); -ok(wc2re_unix('\\{a\\\\\\,b\\,c}') eq '\\{a\\\\\\,b\\,c\\}'); -ok(wc2re_unix('\\{a\\\\\\,b\\,c\\}') eq '\\{a\\\\\\,b\\,c\\}'); -ok(wc2re_unix('\\{a\\\\\\,b\\,c\\\\}') eq '\\{a\\\\\\,b\\,c\\\\\\}'); - -ok(wc2re_unix('{a\\},b\\{,c}') eq '(?:a\\}|b\\{|c)'); -ok((not defined wc2re_unix('{a,\\{}b,c}'))); -ok((not defined wc2re_unix('{a\\{}b,c}'))); -ok(wc2re_unix('{a\\{b,c}d\\,e}') eq '(?:a\\{b|c)d\\,e\\}'); -ok(wc2re_unix('{a{b\\,c\\}d\\,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'); -ok(wc2re_unix('{a\\{\\\\}b\\,c\\\\}') eq '(?:a\\{\\\\)b\\,c\\\\\\}'); -ok(wc2re_unix('{a,\\{\\}b,c}') eq '(?:a|\\{\\}b|c)'); -ok(wc2re_unix('{a,\\{d,e,,\\}b,c}') eq '(?:a|\\{d|e||\\}b|c)'); -ok(wc2re_unix('{a,\\{d,e,,\\}b,c}\\\\{f,g,h,i}') eq '(?:a|\\{d|e||\\}b|c)\\\\(?:f|g|h|i)'); +ok(wc2re_jokers('a{b\\\\,c\\\\}d') eq 'a\\{b\\\\\\,c\\\\\\}d', 'wc2re_jokers'); + +ok(wc2re_win32('a{b\\\\,c\\\\}d') eq '(?:a\\{b\\\\|c\\\\\\}d)', 'wc2re_win32'); + +ok(wc2re_unix('{}') eq '(?:)', 'empty brackets'); +ok(wc2re_unix('{a}') eq '(?:a)', 'brackets 1'); +ok(wc2re_unix('{a,b}') eq '(?:a|b)', 'brackets 2'); +ok(wc2re_unix('{a,b,c}') eq '(?:a|b|c)', 'brackets 3'); + +ok(wc2re_unix('a{b,c}d') eq 'a(?:b|c)d', + '1 bracketed block'); +ok(wc2re_unix('a{b,c}d{e,,f}') eq 'a(?:b|c)d(?:e||f)', + '2 bracketed blocks'); +ok(wc2re_unix('a{b,c}d{e,,f}{g,h,}') eq 'a(?:b|c)d(?:e||f)(?:g|h|)', + '3 bracketed blocks'); + +ok(wc2re_unix('{a{b}}') eq '(?:a(?:b))', + '2 nested bracketed blocks 1'); +ok(wc2re_unix('{a,{b},c}') eq '(?:a|(?:b)|c)', + '2 nested bracketed blocks 2'); +ok(wc2re_unix('{a,{b{d}e},c}') eq '(?:a|(?:b(?:d)e)|c)', + '3 
nested bracketed blocks'); +ok(wc2re_unix('{a,{b{d{}}e,f,,},c}') eq '(?:a|(?:b(?:d(?:))e|f||)|c)', + '4 nested bracketed blocks'); +ok(wc2re_unix('{a,{b{d{}}e,f,,},c}{,g{{}h,i}}') eq '(?:a|(?:b(?:d(?:))e|f||)|c)(?:|g(?:(?:)h|i))', + '4+3 nested bracketed blocks'); + +ok(wc2re_unix('\\{\\\\}') eq '\\{\\\\\\}', + 'escaping brackets'); +ok(wc2re_unix('\\{a,b,c\\\\\\}') eq '\\{a\\,b\\,c\\\\\\}', + 'escaping commas 1'); +ok(wc2re_unix('\\{a\\\\,b\\,c}') eq '\\{a\\\\\\,b\\,c\\}', + 'escaping commas 2'); +ok(wc2re_unix('\\{a\\\\,b\\,c\\}') eq '\\{a\\\\\\,b\\,c\\}', + 'escaping commas 3'); +ok(wc2re_unix('\\{a\\\\,b\\,c\\\\}') eq '\\{a\\\\\\,b\\,c\\\\\\}', + 'escaping brackets and commas'); + +ok(wc2re_unix('{a\\},b\\{,c}') eq '(?:a\\}|b\\{|c)', + 'overlapping brackets'); +ok(wc2re_unix('{a\\{b,c}d,e}') eq '(?:a\\{b|c)d\\,e\\}', + 'partial unbalanced catching 1'); +ok(wc2re_unix('{a\\{\\\\}b,c\\\\}') eq '(?:a\\{\\\\)b\\,c\\\\\\}', + 'partial unbalanced catching 2'); +ok(wc2re_unix('{a{b,c\\}d,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}', + 'no partial unbalanced catching'); +ok(wc2re_unix('{a,\\{,\\},b}') eq '(?:a|\\{|\\}|b)', + 'substituting commas 1'); +ok(wc2re_unix('{a,\\{d,e,,\\}b,c}') eq '(?:a|\\{d|e||\\}b|c)', + 'substituting commas 2'); +ok(wc2re_unix('{a,\\{d,e,,\\}b,c}\\\\{f,g,h,i}') eq '(?:a|\\{d|e||\\}b|c)\\\\(?:f|g|h|i)', + 'handling the rest');