X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FRegexp-Wildcards.git;a=blobdiff_plain;f=lib%2FRegexp%2FWildcards.pm;h=d0f9ab3f134560b14360adb95d99b5a9e3e8577a;hp=ac0cec3d2fec31eaf66b3e66403f93e0c7e22ce3;hb=53943aeb9f826c841ac589a706679f18e2881ac4;hpb=d2abbc098aa8c983e2fd330d5282a4363c3f12a9 diff --git a/lib/Regexp/Wildcards.pm b/lib/Regexp/Wildcards.pm index ac0cec3..d0f9ab3 100644 --- a/lib/Regexp/Wildcards.pm +++ b/lib/Regexp/Wildcards.pm @@ -12,13 +12,13 @@ Regexp::Wildcards - Converts wildcard expressions to Perl regular expressions. =head1 VERSION -Version 1.02 +Version 1.03 =cut use vars qw/$VERSION/; BEGIN { - $VERSION = '1.02'; + $VERSION = '1.03'; } =head1 SYNOPSIS @@ -104,31 +104,36 @@ sub _validate { my $valid = shift; my $old = shift; $old = { } unless defined $old; - my $c; + + my %opts; if (@_ <= 1) { - $c = { set => $_[0] }; + $opts{set} = defined $_[0] ? $_[0] : { }; } elsif (@_ % 2) { croak 'Arguments must be passed as an unique scalar or as key => value pairs'; } else { - my %args = @_; - $c = { map { (exists $args{$_}) ? ($_ => $args{$_}) : () } qw/set add rem/ }; + %opts = @_; } + + my %checked; for (qw/set add rem/) { - my $v = $c->{$_}; - next unless defined $v; + my $opt = $opts{$_}; + next unless defined $opt; my $cb = { '' => sub { +{ ($_[0] => 1) x (exists $valid->{$_[0]}) } }, 'ARRAY' => sub { +{ map { ($_ => 1) x (exists $valid->{$_}) } @{$_[0]} } }, 'HASH' => sub { +{ map { ($_ => $_[0]->{$_}) x (exists $valid->{$_}) } keys %{$_[0]} } } - }->{ ref $v }; + }->{ ref $opt }; croak 'Wrong option set' unless $cb; - $c->{$_} = $cb->($v); + $checked{$_} = $cb->($opt); } - my $config = (exists $c->{set}) ? $c->{set} : $old; - $config->{$_} = $c->{add}->{$_} for grep $c->{add}->{$_}, - keys %{$c->{add} || {}}; - delete $config->{$_} for grep $c->{rem}->{$_}, keys %{$c->{rem} || {}}; + + my $config = (exists $checked{set}) ? $checked{set} : $old; + $config->{$_} = $checked{add}->{$_} for grep $checked{add}->{$_}, + keys %{$checked{add} || {}}; + delete $config->{$_} for grep $checked{rem}->{$_}, + keys %{$checked{rem} || {}}; + $config; } @@ -225,27 +230,35 @@ They fit into six classes : =item * -C<'jokers'> converts C to C<.> and C<*> to C<.*> ; +C<'jokers'> + +Converts C to C<.> and C<*> to C<.*>. 'a**\\*b??\\?c' ==> 'a.*\\*b..\\?c' =item * -C<'sql'> converts C<_> to C<.> and C<%> to C<.*> ; +C<'sql'> + +Converts C<_> to C<.> and C<%> to C<.*>. 'a%%\\%b__\\_c' ==> 'a.*\\%b..\\_c' =item * -C<'commas'> converts all C<,> to C<|> and puts the complete resulting regular expression inside C<(?: ... )> ; +C<'commas'> + +Converts all C<,> to C<|> and puts the complete resulting regular expression inside C<(?: ... )>. 'a,b{c,d},e' ==> '(?:a|b\\{c|d\\}|e)' =item * -C<'brackets'> converts all matching C<{ ... , ... }> brackets to C<(?: ... | ... )> alternations. +C<'brackets'> + +Converts all matching C<{ ... , ... }> brackets to C<(?: ... | ... )> alternations. If some brackets are unbalanced, it tries to substitute as many of them as possible, and then escape the remaining unmatched C<{> and C<}>. -Commas outside of any bracket-delimited block are also escaped ; +Commas outside of any bracket-delimited block are also escaped. 'a,b{c,d},e' ==> 'a\\,b(?:c|d)\\,e' '{a\\{b,c}d,e}' ==> '(?:a\\{b|c)d\\,e\\}' @@ -253,14 +266,18 @@ Commas outside of any bracket-delimited block are also escaped ; =item * -C<'groups'> keeps the parenthesis C<( ... )> of the original string without escaping them. +C<'groups'> + +Keeps the parenthesis C<( ... )> of the original string without escaping them. Currently, no check is done to ensure that the parenthesis are matching. 'a(b(c))d\\(\\)' ==> (no change) =item * -C<'anchors'> prevents the I C<^> and I C<$> anchors to be escaped. +C<'anchors'> + +Prevents the I C<^> and I C<$> anchors to be escaped. Since C<[...]> character class are currently escaped, a C<^> will always be interpreted as I. 'a^b$c' ==> (no change) @@ -361,28 +378,36 @@ This method works like L, except that the classes are different : =item * -C<'single'> will capture all unescaped I<"exactly one"> metacharacters, i.e. C for wildcards or C<_> for SQL ; +C<'single'> + +Captures all unescaped I<"exactly one"> metacharacters, i.e. C for wildcards or C<_> for SQL. 'a???b\\??' ==> 'a(.)(.)(.)b\\?(.)' 'a___b\\__' ==> 'a(.)(.)(.)b\\_(.)' =item * -C<'any'> will capture all unescaped I<"any"> metacharacters, i.e. C<*> for wildcards or C<%> for SQL ; +C<'any'> + +Captures all unescaped I<"any"> metacharacters, i.e. C<*> for wildcards or C<%> for SQL. 'a***b\\**' ==> 'a(.*)b\\*(.*)' 'a%%%b\\%%' ==> 'a(.*)b\\%(.*)' =item * -C<'greedy'>, when used in conjunction with C<'any'>, will make the C<'any'> captures greedy (by default they are not) ; +C<'greedy'> + +When used in conjunction with C<'any'>, it makes the C<'any'> captures greedy (by default they are not). 'a***b\\**' ==> 'a(.*?)b\\*(.*?)' 'a%%%b\\%%' ==> 'a(.*?)b\\%(.*?)' =item * -C<'brackets'> will capture matching C<{ ... , ... }> alternations. +C<'brackets'> + +Capture matching C<{ ... , ... }> alternations. 'a{b\\},\\{c}' ==> 'a(b\\}|\\{c)' @@ -394,38 +419,46 @@ C<'brackets'> will capture matching C<{ ... , ... }> alternations. $rw->capture(rem => 'greedy'); # No more greed please. $rw->capture(); # Capture nothing. +The C method returns the L object. + =head2 C Converts the wildcard expression C<$wc> into a regular expression according to the options stored into the L object, or to C<$type> if it's supplied. -It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace C<'jokers'> or C<'sql'> and C<'commas'> or C<'brackets'> (depending on the L or L options), all of this by applying the C<'capture'> rules specified in the constructor or by L. +It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace C<'jokers'>, C<'sql'> and C<'commas'> or C<'brackets'> (depending on the L or L options), all of this by applying the C<'capture'> rules specified in the constructor or by L. =cut sub convert { my ($self, $wc, $type) = @_; _check_self $self; - my $config; - if (defined $type) { - $config = $self->_type($type); - } else { - $config = $self; - } + my $config = (defined $type) ? $self->_type($type) : $self; return unless defined $wc; + + my $e = $config->{escape}; + # Escape : + # - an even number of \ that doesn't protect a regexp/wildcard metachar + # - an odd number of \ that doesn't protect a wildcard metachar + $wc =~ s/ + (?{do}; - my $e = $config->{escape}; - $wc =~ s/(?{jokers}) { - $wc = $self->_jokers($wc); - } elsif ($do->{sql}) { - $wc = $self->_sql($wc); - } + $wc = $self->_jokers($wc) if $do->{jokers}; + $wc = $self->_sql($wc) if $do->{sql}; if ($do->{brackets}) { $wc = $self->_bracketed($wc); - } elsif ($do->{commas}) { - if ($wc =~ /(?{'c_brackets'} . $self->_commas($wc) . ')'; - } + } elsif ($do->{commas} and $wc =~ /(?{'c_brackets'} . $self->_commas($wc) . ')'; } + return $wc; } @@ -466,7 +499,7 @@ Tests code coverage report is available at L{c_single}; s/(?{c_single}; s/(?