X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FRegexp-Wildcards.git;a=blobdiff_plain;f=lib%2FRegexp%2FWildcards.pm;h=b65eebe021ea48d90c8ffe45b6bafddc2d50a9a0;hp=f0b471a8be5cea353f87254b5e3c164849e703db;hb=510bd793e8fac0683f0ac86adbfd48b5af7b14b9;hpb=9375fddef45f4884a1e447e6d31f608ce18b22a2 diff --git a/lib/Regexp/Wildcards.pm b/lib/Regexp/Wildcards.pm index f0b471a..b65eebe 100644 --- a/lib/Regexp/Wildcards.pm +++ b/lib/Regexp/Wildcards.pm @@ -3,8 +3,9 @@ package Regexp::Wildcards; use strict; use warnings; -use Carp qw/croak/; -use Text::Balanced qw/extract_bracketed/; +use Carp qw; +use Scalar::Util qw; +use Text::Balanced qw; =head1 NAME @@ -12,13 +13,13 @@ Regexp::Wildcards - Converts wildcard expressions to Perl regular expressions. =head1 VERSION -Version 1.02 +Version 1.04 =cut -use vars qw/$VERSION/; +use vars qw<$VERSION>; BEGIN { - $VERSION = '1.02'; + $VERSION = '1.04'; } =head1 SYNOPSIS @@ -34,12 +35,12 @@ BEGIN { $re = $rw->convert('%a_c%', 'sql'); # Turn SQL wildcards into regexps. $rw = Regexp::Wildcards->new( - do => [ qw/jokers brackets/ ], # Do jokers and brackets. - capture => [ qw/any greedy/ ], # Capture *'s greedily. + do => [ qw ], # Do jokers and brackets. + capture => [ qw ], # Capture *'s greedily. ); $rw->do(add => 'groups'); # Don't escape groups. - $rw->capture(rem => [ qw/greedy/ ]); # Actually we want non-greedy matches. + $rw->capture(rem => [ qw ]); # Actually we want non-greedy matches. $re = $rw->convert('*a{,(b)?}?c*'); # '(.*?)a(?:|(b).).c(.*?)' $rw->capture(); # No more captures. @@ -61,24 +62,24 @@ Typesets that mimic the behaviour of Windows and Unix shells are also provided. sub _check_self { croak 'First argument isn\'t a valid ' . __PACKAGE__ . ' object' - unless ref $_[0] and $_[0]->isa(__PACKAGE__); + unless blessed $_[0] and $_[0]->isa(__PACKAGE__); } my %types = ( - jokers => [ qw/jokers/ ], - sql => [ qw/sql/ ], - commas => [ qw/commas/ ], - brackets => [ qw/brackets/ ], - unix => [ qw/jokers brackets/ ], - win32 => [ qw/jokers commas/ ], + jokers => [ qw ], + sql => [ qw ], + commas => [ qw ], + brackets => [ qw ], + unix => [ qw ], + win32 => [ qw ], ); -$types{$_} = $types{win32} for qw/dos os2 MSWin32 cygwin/; -$types{$_} = $types{unix} for qw/linux +$types{$_} = $types{win32} for qw; +$types{$_} = $types{unix} for qw; my %escapes = ( jokers => '?*', @@ -95,7 +96,7 @@ my %captures = ( : '(.*?)') : '.*' }, brackets => sub { $_[1] ? '(' : '(?:'; }, - greedy => undef + greedy => undef, ); sub _validate { @@ -115,9 +116,10 @@ sub _validate { } my %checked; - for (qw/set add rem/) { + for (qw) { my $opt = $opts{$_}; next unless defined $opt; + my $cb = { '' => sub { +{ ($_[0] => 1) x (exists $valid->{$_[0]}) } }, 'ARRAY' => sub { +{ map { ($_ => 1) x (exists $valid->{$_}) } @{$_[0]} } }, @@ -139,24 +141,29 @@ sub _validate { sub _do { my $self = shift; + my $config; - $config->{do} = $self->_validate(\%escapes, $self->{do}, @_); - $config->{escape} = ''; + $config->{do} = $self->_validate(\%escapes, $self->{do}, @_); + $config->{escape} = ''; $config->{escape} .= $escapes{$_} for keys %{$config->{do}}; - $config->{escape} = quotemeta $config->{escape}; + $config->{escape} = quotemeta $config->{escape}; + $config; } sub do { my $self = shift; _check_self $self; - my $config = $self->_do(@_); + + my $config = $self->_do(@_); $self->{$_} = $config->{$_} for keys %$config; + $self; } sub _capture { my $self = shift; + my $config; $config->{capture} = $self->_validate(\%captures, $self->{capture}, @_); $config->{greedy} = delete $config->{capture}->{greedy}; @@ -164,50 +171,63 @@ sub _capture { $config->{'c_' . $_} = $captures{$_}->($config, $config->{capture}->{$_}) if $captures{$_}; # Skip 'greedy' } + $config; } sub capture { my $self = shift; _check_self $self; - my $config = $self->_capture(@_); + + my $config = $self->_capture(@_); $self->{$_} = $config->{$_} for keys %$config; + $self; } sub _type { my ($self, $type) = @_; - $type = 'unix' unless defined $type; - croak 'Wrong type' unless exists $types{$type}; - my $config = $self->_do($types{$type}); + $type = 'unix' unless defined $type; + croak 'Wrong type' unless exists $types{$type}; + + my $config = $self->_do($types{$type}); $config->{type} = $type; + $config; } sub type { my $self = shift; _check_self $self; - my $config = $self->_type(@_); + + my $config = $self->_type(@_); $self->{$_} = $config->{$_} for keys %$config; + $self; } sub new { my $class = shift; - $class = ref($class) || $class || __PACKAGE__; + $class = blessed($class) || $class || __PACKAGE__; + croak 'Optional arguments must be passed as key => value pairs' if @_ % 2; my %args = @_; - my $self = { }; - bless $self, $class; + + my $self = bless { }, $class; + if (defined $args{do}) { $self->do($args{do}); } else { $self->type($args{type}); } + $self->capture($args{capture}); } -=head2 C<< new [ do => $what E type => $type ], capture => $captures >> +=head2 C + + my $rw = Regexp::Wildcards->new(do => $what, capture => $capture); + my $rw = Regexp::Wildcards->new(type => $type, capture => $capture); Constructs a new L object. @@ -221,7 +241,12 @@ The C option overrides C. C lists which atoms should be capturing. Refer to L for more details. -=head2 C<< do [ $what E set => $c1, add => $c2, rem => $c3 ] >> +=head2 C + + $rw->do($what); + $rw->do(set => $c1); + $rw->do(add => $c2); + $rw->do(rem => $c3); Specifies the list of metacharacters to convert or to prevent for escaping. They fit into six classes : @@ -230,27 +255,35 @@ They fit into six classes : =item * -C<'jokers'> converts C to C<.> and C<*> to C<.*> ; +C<'jokers'> + +Converts C to C<.> and C<*> to C<.*>. 'a**\\*b??\\?c' ==> 'a.*\\*b..\\?c' =item * -C<'sql'> converts C<_> to C<.> and C<%> to C<.*> ; +C<'sql'> + +Converts C<_> to C<.> and C<%> to C<.*>. 'a%%\\%b__\\_c' ==> 'a.*\\%b..\\_c' =item * -C<'commas'> converts all C<,> to C<|> and puts the complete resulting regular expression inside C<(?: ... )> ; +C<'commas'> + +Converts all C<,> to C<|> and puts the complete resulting regular expression inside C<(?: ... )>. 'a,b{c,d},e' ==> '(?:a|b\\{c|d\\}|e)' =item * -C<'brackets'> converts all matching C<{ ... , ... }> brackets to C<(?: ... | ... )> alternations. +C<'brackets'> + +Converts all matching C<{ ... , ... }> brackets to C<(?: ... | ... )> alternations. If some brackets are unbalanced, it tries to substitute as many of them as possible, and then escape the remaining unmatched C<{> and C<}>. -Commas outside of any bracket-delimited block are also escaped ; +Commas outside of any bracket-delimited block are also escaped. 'a,b{c,d},e' ==> 'a\\,b(?:c|d)\\,e' '{a\\{b,c}d,e}' ==> '(?:a\\{b|c)d\\,e\\}' @@ -258,14 +291,18 @@ Commas outside of any bracket-delimited block are also escaped ; =item * -C<'groups'> keeps the parenthesis C<( ... )> of the original string without escaping them. +C<'groups'> + +Keeps the parenthesis C<( ... )> of the original string without escaping them. Currently, no check is done to ensure that the parenthesis are matching. 'a(b(c))d\\(\\)' ==> (no change) =item * -C<'anchors'> prevents the I C<^> and I C<$> anchors to be escaped. +C<'anchors'> + +Prevents the I C<^> and I C<$> anchors to be escaped. Since C<[...]> character class are currently escaped, a C<^> will always be interpreted as I. 'a^b$c' ==> (no change) @@ -298,13 +335,15 @@ No argument means C<< set => [ ] >>. $rw->do(set => 'jokers'); # Only translate jokers. $rw->do('jokers'); # Same. - $rw->do(add => [ qw/sql commas/ ]); # Translate also SQL and commas. + $rw->do(add => [ qw ]); # Translate also SQL and commas. $rw->do(rem => 'jokers'); # Specifying both 'sql' and 'jokers' is useless. $rw->do(); # Translate nothing. The C method returns the L object. -=head2 C +=head2 C + + $rw->type($type); Notifies to convert the metacharacters that corresponds to the predefined type C<$type>. C<$type> can be any of : @@ -357,7 +396,12 @@ In particular, you can usually pass C<$^O> as the C<$type> and get the correspon The C method returns the L object. -=head2 C<< capture [ $captures E set => $c1, add => $c2, rem => $c3 ] >> +=head2 C + + $rw->capture($captures); + $rw->capture(set => $c1); + $rw->capture(add => $c2); + $rw->capture(rem => $c3); Specifies the list of atoms to capture. This method works like L, except that the classes are different : @@ -366,28 +410,36 @@ This method works like L, except that the classes are different : =item * -C<'single'> will capture all unescaped I<"exactly one"> metacharacters, i.e. C for wildcards or C<_> for SQL ; +C<'single'> + +Captures all unescaped I<"exactly one"> metacharacters, i.e. C for wildcards or C<_> for SQL. 'a???b\\??' ==> 'a(.)(.)(.)b\\?(.)' 'a___b\\__' ==> 'a(.)(.)(.)b\\_(.)' =item * -C<'any'> will capture all unescaped I<"any"> metacharacters, i.e. C<*> for wildcards or C<%> for SQL ; +C<'any'> + +Captures all unescaped I<"any"> metacharacters, i.e. C<*> for wildcards or C<%> for SQL. 'a***b\\**' ==> 'a(.*)b\\*(.*)' 'a%%%b\\%%' ==> 'a(.*)b\\%(.*)' =item * -C<'greedy'>, when used in conjunction with C<'any'>, will make the C<'any'> captures greedy (by default they are not) ; +C<'greedy'> + +When used in conjunction with C<'any'>, it makes the C<'any'> captures greedy (by default they are not). 'a***b\\**' ==> 'a(.*?)b\\*(.*?)' 'a%%%b\\%%' ==> 'a(.*?)b\\%(.*?)' =item * -C<'brackets'> will capture matching C<{ ... , ... }> alternations. +C<'brackets'> + +Capture matching C<{ ... , ... }> alternations. 'a{b\\},\\{c}' ==> 'a(b\\}|\\{c)' @@ -395,38 +447,55 @@ C<'brackets'> will capture matching C<{ ... , ... }> alternations. $rw->capture(set => 'single'); # Only capture "exactly one" metacharacters. $rw->capture('single'); # Same. - $rw->capture(add => [ qw/any greedy/ ]); # Also greedily capture "any" metacharacters. + $rw->capture(add => [ qw ]); # Also greedily capture "any" metacharacters. $rw->capture(rem => 'greedy'); # No more greed please. $rw->capture(); # Capture nothing. -=head2 C +The C method returns the L object. + +=head2 C + + my $rx = $rw->convert($wc); + my $rx = $rw->convert($wc, $type); Converts the wildcard expression C<$wc> into a regular expression according to the options stored into the L object, or to C<$type> if it's supplied. -It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace C<'jokers'> or C<'sql'> and C<'commas'> or C<'brackets'> (depending on the L or L options), all of this by applying the C<'capture'> rules specified in the constructor or by L. +It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace C<'jokers'>, C<'sql'> and C<'commas'> or C<'brackets'> (depending on the L or L options), all of this by applying the C<'capture'> rules specified in the constructor or by L. =cut sub convert { my ($self, $wc, $type) = @_; _check_self $self; + my $config = (defined $type) ? $self->_type($type) : $self; return unless defined $wc; + + my $e = $config->{escape}; + # Escape : + # - an even number of \ that doesn't protect a regexp/wildcard metachar + # - an odd number of \ that doesn't protect a wildcard metachar + $wc =~ s/ + (?{do}; - my $e = $config->{escape}; - $wc =~ s/(?{jokers}) { - $wc = $self->_jokers($wc); - } elsif ($do->{sql}) { - $wc = $self->_sql($wc); - } + $wc = $self->_jokers($wc) if $do->{jokers}; + $wc = $self->_sql($wc) if $do->{sql}; if ($do->{brackets}) { $wc = $self->_bracketed($wc); - } elsif ($do->{commas}) { - if ($wc =~ /(?{'c_brackets'} . $self->_commas($wc) . ')'; - } + } elsif ($do->{commas} and $wc =~ /(?{'c_brackets'} . $self->_commas($wc) . ')'; } - return $wc; + + $wc } =head1 EXPORT @@ -435,7 +504,7 @@ An object module shouldn't export any function, and so does this one. =head1 DEPENDENCIES -L (core module since perl 5), L (since 5.7.3). +L (core module since perl 5), L, L (since 5.7.3). =head1 CAVEATS @@ -466,7 +535,7 @@ Tests code coverage report is available at L{c_single}; s/(?{c_any}; s/(?{c_single}; s/(?{c_any}; s/(?_commas($prefix) . $self->_brackets($bracket); } $re .= $self->_commas($rest); - return $self->{c_brackets} . $re . ')'; + + $self->{c_brackets} . $re . ')'; } sub _bracketed { my ($self, $rest) = @_; + my ($re, $bracket, $prefix) = (''); while (do { ($bracket, $rest, $prefix) = _extract $rest; $bracket }) { $re .= $prefix . $self->_brackets($bracket); } $re .= $rest; + $re =~ s/(?