From: Vincent Pit Date: Tue, 19 Aug 2008 15:55:12 +0000 (+0200) Subject: Add a 'anchors' metacharacter class X-Git-Tag: v1.02~1 X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FRegexp-Wildcards.git;a=commitdiff_plain;h=99cec79018d12958619028d1e21bf7f41eacbd17 Add a 'anchors' metacharacter class --- diff --git a/MANIFEST b/MANIFEST index a85e132..15d88e1 100644 --- a/MANIFEST +++ b/MANIFEST @@ -13,6 +13,7 @@ t/20-jokers.t t/21-commas.t t/22-brackets.t t/23-groups.t +t/24-anchors.t t/90-boilerplate.t t/91-pod.t t/92-pod-coverage.t diff --git a/lib/Regexp/Wildcards.pm b/lib/Regexp/Wildcards.pm index e59e3ea..f4be72f 100644 --- a/lib/Regexp/Wildcards.pm +++ b/lib/Regexp/Wildcards.pm @@ -47,7 +47,7 @@ BEGIN { In many situations, users may want to specify patterns to match but don't need the full power of regexps. Wildcards make one of those sets of simplified rules. This module converts wildcard expressions to Perl regular expressions, so that you can use them for matching. -It handles the C<*> and C<?> jokers, as well as Unix bracketed alternatives C<{,}>, but also C<%> and C<_> SQL wildcards. It can also keep original C<(...)> groups. Backspace (C<\>) is used as an escape character. +It handles the C<*> and C<?> jokers, as well as Unix bracketed alternatives C<{,}>, but also C<%> and C<_> SQL wildcards. If required, it can also keep original C<(...)> groups or C<^> and C<$> anchors. Backslash (C<\>) is used as an escape character. Typesets that mimic the behaviour of Windows and Unix shells are also provided. @@ -76,6 +76,7 @@ my %escapes = ( commas => ',', brackets => '{},', groups => '()', + anchors => '^$', ); my %captures = ( @@ -203,8 +204,8 @@ C<capture> lists which atoms should be capturing. Refer to L</capture> for more =head2 C<< do [ $what E<verbar> set => $c1, add => $c2, rem => $c3 ] >> -Specifies the list of metacharacters to convert. -They are classified into five classes : +Specifies the list of metacharacters to convert or to prevent from being escaped. 
+They fit into six classes : =over 4 @@ -240,6 +241,12 @@ C<'groups'> keeps the parenthesis C<( ... )> of the original string without esca 'a(b(c))d\\(\\)' ==> (no change) +=item * + +C<'anchors'> prevents the beginning-of-line C<^> and end-of-line C<$> anchors from being escaped. Since C<[...]> character classes are currently escaped, a C<^> will always be interpreted as a beginning-of-line anchor. + + 'a^b$c' ==> (no change) + =back Each C<$c> can be any of : diff --git a/t/24-anchors.t b/t/24-anchors.t new file mode 100644 index 0000000..5412cae --- /dev/null +++ b/t/24-anchors.t @@ -0,0 +1,35 @@ +#!perl -T + +use strict; +use warnings; + +use Test::More tests => 16; + +use Regexp::Wildcards; + +my $rw = Regexp::Wildcards->new(do => 'anchors'); + +is($rw->convert('\\^'), '\\^', 'anchor: escape ^ 1'); +is($rw->convert('\\\\\\^'), '\\\\\\^', 'anchor: escape ^ 2'); +is($rw->convert('\\$'), '\\$', 'anchor: escape $ 1'); +is($rw->convert('\\\\\\$'), '\\\\\\$', 'anchor: escape $ 2'); + +is($rw->convert('^a?b*'), '^a\\?b\\*', 'anchor: ^'); +is($rw->convert('a?b*$'), 'a\\?b\\*$', 'anchor: $'); +is($rw->convert('^a?b*$'), '^a\\?b\\*$', 'anchor: ^$'); +is($rw->convert('x^a?b*$y'), 'x^a\\?b\\*$y', 'anchor: intermediate ^$'); + +$rw->do(add => 'jokers'); + +is($rw->convert('^a?b*'), '^a.b.*', 'anchor: ^ with jokers'); +is($rw->convert('a?b*$'), 'a.b.*$', 'anchor: $ with jokers'); +is($rw->convert('^a?b*$'), '^a.b.*$', 'anchor: ^$ with jokers'); +is($rw->convert('x^a?b*$y'), 'x^a.b.*$y','anchor: intermediate ^$ with jokers'); + +$rw->do(add => 'brackets'); + +is($rw->convert('{^,a}?b*'), '(?:^|a).b.*', 'anchor: ^ with brackets'); +is($rw->convert('a?{b*,$}'), 'a.(?:b.*|$)', 'anchor: $ with brackets'); +is($rw->convert('{^a,?}{b,*$}'),'(?:^a|.)(?:b|.*$)','anchor: ^$ with brackets'); +is($rw->convert('x{^,a}?b{*,$}y'), 'x(?:^|a).b(?:.*|$)y', + 'anchor: intermediate ^$ with brackets');