From: Vincent Pit Date: Sun, 29 Jun 2008 15:41:46 +0000 (+0200) Subject: Importing Regexp-Wildcards-0.01.tar.gz X-Git-Tag: v0.01^0 X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FRegexp-Wildcards.git;a=commitdiff_plain;h=d3841a7816c3e170f292ced4a5818ab252574300 Importing Regexp-Wildcards-0.01.tar.gz --- d3841a7816c3e170f292ced4a5818ab252574300 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f6b7620 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +blib* +pm_to_blib* + +Makefile{,.old} +Build +_build* + +*.tar.gz +Regexp-Wildcards-* + +core.* +*.{c,o,so,bs,out,def,exp} + +cover_db +*.{gcda,gcov,gcno} + diff --git a/Changes b/Changes new file mode 100644 index 0000000..77c55f6 --- /dev/null +++ b/Changes @@ -0,0 +1,5 @@ +Revision history for Regexp-Wildcards + +0.01 2007-06-14 + First version, released on an unsuspecting world. + diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..eddf17e --- /dev/null +++ b/MANIFEST @@ -0,0 +1,15 @@ +Changes +MANIFEST +META.yml +Makefile.PL +README +lib/Regexp/Wildcards.pm +t/00-load.t +t/01-import.t +t/10-jokers.t +t/11-commas.t +t/12-brackets.t +t/boilerplate.t +t/kwalitee.t +t/pod-coverage.t +t/pod.t diff --git a/META.yml b/META.yml new file mode 100644 index 0000000..b6c1e7f --- /dev/null +++ b/META.yml @@ -0,0 +1,14 @@ +--- #YAML:1.0 +name: Regexp-Wildcards +version: 0.01 +abstract: Converts wildcards expressions to Perl regular expressions. +license: perl +generated_by: ExtUtils::MakeMaker version 6.32 +distribution_type: module +requires: + Test::More: 0 +meta-spec: + url: http://module-build.sourceforge.net/META-spec-v1.2.html + version: 1.2 +author: + - Vincent Pit diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..20c7a46 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,17 @@ +use strict; +use warnings; +use ExtUtils::MakeMaker; + +WriteMakefile( + NAME => 'Regexp::Wildcards', + AUTHOR => 'Vincent Pit ', + LICENSE => 'perl', + VERSION_FROM => 'lib/Regexp/Wildcards.pm', + ABSTRACT_FROM => 'lib/Regexp/Wildcards.pm', + PL_FILES => {}, + PREREQ_PM => { + 'Test::More' => 0, + }, + dist => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, + clean => { FILES => 'Regexp-Wildcards-*' }, +); diff --git a/README b/README new file mode 100644 index 0000000..7469605 --- /dev/null +++ b/README @@ -0,0 +1,79 @@ +NAME + Regexp::Wildcards - Converts wildcards to regexps. + +VERSION + Version 0.01 + +SYNOPSIS + use Regexp::Wildcards qw/wc2re/; + + my $re; + $re = wc2re 'a{b.,c}*' => 'unix'; + $re = wc2re 'a.,b*' => 'win32'; + +DESCRIPTION + This module converts wildcards expressions to Perl regular expressions. + It handles the "*" and "?" jokers, as well as Unix bracketed + alternatives "{,}", and uses the backspace ("\") as an escape character. + Wrappers are provided to mimic the behaviour of Windows and Unix shells. + +EXPORT + Four functions are exported only on request : "wc2re", "wc2re_unix", + "wc2re_win32" and "wc2re_jokers". + +FUNCTIONS + "wc2re_unix" + This function takes as its only argument the wildcard string to process, + and returns the corresponding regular expression (or "undef" if the + source is invalid) according to standard Unix wildcard rules. It + successively escapes all regexp special characters that doesn't hold any + meaning for wildcards, turns jokers into their regexp equivalents, and + changes bracketed blocks into alternations. If brackets are unbalanced, + it will try to substitute as many of them as possible, and then escape + the remaining "{" and "}". + + "wc2re_win32" + Similar to the precedent, but for Windows wildcards. Bracketed blocks + are no longer handled (which means that brackets will be escaped), but + you can still provide a comma-separated list of items. + + "wc2re_jokers" + This one only handles the "?" and "*" jokers. + + "wc2re" + A generic function that wraps around all the different rules. The first + argument is the wildcard expression, and the second one is the type of + rules to apply, currently either "unix", "win32" or "jokers". If the + type is undefined, it defaults to "unix". + +SEE ALSO + Net::FTPServer has a method for that. Only jokers are translated, and + escaping won't preserve them. + + File::Find::Match::Util has a "wildcar" function that compiles a + matcher. Only handles "*". + + Text::Buffer has the "convertWildcardToRegex" class method that handles + jokers. + +AUTHOR + Vincent Pit, "" + +BUGS + Please report any bugs or feature requests to "bug-regexp-wildcards at + rt.cpan.org", or through the web interface at + . I + will be notified, and then you'll automatically be notified of progress + on your bug as I make changes. + +SUPPORT + You can find documentation for this module with the perldoc command. + + perldoc Regexp::Wildcards + +COPYRIGHT & LICENSE + Copyright 2007 Vincent Pit, all rights reserved. + + This program is free software; you can redistribute it and/or modify it + under the same terms as Perl itself. + diff --git a/lib/Regexp/Wildcards.pm b/lib/Regexp/Wildcards.pm new file mode 100644 index 0000000..c7b49a6 --- /dev/null +++ b/lib/Regexp/Wildcards.pm @@ -0,0 +1,206 @@ +package Regexp::Wildcards; + +use strict; +use warnings; + +use Text::Balanced qw/extract_bracketed/; + +=head1 NAME + +Regexp::Wildcards - Converts wildcards expressions to Perl regular expressions. + +=head1 VERSION + +Version 0.01 + +=cut + +our $VERSION = '0.01'; + +=head1 SYNOPSIS + + use Regexp::Wildcards qw/wc2re/; + + my $re; + $re = wc2re 'a{b.,c}*' => 'unix'; # Do it Unix style. + $re = wc2re 'a.,b*' => 'win32'; # Do it Windows style. + $re = wc2re '*{x,y}.' => 'jokers'; # Process the jokers & escape the rest. + +=head1 DESCRIPTION + +In many situations, users may want to specify patterns to match but don't need the full power of regexps. Wildcards make one of those sets of simplified rules. This module converts wildcards expressions to Perl regular expressions, so that you can use them for matching. It handles the C<*> and C jokers, as well as Unix bracketed alternatives C<{,}>, and uses the backspace (C<\>) as an escape character. Wrappers are provided to mimic the behaviour of Windows and Unix shells. + +=head1 EXPORT + +Four functions are exported only on request : C, C, C and C. + +=cut + +use base qw/Exporter/; + +my %types = ( + 'jokers' => \&wc2re_jokers, + 'unix' => \&wc2re_unix, + 'win32' => \&wc2re_win32 +); + +our @EXPORT = (); +our @EXPORT_OK = ('wc2re', map { 'wc2re_' . $_ } keys %types); +our @EXPORT_FAIL = qw/extract do_jokers do_commas do_brackets do_bracketed/; +our %EXPORT_TAGS = ( all => [ @EXPORT_OK ] ); + +=head1 FUNCTIONS + +=head2 C + +This function takes as its only argument the wildcard string to process, and returns the corresponding regular expression (or C if the source is invalid) according to standard Unix wildcard rules. It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, turns jokers into their regexp equivalents, and changes bracketed blocks into C<(?:|)> alternations. If brackets are unbalanced, it will try to substitute as many of them as possible, and then escape the remaining C<{> and C<}>. + +Unbalanced bracket expressions can always be rescued, but it may change completely its meaning. As a side effect, commas that first appear to be between brackets can be taken at the uppermost level, which invalidates the pattern. For example : + + # The last orphaned } gets escaped, and the first comma is replaced. + # We also need to escape the comma because unix doesn't allow them out + # of brackets. + print 'ok' if wc2re_unix('{a\\{b,c}d\\,e}') eq '(?:a\\{b|c)d\\,e\\}'; + + # All of the unprotected brackets are escaped, which means that we must + # escape all the commas. + print 'ok' if wc2re_unix('{a{b\\,c\\}d\\,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'; + +=cut + +sub wc2re_unix { + my ($re) = @_; + return unless defined $re; + $re =~ s/(? + +Similar to the precedent, but for Windows wildcards. Bracketed blocks are no longer handled (which means that brackets will be escaped), but you can still provide a comma-separated list of items. + +=cut + +sub wc2re_win32 { + my ($wc) = @_; + return unless defined $wc; + $wc =~ s/(? + +This one only handles the C and C<*> jokers. All other unquoted regexp metacharacters will be quoted. + +=cut + +sub wc2re_jokers { + my ($wc) = @_; + $wc =~ s/(? + +A generic function that wraps around all the different rules. The first argument is the wildcard expression, and the second one is the type of rules to apply, currently either C, C or C. If the type is undefined, it defaults to C. + +=cut + +sub wc2re { + my ($wc, $type) = @_; + return unless defined $wc; + $type ||= 'unix'; + return $types{lc $type}($wc); +} + +=head1 DEPENDENCIES + +L, which is bundled with perl since version 5.7.3 + +=head1 SEE ALSO + +Some modules provide incomplete alternatives as helper functions : + +L has a method for that. Only jokers are translated, and escaping won't preserve them. + +L has a C function that compiles a matcher. Only handles C<*>. + +L has the C class method that handles jokers. + +=head1 AUTHOR + +Vincent Pit, C<< >> + +=head1 BUGS + +Please report any bugs or feature requests to +C, or through the web interface at +L. +I will be notified, and then you'll automatically be notified of progress on +your bug as I make changes. + +=head1 SUPPORT + +You can find documentation for this module with the perldoc command. + + perldoc Regexp::Wildcards + +=head1 COPYRIGHT & LICENSE + +Copyright 2007 Vincent Pit, all rights reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + +=cut + +sub extract { extract_bracketed shift, '{', qr/.*?(?:(? 1; + +BEGIN { + use_ok( 'Regexp::Wildcards' ); +} + +diag( "Testing Regexp::Wildcards $Regexp::Wildcards::VERSION, Perl $], $^X" ); diff --git a/t/01-import.t b/t/01-import.t new file mode 100644 index 0000000..a5f8ba1 --- /dev/null +++ b/t/01-import.t @@ -0,0 +1,10 @@ +#!perl -T + +use Test::More tests => 4; + +require Regexp::Wildcards; + +for (qw/wc2re_jokers wc2re_unix wc2re_win32 wc2re/) { + eval { Regexp::Wildcards->import($_) }; + ok(!$@, 'import ' . $_); +} diff --git a/t/10-jokers.t b/t/10-jokers.t new file mode 100644 index 0000000..67a1fd8 --- /dev/null +++ b/t/10-jokers.t @@ -0,0 +1,52 @@ +#!perl -T + +use Test::More tests => 3 * (4 + 2 + 9 + 2) * 3; + +use Regexp::Wildcards qw/wc2re/; + +sub try { + my ($t, $s, $x, $y) = @_; + $y = $x unless defined $y; + ok(wc2re('ab' . $x, $t) eq 'ab' . $y, $s . ' (beginning)'); + ok(wc2re('a' . $x . 'b', $t) eq 'a' . $y . 'b', $s . ' (middle)'); + ok(wc2re($x . 'ab', $t) eq $y . 'ab', $s . ' (end)'); +} + +for my $t (qw/unix win32 jokers/) { + # Simple + + try $t, 'simple *', '*', '.*'; + try $t, 'simple ?', '?', '.'; + + ok(wc2re('?*ab', $t) eq '..*ab', 'simple ? and * (beginning)'); + ok(wc2re('?a*b', $t) eq '.a.*b', 'simple ? and * (middle)'); + ok(wc2re('?ab*', $t) eq '.ab.*', 'simple ? and * (end)'); + + ok(wc2re('*ab?', $t) eq '.*ab.', 'simple * and ? (beginning)'); + ok(wc2re('a*b?', $t) eq 'a.*b.', 'simple * and ? (middle)'); + ok(wc2re('ab*?', $t) eq 'ab.*.', 'simple * and ? (end)'); + + # Multiple + + try $t, 'multiple *', '**', '.*'; + try $t, 'multiple ?', '??', '..'; + + # Escaping + + try $t, 'escaping *', '\\*'; + try $t, 'escaping *', '\\?'; + try $t, 'escaping \\\\\\*', '\\\\\\*'; + try $t, 'escaping \\\\\\?', '\\\\\\?'; + + try $t, 'not escaping \\\\*', '\\\\*', '\\\\.*'; + try $t, 'not escaping \\\\?', '\\\\?', '\\\\.'; + + try $t, 'escaping \\', '\\', '\\\\'; + try $t, 'escaping regex characters', '[]', '\\[\\]'; + try $t, 'not escaping escaped regex characters', '\\\\\\[\\]'; + + # Mixed + + try $t, 'mixed * and \\*', '*\\**', '.*\\*.*'; + try $t, 'mixed ? and \\?', '?\\??', '.\\?.'; +} diff --git a/t/11-commas.t b/t/11-commas.t new file mode 100644 index 0000000..d616e9b --- /dev/null +++ b/t/11-commas.t @@ -0,0 +1,15 @@ +#!perl -T + +use Test::More tests => 7; + +use Regexp::Wildcards qw/wc2re_unix wc2re_win32/; + +ok((not defined wc2re_unix('a,b,c')), 'unix: no commas allowed out of brackets'); +ok(wc2re_unix('a\\,b\\\\\\,c') eq 'a\\,b\\\\\\,c', 'unix: no commas allowed out of brackets'); + +ok(wc2re_win32('a,b\\\\,c') eq '(?:a|b\\\\|c)', 'win32: commas'); +ok(wc2re_win32('a\\,b\\\\,c') eq '(?:a\\,b\\\\|c)', 'win32: escaped commas 1'); +ok(wc2re_win32('a\\,b\\\\\\,c') eq 'a\\,b\\\\\\,c', 'win32: escaped commas 2'); + +ok(wc2re_win32(',a,b\\\\,') eq '(?:|a|b\\\\|)', 'win32: commas at begin/end'); +ok(wc2re_win32('\\,a,b\\\\\\,') eq '(?:\\,a|b\\\\\\,)', 'win32: escaped commas at begin/end'); diff --git a/t/12-brackets.t b/t/12-brackets.t new file mode 100644 index 0000000..a8da49e --- /dev/null +++ b/t/12-brackets.t @@ -0,0 +1,40 @@ +#!perl -T + +use Test::More tests => 28; + +use Regexp::Wildcards qw/wc2re_jokers wc2re_unix wc2re_win32/; + +ok(wc2re_jokers('a{b\\\\,c\\\\}d') eq 'a\\{b\\\\\\,c\\\\\\}d'); + +ok(wc2re_win32('a{b\\\\,c\\\\}d') eq '(?:a\\{b\\\\|c\\\\\\}d)'); + +ok(wc2re_unix('{}') eq '(?:)'); +ok(wc2re_unix('{a}') eq '(?:a)'); +ok(wc2re_unix('{a,b}') eq '(?:a|b)'); +ok(wc2re_unix('{a,b,c}') eq '(?:a|b|c)'); + +ok(wc2re_unix('a{b,c}d') eq 'a(?:b|c)d'); +ok(wc2re_unix('a{b,c}d{e,,f}') eq 'a(?:b|c)d(?:e||f)'); +ok(wc2re_unix('a{b,c}d{e,,f}{g,h,}') eq 'a(?:b|c)d(?:e||f)(?:g|h|)'); + +ok(wc2re_unix('{a{b}}') eq '(?:a(?:b))'); +ok(wc2re_unix('{a,{b},c}') eq '(?:a|(?:b)|c)'); +ok(wc2re_unix('{a,{b{d}e},c}') eq '(?:a|(?:b(?:d)e)|c)'); +ok(wc2re_unix('{a,{b{d{}}e,f,,},c}') eq '(?:a|(?:b(?:d(?:))e|f||)|c)'); +ok(wc2re_unix('{a,{b{d{}}e,f,,},c}{,g{{}h,i}}') eq '(?:a|(?:b(?:d(?:))e|f||)|c)(?:|g(?:(?:)h|i))'); + +ok(wc2re_unix('\\{\\\\}') eq '\\{\\\\\\}'); +ok((not defined wc2re_unix('\\{a,b,c\\\\\\}'))); +ok(wc2re_unix('\\{a\\\\\\,b\\,c}') eq '\\{a\\\\\\,b\\,c\\}'); +ok(wc2re_unix('\\{a\\\\\\,b\\,c\\}') eq '\\{a\\\\\\,b\\,c\\}'); +ok(wc2re_unix('\\{a\\\\\\,b\\,c\\\\}') eq '\\{a\\\\\\,b\\,c\\\\\\}'); + +ok(wc2re_unix('{a\\},b\\{,c}') eq '(?:a\\}|b\\{|c)'); +ok((not defined wc2re_unix('{a,\\{}b,c}'))); +ok((not defined wc2re_unix('{a\\{}b,c}'))); +ok(wc2re_unix('{a\\{b,c}d\\,e}') eq '(?:a\\{b|c)d\\,e\\}'); +ok(wc2re_unix('{a{b\\,c\\}d\\,e}') eq '\\{a\\{b\\,c\\}d\\,e\\}'); +ok(wc2re_unix('{a\\{\\\\}b\\,c\\\\}') eq '(?:a\\{\\\\)b\\,c\\\\\\}'); +ok(wc2re_unix('{a,\\{\\}b,c}') eq '(?:a|\\{\\}b|c)'); +ok(wc2re_unix('{a,\\{d,e,,\\}b,c}') eq '(?:a|\\{d|e||\\}b|c)'); +ok(wc2re_unix('{a,\\{d,e,,\\}b,c}\\\\{f,g,h,i}') eq '(?:a|\\{d|e||\\}b|c)\\\\(?:f|g|h|i)'); diff --git a/t/boilerplate.t b/t/boilerplate.t new file mode 100644 index 0000000..f0b2ada --- /dev/null +++ b/t/boilerplate.t @@ -0,0 +1,48 @@ +#!perl -T + +use strict; +use warnings; +use Test::More tests => 3; + +sub not_in_file_ok { + my ($filename, %regex) = @_; + open my $fh, "<", $filename + or die "couldn't open $filename for reading: $!"; + + my %violated; + + while (my $line = <$fh>) { + while (my ($desc, $regex) = each %regex) { + if ($line =~ $regex) { + push @{$violated{$desc}||=[]}, $.; + } + } + } + + if (%violated) { + fail("$filename contains boilerplate text"); + diag "$_ appears on lines @{$violated{$_}}" for keys %violated; + } else { + pass("$filename contains no boilerplate text"); + } +} + +not_in_file_ok(README => + "The README is used..." => qr/The README is used/, + "'version information here'" => qr/to provide version information/, +); + +not_in_file_ok(Changes => + "placeholder date/time" => qr(Date/time) +); + +sub module_boilerplate_ok { + my ($module) = @_; + not_in_file_ok($module => + 'the great new $MODULENAME' => qr/ - The great new /, + 'boilerplate description' => qr/Quick summary of what the module/, + 'stub function definition' => qr/function[12]/, + ); +} + +module_boilerplate_ok('lib/Regexp/Wildcards.pm'); diff --git a/t/kwalitee.t b/t/kwalitee.t new file mode 100644 index 0000000..1e95c3d --- /dev/null +++ b/t/kwalitee.t @@ -0,0 +1,6 @@ +#!perl + +use Test::More; + +eval { require Test::Kwalitee; Test::Kwalitee->import() }; +plan( skip_all => 'Test::Kwalitee not installed; skipping' ) if $@; diff --git a/t/pod-coverage.t b/t/pod-coverage.t new file mode 100644 index 0000000..d367904 --- /dev/null +++ b/t/pod-coverage.t @@ -0,0 +1,6 @@ +#!perl -T + +use Test::More; +eval "use Test::Pod::Coverage 1.04"; +plan skip_all => "Test::Pod::Coverage 1.04 required for testing POD coverage" if $@; +all_pod_coverage_ok( { also_private => [ qr/^do_/, qw/extract/ ] } ); diff --git a/t/pod.t b/t/pod.t new file mode 100644 index 0000000..976d7cd --- /dev/null +++ b/t/pod.t @@ -0,0 +1,6 @@ +#!perl -T + +use Test::More; +eval "use Test::Pod 1.14"; +plan skip_all => "Test::Pod 1.14 required for testing POD" if $@; +all_pod_files_ok();