X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2FRegexp-Wildcards.git;a=blobdiff_plain;f=README;h=207bc2397baa8cba2ec269852214b0388da834e8;hp=19cb5b7733ad7e7ffc72bc5b3341f9bf2647be33;hb=HEAD;hpb=288bd8baa5fee8ef1407addc316d3d331223cadd diff --git a/README b/README index 19cb5b7..207bc23 100644 --- a/README +++ b/README @@ -3,7 +3,7 @@ NAME expressions. VERSION - Version 1.02 + Version 1.05 SYNOPSIS use Regexp::Wildcards; @@ -13,16 +13,19 @@ SYNOPSIS my $re; $re = $rw->convert('a{b?,c}*'); # Do it Unix shell style. $re = $rw->convert('a?,b*', 'win32'); # Do it Windows shell style. - $re = $rw->convert('*{x,y}?', 'jokers'); # Process the jokers and escape the rest. - $re = $rw->convert('%a_c%', 'sql'); # Turn SQL wildcards into regexps. + $re = $rw->convert('*{x,y}?', 'jokers'); # Process the jokers and + # escape the rest. + $re = $rw->convert('%a_c%', 'sql'); # Turn SQL wildcards into + # regexps. $rw = Regexp::Wildcards->new( - do => [ qw/jokers brackets/ ], # Do jokers and brackets. - capture => [ qw/any greedy/ ], # Capture *'s greedily. + do => [ qw<jokers brackets> ], # Do jokers and brackets. + capture => [ qw<any greedy> ], # Capture *'s greedily. ); $rw->do(add => 'groups'); # Don't escape groups. - $rw->capture(rem => [ qw/greedy/ ]); # Actually we want non-greedy matches. + $rw->capture(rem => [ qw<greedy> ]); # Actually we want non-greedy + # matches. $re = $rw->convert('*a{,(b)?}?c*'); # '(.*?)a(?:|(b).).c(.*?)' $rw->capture(); # No more captures. @@ -41,7 +44,10 @@ DESCRIPTION provided. METHODS - "new [ do => $what | type => $type ], capture => $captures" + "new" + my $rw = Regexp::Wildcards->new(do => $what, capture => $capture); + my $rw = Regexp::Wildcards->new(type => $type, capture => $capture); + Constructs a new Regexp::Wildcard object. "do" lists all features that should be enabled when converting wildcards @@ -54,42 +60,59 @@ METHODS "capture" lists which atoms should be capturing. Refer to "capture" for more details. 
- "do [ $what | set => $c1, add => $c2, rem => $c3 ]" + "do" + $rw->do($what); + $rw->do(set => $c1); + $rw->do(add => $c2); + $rw->do(rem => $c3); + Specifies the list of metacharacters to convert or to prevent for escaping. They fit into six classes : - * 'jokers' converts "?" to "." and "*" to ".*" ; + * 'jokers' + + Converts "?" to "." and "*" to ".*". 'a**\\*b??\\?c' ==> 'a.*\\*b..\\?c' - * 'sql' converts "_" to "." and "%" to ".*" ; + * 'sql' + + Converts "_" to "." and "%" to ".*". 'a%%\\%b__\\_c' ==> 'a.*\\%b..\\_c' - * 'commas' converts all "," to "|" and puts the complete resulting - regular expression inside "(?: ... )" ; + * 'commas' + + Converts all "," to "|" and puts the complete resulting regular + expression inside "(?: ... )". 'a,b{c,d},e' ==> '(?:a|b\\{c|d\\}|e)' - * 'brackets' converts all matching "{ ... , ... }" brackets to "(?: - ... | ... )" alternations. If some brackets are unbalanced, it tries - to substitute as many of them as possible, and then escape the + * 'brackets' + + Converts all matching "{ ... , ... }" brackets to "(?: ... | ... )" + alternations. If some brackets are unbalanced, it tries to + substitute as many of them as possible, and then escape the remaining unmatched "{" and "}". Commas outside of any - bracket-delimited block are also escaped ; + bracket-delimited block are also escaped. 'a,b{c,d},e' ==> 'a\\,b(?:c|d)\\,e' '{a\\{b,c}d,e}' ==> '(?:a\\{b|c)d\\,e\\}' '{a{b,c\\}d,e}' ==> '\\{a\\{b\\,c\\}d\\,e\\}' - * 'groups' keeps the parenthesis "( ... )" of the original string - without escaping them. Currently, no check is done to ensure that - the parenthesis are matching. + * 'groups' + + Keeps the parenthesis "( ... )" of the original string without + escaping them. Currently, no check is done to ensure that the + parenthesis are matching. 'a(b(c))d\\(\\)' ==> (no change) - * 'anchors' prevents the *beginning-of-line* "^" and *end-of-line* "$" - anchors to be escaped. 
Since "[...]" character class are currently - escaped, a "^" will always be interpreted as *beginning-of-line*. + * 'anchors' + + Prevents the *beginning-of-line* "^" and *end-of-line* "$" anchors + to be escaped. Since "[...]" character class are currently escaped, + a "^" will always be interpreted as *beginning-of-line*. 'a^b$c' ==> (no change) @@ -112,59 +135,112 @@ METHODS $rw->do(set => 'jokers'); # Only translate jokers. $rw->do('jokers'); # Same. - $rw->do(add => [ qw/sql commas/ ]); # Translate also SQL and commas. - $rw->do(rem => 'jokers'); # Specifying both 'sql' and 'jokers' is useless. + $rw->do(add => [ qw<sql commas> ]); # Translate also SQL and commas. + $rw->do(rem => 'jokers'); # Specifying both 'sql' and + # 'jokers' is useless. $rw->do(); # Translate nothing. - "type $type" + The "do" method returns the Regexp::Wildcards object. + + "type" + $rw->type($type); + Notifies to convert the metacharacters that corresponds to the - predefined type $type. $type can be any of 'jokers', 'sql', 'commas', - 'brackets', 'win32' or 'unix'. An unknown or undefined value defaults to - 'unix', except for 'dos', 'os2', 'MSWin32' and 'cygwin' that default to - 'win32'. This means that you can pass $^O as the $type and get the - corresponding shell behaviour. Returns the object. + predefined type $type. $type can be any of : + + * 'jokers', 'sql', 'commas', 'brackets' + + Singleton types that enable the corresponding "do" classes. + + * 'unix' + + Covers typical Unix shell globbing features (effectively 'jokers' + and 'brackets'). + + * $^O values for common Unix systems + + Wrap to 'unix' (see perlport for the list). + + * "undef" + + Defaults to 'unix'. + + * 'win32' + + Covers typical Windows shell globbing features (effectively 'jokers' + and 'commas'). + + * 'dos', 'os2', 'MSWin32', 'cygwin' + + Wrap to 'win32'. + + In particular, you can usually pass $^O as the $type and get the + corresponding shell behaviour. $rw->type('win32'); # Set type to win32. 
+ $rw->type($^O); # Set type to unix on Unices and win32 on Windows $rw->type(); # Set type to unix. - "capture [ $captures | set => $c1, add => $c2, rem => $c3 ]" + The "type" method returns the Regexp::Wildcards object. + + "capture" + $rw->capture($captures); + $rw->capture(set => $c1); + $rw->capture(add => $c2); + $rw->capture(rem => $c3); + Specifies the list of atoms to capture. This method works like "do", except that the classes are different : - * 'single' will capture all unescaped *"exactly one"* metacharacters, - i.e. "?" for wildcards or "_" for SQL ; + * 'single' + + Captures all unescaped *"exactly one"* metacharacters, i.e. "?" for + wildcards or "_" for SQL. 'a???b\\??' ==> 'a(.)(.)(.)b\\?(.)' 'a___b\\__' ==> 'a(.)(.)(.)b\\_(.)' - * 'any' will capture all unescaped *"any"* metacharacters, i.e. "*" - for wildcards or "%" for SQL ; + * 'any' + + Captures all unescaped *"any"* metacharacters, i.e. "*" for + wildcards or "%" for SQL. 'a***b\\**' ==> 'a(.*)b\\*(.*)' 'a%%%b\\%%' ==> 'a(.*)b\\%(.*)' - * 'greedy', when used in conjunction with 'any', will make the 'any' - captures greedy (by default they are not) ; + * 'greedy' + + When used in conjunction with 'any', it makes the 'any' captures + greedy (by default they are not). 'a***b\\**' ==> 'a(.*?)b\\*(.*?)' 'a%%%b\\%%' ==> 'a(.*?)b\\%(.*?)' - * 'brackets' will capture matching "{ ... , ... }" alternations. + * 'brackets' + + Capture matching "{ ... , ... }" alternations. 'a{b\\},\\{c}' ==> 'a(b\\}|\\{c)' - $rw->capture(set => 'single'); # Only capture "exactly one" metacharacters. + $rw->capture(set => 'single'); # Only capture "exactly one" + # metacharacters. $rw->capture('single'); # Same. - $rw->capture(add => [ qw/any greedy/ ]); # Also greedily capture "any" metacharacters. + $rw->capture(add => [ qw<any greedy> ]); # Also greedily capture + # "any" metacharacters. $rw->capture(rem => 'greedy'); # No more greed please. $rw->capture(); # Capture nothing. 
- "convert $wc [ , $type ]" + The "capture" method returns the Regexp::Wildcards object. + + "convert" + my $rx = $rw->convert($wc); + my $rx = $rw->convert($wc, $type); + Converts the wildcard expression $wc into a regular expression according to the options stored into the Regexp::Wildcards object, or to $type if it's supplied. It successively escapes all unprotected regexp special characters that doesn't hold any meaning for wildcards, then replace - 'jokers' or 'sql' and 'commas' or 'brackets' (depending on the "do" or + 'jokers', 'sql' and 'commas' or 'brackets' (depending on the "do" or "type" options), all of this by applying the 'capture' rules specified in the constructor or by "capture". @@ -172,7 +248,8 @@ EXPORT An object module shouldn't export any function, and so does this one. DEPENDENCIES - Carp (core module since perl 5), Text::Balanced (since 5.7.3). + Carp (core module since perl 5), Scalar::Util, Text::Balanced (since + 5.7.3). CAVEATS This module does not implement the strange behaviours of Windows shell @@ -180,11 +257,13 @@ CAVEATS the file extension). For example, Windows XP shell matches *a like ".*a", "*a?" like ".*a.?", "*a??" like ".*a.{0,2}" and so on. +SEE ALSO + Text::Glob. + AUTHOR Vincent Pit, "", . - You can contact me by mail or on #perl @ FreeNode (vincent or - Prof_Vince). + You can contact me by mail or on "irc.perl.org" (vincent). BUGS Please report any bugs or feature requests to "bug-regexp-wildcards at @@ -202,7 +281,7 @@ SUPPORT . COPYRIGHT & LICENSE - Copyright 2007-2008 Vincent Pit, all rights reserved. + Copyright 2007,2008,2009,2013 Vincent Pit, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.