From: Ævar Arnfjörð Bjarmason Date: Thu, 17 May 2007 03:57:00 +0000 (+0000) Subject: Importing re-engine-Plugin-0.02.tar.gz X-Git-Tag: v0.02^0 X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2Fre-engine-Plugin.git;a=commitdiff_plain;h=2dd7bc5f80da4fe2220e28de1102641c239d084c Importing re-engine-Plugin-0.02.tar.gz --- diff --git a/ChangeLog b/ChangeLog new file mode 100644 index 0000000..12ce24f --- /dev/null +++ b/ChangeLog @@ -0,0 +1,8 @@ +2007-05-17 Ævar Arnfjörð Bjarmason (0.02) + + * Brought up to date with current blead after lots of hacking on + blead itself, too many changes to list + +2007-02-18 Ævar Arnfjörð Bjarmason (0.01) + + * Initial release diff --git a/MANIFEST b/MANIFEST index fbce7f8..97d9d7e 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1,5 +1,4 @@ -Changes -contrib/capture_hook.patch +ChangeLog inc/Module/AutoInstall.pm inc/Module/Install.pm inc/Module/Install/AutoInstall.pm @@ -15,17 +14,30 @@ inc/Module/Install/WriteAll.pm Makefile.PL MANIFEST This list of files META.yml +Plugin.h Plugin.pm Plugin.pod Plugin.xs t/00-compile.t -t/captures.t t/eval-comp.t t/eval-exec.t -t/flags.t +t/Example.pm +t/Example.t t/import.t t/methods.t -t/minlen-get.t -t/minlen-set.t -TODO +t/methods/free.t +t/methods/minlen/bytes.t +t/methods/minlen/get.t +t/methods/minlen/set.t +t/methods/mod.t +t/methods/pattern/modify.t +t/methods/pattern/types.t +t/methods/stash.t +t/methods/str/types.t +t/methods/str/undef.t +t/num_buff/FETCH.t +t/num_buff/LENGTH.t +t/num_buff/STORE.t +t/taint/rx.t +t/taint/util.t typemap diff --git a/META.yml b/META.yml index dd487cd..cf689cb 100644 --- a/META.yml +++ b/META.yml @@ -1,4 +1,4 @@ -abstract: Pure-Perl regular expression engine plugin interface +abstract: Regular Expression engine API author: "\xC3\x86var Arnfj\xC3\xB6r\xC3\xB0 Bjarmason " build_requires: Test::More: 0 diff --git a/Makefile.PL b/Makefile.PL index 6da951b..c1224f4 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -1,18 +1,8 @@ - -=pod - -L file for L. M::I knows how to -build our XS stuff automatically. - -=cut - use strict; use inc::Module::Install; name 're-engine-Plugin'; -# Requires the unreleased match vars features in (as of writing) -# unreleased 5.9.6, but 5.9.5 is the version of blead perl_version '5.009005'; author 'Ævar Arnfjörð Bjarmason '; @@ -23,7 +13,7 @@ license_from 'Plugin.pod'; # t/ build_requires 'Test::More' => 0; # 5.007003 -tests 't/*.t'; +tests 't/*.t t/*/*.t t/*/*/*.t'; auto_install; WriteAll; diff --git a/Plugin.h b/Plugin.h new file mode 100644 index 0000000..2124ad8 --- /dev/null +++ b/Plugin.h @@ -0,0 +1,84 @@ + +#define GET_SELF_FROM_PPRIVATE(pprivate) \ + re__engine__Plugin self; \ + SELF_FROM_PPRIVATE(self,pprivate); + +/* re__engine__Plugin self; SELF_FROM_PPRIVATE(self,rx->pprivate) */ +#define SELF_FROM_PPRIVATE(self, pprivate) \ + if (sv_isobject(pprivate)) { \ + SV * ref = SvRV((SV*)pprivate); \ + IV tmp = SvIV((SV*)ref); \ + self = INT2PTR(re__engine__Plugin,tmp); \ + } else { \ + Perl_croak(aTHX_ "Not an object"); \ + } + +START_EXTERN_C +EXTERN_C const regexp_engine engine_plugin; +EXTERN_C REGEXP * Plugin_comp(pTHX_ const SV const *, const U32); +EXTERN_C I32 Plugin_exec(pTHX_ REGEXP * const, char *, char *, + char *, I32, SV *, void *, U32); +EXTERN_C char * Plugin_intuit(pTHX_ REGEXP * const, SV *, char *, + char *, U32, re_scream_pos_data *); +EXTERN_C SV * Plugin_checkstr(pTHX_ REGEXP * const); +EXTERN_C void Plugin_free(pTHX_ REGEXP * const); +EXTERN_C void Plugin_numbered_buff_FETCH(pTHX_ REGEXP * const, + const I32, SV * const); +EXTERN_C void Plugin_numbered_buff_STORE(pTHX_ REGEXP * const, + const I32, SV const * const); +EXTERN_C I32 Plugin_numbered_buff_LENGTH(pTHX_ REGEXP * const, + const SV * const, const I32); +EXTERN_C SV * Plugin_named_buff_FETCH(pTHX_ REGEXP * const, SV * const, + const U32); +EXTERN_C SV * Plugin_package(pTHX_ REGEXP * const); +#ifdef USE_ITHREADS +EXTERN_C void * Plugin_dupe(pTHX_ REGEXP * const, CLONE_PARAMS *); +#endif +END_EXTERN_C + +START_EXTERN_C +EXTERN_C const regexp_engine engine_plugin; +END_EXTERN_C + +#define RE_ENGINE_PLUGIN (&engine_plugin) +const regexp_engine engine_plugin = { + Plugin_comp, + Plugin_exec, + Plugin_intuit, + Plugin_checkstr, + Plugin_free, + Plugin_numbered_buff_FETCH, + Plugin_numbered_buff_STORE, + Plugin_numbered_buff_LENGTH, + Plugin_named_buff_FETCH, + Plugin_package, +#if defined(USE_ITHREADS) + Plugin_dupe, +#endif +}; + +typedef struct replug { + /* Pointer back to the containing REGEXP struct so that accessors + * can modify nparens, gofs etc. */ + REGEXP * rx; + + /* A copy of the pattern given to comp, for ->pattern */ + SV * pattern; + + /* A copy of the string being matched against, for ->str */ + SV * str; + + /* The ->stash */ + SV * stash; + + /* + * Callbacks + */ + + SV * cb_free; + + /* ->num_captures */ + SV * cb_num_capture_buff_FETCH; + SV * cb_num_capture_buff_STORE; + SV * cb_num_capture_buff_LENGTH; +} *re__engine__Plugin; diff --git a/Plugin.pm b/Plugin.pm index ec4ea06..c693c44 100644 --- a/Plugin.pm +++ b/Plugin.pm @@ -1,59 +1,51 @@ # See Plugin.pod for documentation package re::engine::Plugin; use 5.009005; +use base 'Regexp'; use strict; -use Carp 'croak'; -use Scalar::Util 'weaken'; use XSLoader (); -our $VERSION = '0.01'; +our $VERSION = '0.02'; XSLoader::load __PACKAGE__, $VERSION; -my $RE_ENGINE_PLUGIN = get_engine_plugin(); -my $NULL = 0; +my $RE_ENGINE_PLUGIN = ENGINE(); # How many? Used to cheat %^H -my $callback = 0; -# Valid callbacks -my @callback = qw(comp exec intuit checkstr free dupe); +my $callback = 1; + # Where we store our CODE refs my %callback; +# Generate a key to use in the %^H hash from a string, prefix the +# package name like L does +my $key = sub { __PACKAGE__ . "::" . $_[0] }; + sub import { my ($pkg, %sub) = @_; - #$sub{$_} = sub {} + # Valid callbacks + my @callback = qw(comp exec intuit checkstr free dupe); for (@callback) { next unless exists $sub{$_}; my $cb = delete $sub{$_}; - # Convert "package::sub" to CODE if it isn't CODE already unless (ref $cb eq 'CODE') { - no strict 'refs'; - $cb = *{$cb}{CODE}; + require Carp; + Carp::croak("'$_' is not CODE"); } - # Whine if we don't get a CODE ref or a valid package::sub name - croak "'$_' is not CODE and neither is the *{$cb}{CODE} fallback" - unless ref $cb eq 'CODE'; - # Get an ID to use my $id = $callback ++; # Insert into our callback storage, $callback{$_}->{$id} = $cb; - # Weaken it so we don't end up hanging on to something the - # caller doesn't care about anymore - #weaken($callback{$_}->{$id}); # EEK, too weak! - # Instert into our cache with a key we can retrive later # knowing the ID in %^H and what callback we're getting - my $key = callback_key($_); - $^H{$key} = $id; + $^H{ $key->($_) } = $id; } $^H{regcomp} = $RE_ENGINE_PLUGIN; @@ -61,31 +53,34 @@ sub import sub unimport { - my ($pkg) = @_; - # Delete the regcomp hook - delete $^H{regcomp} if $^H{regcomp} == $RE_ENGINE_PLUGIN; -} - -sub callback_key -{ - my ($name) = @_; - - sprintf "rep_%s", $name; + delete $^H{regcomp} + if $^H{regcomp} == $RE_ENGINE_PLUGIN; } -# Minimal function to be called from the XS -sub get_callback +# Minimal function to get CODE for a given key to be called by the +# get_H_callback C function. +sub _get_callback { my ($name) = @_; # 'comp', 'exec', ... - my $key = callback_key($name); my $h = (caller(0))[10]; - my $id = $h->{$key}; + my $id = $h->{ $key->($name) }; my $cb = defined $id ? $callback{$name}->{$id} : 0; return $cb; } +sub num_captures +{ + my ($re, %callback) = @_; + + for my $key (keys %callback) { + $key =~ y/a-z/A-Z/; # ASCII uc + my $name = '_num_capture_buff_' . $key; + $re->$name( $callback{$key} ); + } +} + 1; diff --git a/Plugin.pod b/Plugin.pod index a076caa..d4bb1a2 100644 --- a/Plugin.pod +++ b/Plugin.pod @@ -1,198 +1,395 @@ =head1 NAME -re::engine::Plugin - Pure-Perl regular expression engine plugin interface +re::engine::Plugin - API to write custom regex engines -=head1 SYNOPSIS +=head1 DESCRIPTION - use feature ':5.10'; - use re::engine::Plugin ( - comp => sub { - my ($re) = @_; # A re::engine::Plugin object +As of perl 5.9.5 it's possible to lexically replace perl's built-in +regular expression engine with your own (see L and +L). This module provides a glue interface to the relevant +parts of the perl C API enabling you to write an engine in Perl +instead of the C/XS interface provided by the core. - # return value ignored - }, - exec => sub { - my ($re, $str) = @_; +=head2 The gory details + +Each regex in perl is compiled into an internal C structure +(see L), this can happen +either during compile time in the case of patterns in the format +C or runtime for C patterns, or something inbetween +depending on variable interpolation etc. + +When this module is loaded into a scope it inserts a hook into +C<$^H{regcomp}> (as described in L) to have each regexp +constructed in its lexical scope handled by this engine, but it +differs from other engines in that it also inserts other hooks into +C<%^H> in the same scope that point to user-defined subroutines to use +during compilation, execution etc, these are described in +L below. + +The callbacks (e.g. L) then get called with a +L object as their first argument. This object +provies access to perl's internal REGEXP struct in addition to its own +state (e.g. a L). The L on this object +allow for altering the C struct's internal state, adding new +callbacks, etc. - # We always like ponies! - return 1 if $str eq 'pony'; - return; - } +=head1 CALLBACKS + +Callbacks are specified in the C import list as +key-value pairs of names and subroutine references: + + use re::engine::Plugin ( + comp => sub {}, + exec => sub {}, ); - "pony" =~ /yummie/; +To write a custom engine which imports your functions into the +caller's scope use use the following snippet: -=head1 DESCRIPTION + package re::engine::Example; + use re::engine::Plugin (); + + sub import + { + # Populates the caller's %^H with our callbacks + re::engine::Plugin->import( + comp => \&comp, + exec => \&exec, + ); + } + + *unimport = \&re::engine::Plugin::unimport; -As of perl 5.9.5 it's possible lexically replace perl's built-in -regular expression engine (see L). This module provides glue for writing such a wrapper in -Perl instead of the provided C/XS interface. + # Implementation of the engine + sub comp { ... } + sub exec { ... } -B: This module is a development release that does not work with -any version of perl other than the current (as of February 2007) -I. The provided interface is not a complete wrapper around the -native interface (yet!) but the parts that are left can be implemented -with additional methods so the completed API shouldn't have any major -changes. + 1; + +=head2 comp + + comp => sub { + my ($rx) = @_; + + # return value discarded + } + +Called when a regex is compiled by perl, this is always the first +callback to be called and may be called multiple times or not at all +depending on what perl sees fit at the time. + +The first argument will be a freshly constructed C +object (think of it as C<$self>) which you can interact with using the +L below, this object will be passed around the other +L and L for the lifetime of +the regex. + +Calling C or anything that uses it (such as C) here will +not be trapped by an C block that the pattern is in, i.e. + + use Carp 'croak'; + use re::engine::Plugin( + comp => sub { + my $rx = shift; + croak "Your pattern is invalid" + unless $rx->pattern ~~ /pony/; + } + ); + + # Ignores the eval block + eval { /you die in C, you die for real/ }; + +This happens because the real subroutine call happens indirectly at +compile time and not in the scope of the C block. This is how +perl's own engine would behave in the same situation if given an +invalid pattern such as C. + +=head2 exec + + exec => sub { + my ($rx, $str) = @_; + + # We always like ponies! + return 1 if $str ~~ /pony/; + + # Failed to match + return; + } + +Called when a regex is being executed, i.e. when it's being matched +against something. The scalar being matched against the pattern is +available as the second argument (C<$str>) and through the L +method. The routine should return a true value if the match was +successful, and a false one if it wasn't. =head1 METHODS -=head2 import +=head2 str -Takes a list of key-value pairs with the only mandatory pair being -L and its callback routine. Both subroutine references and the -string name of a subroutine (e.g. C<"main::exec">) can be -specified. The real CODE ref is currently looked up in the symbol -table in the latter case. + "str" ~~ /pattern/; + # in comp/exec/methods: + my $str = $rx->str; -=over 4 +The last scalar to be matched against the L or +C if there hasn't been a match yet. -=item comp +perl's own engine always stringifies the scalar being matched against +a given pattern, however a custom engine need not have such +restrictions. One could write a engine that matched a file handle +against a pattern or any other complex data structure. -An optional sub to be called when a pattern is being compiled, note -that a single pattern may be compiled more than once by perl. +=head2 pattern -The subroutine will be called with a regexp object (see L). The regexp object will be stored internally along with the -pattern and provided as the first argument for the other callback -routines (think of it as C<$self>). +The pattern that the engine was asked to compile, this can be either a +classic Perl pattern with modifiers like C or C or +an arbitary scalar. The latter allows for passing anything that +doesn't fit in a string and five L characters, such as +hashrefs, objects, etc. -If your regex implementation needs to validate its pattern this is the -right place to B on an invalid one (but see L). +=head2 mod -The return value of this subroutine is discarded. + my %mod = $rx->mod; + say "has /ix" if $mod{i} and $mod{x}; -=item exec +A key-value pair list of the modifiers the pattern was compiled with. +The keys will zero or more of C and the values will be true +values (so that you don't have to write C). -Called when a given pattern is being executed, the first argument is -the regexp object and the second is the string being matched. The -routine should return true if the pattern matched and false if it -didn't. +You don't get to know if the C modifiers were attached to the +pattern since these are internal to perl and shouldn't matter to +regexp engines. -=item intuit +=head2 stash -TODO: implement + comp => sub { shift->stash( [ 1 .. 5 ) }, + exec => sub { shift->stash }, # Get [ 1 .. 5 ] -=item checkstr +Returns or sets a user defined stash that's passed around as part of +the C<$rx> object, useful for passing around all sorts of data between +the callback routines and methods. -TODO: implement +=head2 minlen -=item free + $rx->minlen($num); + my $minlen = $rx->minlen // "not set"; -TODO: implement +The minimum C a string must be to match the pattern, perl will +use this internally during matching to check whether the stringified +form of the string (or other object) being matched is at least this +long, if not the regexp engine in effect (that means you!) will not be +called at all. -=item dupe +The length specified will be used as a a byte length (using +L), not a character length. -TODO: implement +=head2 num_captures -=item numbered_buff_get + $re->num_captures( + FETCH => sub { + my ($re, $paren) = @_; -TODO: implement + return "value"; + }, + STORE => sub { + my ($re, $paren, $rhs) = @_; -=item named_buff_get + # return value discarded + }, + LENGTH => sub { + my ($re, $paren) = @_; -TODO: implement + return 123; + }, + ); -=back +Takes a list of key-value pairs of names and subroutines that +implement numbered capture variables. C will be called on value +retrieval (C), C on assignment (C<$1 = "ook">) and +C on C. -=head2 flags +The second paramater of each routine is the paren number being +requested/stored, the following mapping applies for those numbers: -L + -2 => $` or ${^PREMATCH} + -1 => $' or ${^POSTMATCH} + 0 => $& or ${^MATCH} + 1 => $1 + # ... -=head1 TODO +Assignment to capture variables makes it possible to implement +something like Perl 6 C<:rw> semantics, and since it's possible to +make the capture variables return any scalar instead of just a string +it becomes possible to implement Perl 6 match object semantics (to +name an example). -=over +=head2 named_captures -=item * +B: implement -Provide an API for named (C<$+{name}>) and unnamed (C<$1, $2, ...>) -match variables, allow specifying both offsets into the pattern and -any given scalar. +perl internals still needs to be changed to support this but when it's +done it'll allow the binding of C<%+> and C<%-> and support the +L methods FETCH, STORE, DELETE, CLEAR, EXISTS, FIRSTKEY, +NEXTKEY and SCALAR. -=item * +=head1 Tainting -Find some neat example for the L, suggestions welcome. +The only way to untaint an existing variable in Perl is to use it as a +hash key or referencing subpatterns from a regular expression match +(see L), the +latter only works in perl's regex engine because it explicitly +untaints capture variables which a custom engine will also need to do +if it wants its capture variables to be untanted. -=back +There are basically two ways to go about this, the first and obvious +one is to make use of Perl'l lexical scoping which enables the use of +its built-in regex engine in the scope of the overriding engine's +callbacks: -=head1 BUGS + use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; # $str is tainted -Please report any bugs that aren't already listed at -L to -L + $re->num_captures( + FETCH => sub { + my ($re, $paren) = @_; + + # This is perl's engine doing the match + $str ~~ /(.*)/; + + # $1 has been untainted + return $1; + }, + ); + }, + ); -=over 1 +The second is to use something like L which flips the +taint flag on the scalar without invoking the perl's regex engine: -=item + use Taint::Util; + use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; # $str is tainted -Calling C or anything that uses it (such as C) in the -L callback routines will not be trapped by an C block -that the pattern is in, i.e. + $re->num_captures( + FETCH => sub { + my ($re, $paren) = @_; - use Carp qw(croak); - use re::engine::Plugin( - comp => sub { - my $re = shift; - croak "Your pattern is invalid" - unless $re->pattern =~ /pony/; - } - ); + # Copy $str and untaint the copy + untaint(my $ret = $str); - # Ignores the eval block - eval { /you die in C, you die for real/ }; + # Return the untainted value + return $ret; + }, + ); + }, + ); -Simply put this happens because the real subroutine call happens -indirectly and not in the scope of the C block. +In either case a regex engine using perl's L or +this module is responsible for how and if it untaints its variables. -=back +=head1 SEE ALSO -=head1 Regexp object +L, L -The regexp object is passed around as the first argument to all the -callback routines, it supports the following method calls (with more -to come!). +=head1 TODO / CAVEATS + +I =over -=item pattern +=item * -Returns the pattern this regexp was compiled with. +Engines implemented with this module don't support C and C, the appropriate parts of the C struct need to be wrapped +and documented. -=item flags +=item * -Returns a string of flags the pattern was compiled -with. (e.g. C<"xs">). The flags are not guarenteed to be in any -particular order, so don't depend on the current one. +Still not a complete wrapper for L in other ways, needs +methods for some C struct members, some callbacks aren't +implemented etc. -=item stash +=item * -Returns or sets a user-defined stash that's passed around with the -pattern, this is useful for passing around an arbitary scalar between -callback routines, example: +Support overloading operations on the C object, this allow +control over the of C objects in a manner that isn't limited by +C/C. - use re::engine::Plugin ( - comp => sub { $_[0]->stash( [ 1 .. 5 ] ) }, - comp => sub { $_[0]->stash }, # Get [ 1 .. 5] + $re->overload( + '""' => sub { ... }, + '@{}' => sub { ... }, + ... ); -=item minlen +=item * -The minimum length a given string must be to match the pattern, set -this to an integer in B and perl will not call your B -routine unless the string being matched as at least that long. Returns -the currently set length if not called with any arguments or C -if no length has been set. +Support the dispatch of arbitary methods from the re::engine::Plugin +qr// object to user defined subroutines via AUTOLOAD; -=back + package re::engine::Plugin; + sub AUTOLOAD + { + our $AUTOLOAD; + my ($name) = $AUTOLOAD =~ /.*::(.*?)/; + my $cv = getmeth($name); # or something like that + goto &$cv; + } -=head1 SEE ALSO + package re::engine::SomeEngine; -L + sub comp + { + my $re = shift; -=head1 THANKS + $re->add_method( # or something like that + foshizzle => sub { + my ($re, @arg) = @_; # re::engine::Plugin, 1..5 + }, + ); + } -Yves explaining why I made the regexp engine a sad panda. + package main; + use re::engine::SomeEngine; + later: + + my $re = qr//; + $re->foshizzle(1..5); + +=item * + +Implement the dupe callback, test this on a threaded perl (and learn +how to use threads and how they break the current model). + +=item * + +Allow the user to specify ->offs either as an array or a packed +string. Can pack() even pack I32? Only IV? int? + +=item * + +Add tests that check for different behavior when curpm is and is not +set. + +=item * + +Add tests that check the refcount of the stash and other things I'm +mucking with, run valgrind and make sure everything is destroyed when +it should. + +=item * + +Run the debugger on the testsuite and find cases when the intuit and +checkstr callbacks are called. Write wrappers around them and add +tests. + +=back + +=head1 BUGS + +Please report any bugs that aren't already listed at +L to +L =head1 AUTHOR @@ -200,9 +397,9 @@ Evar ArnfjErE Bjarmason =head1 LICENSE +Copyright 2007 Evar ArnfjErE Bjarmason. + This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. -Copyright 2007 Evar ArnfjErE Bjarmason. - =cut diff --git a/Plugin.xs b/Plugin.xs index 07a73d2..e9d1307 100644 --- a/Plugin.xs +++ b/Plugin.xs @@ -1,37 +1,7 @@ #include "EXTERN.h" #include "perl.h" #include "XSUB.h" - -#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL) - -START_EXTERN_C - -EXTERN_C const regexp_engine engine_plugin; - -END_EXTERN_C - -/* - * Our struct which gets initiated and used as our object - * ($re). Since we can't count on the regexp structure provided by - * perl to be alive between comp/exec etc. we pull stuff from it and - * save it in our own structure. - * - * Besides, creating Perl accessors which directly muck with perl's - * own regexp structures in different phases of regex execution would - * be a little too evil. - */ -typedef struct replug { - SV * pattern; - char flags[sizeof("ecgimsxp")]; - - I32 minlen; - U32 gofs; - - SV * stash; - - U32 nparens; - AV * captures; /* Array of SV* that'll become $1, $2, ... */ -} *re__engine__Plugin; +#include "Plugin.h" SV* get_H_callback(const char* key) @@ -41,21 +11,25 @@ get_H_callback(const char* key) SV * callback; - ENTER; + ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(sv_2mortal(newSVpv(key, 0))); PUTBACK; - call_pv("re::engine::Plugin::get_callback", G_SCALAR); + call_pv("re::engine::Plugin::_get_callback", G_SCALAR); SPAGAIN; callback = POPs; - SvREFCNT_inc(callback); + SvREFCNT_inc(callback); /* refcount++ or FREETMPS below will collect us */ - if (!SvROK(callback)) { callback = NULL; }// croak("ret value not a ref"); } + /* If we don't get a valid CODE value return a NULL callback, in + * that case the hooks won't call back into Perl space */ + if (!SvROK(callback) || SvTYPE(SvRV(callback)) != SVt_PVCV) { + callback = NULL; + } PUTBACK; FREETMPS; @@ -64,111 +38,53 @@ get_H_callback(const char* key) return callback; } -/* just learn to use gdb you lazy bum! */ -#if 0 -void -dump_r_info(const char* id, regexp *r) -{ - warn("%s:", id); - warn("\textflags = %d", r->extflags); - warn("\tminlen = %d", r->minlen); - warn("\tminlenren = %d", r->minlenret); - warn("\tgofs = %d", r->gofs); - warn("\tnparens = %d", r->nparens); - warn("\tpprivate = %p", r->pprivate); - warn("\tsubbeg = %s", r->subbeg); - warn("\tsublen = %d", r->sublen); - warn("\tprecomp = %s", r->precomp); - warn("\tprelen = %d", r->prelen); - warn("\twrapped = %s", r->wrapped); - warn("\twraplen = %d", r->wraplen); - warn("\tseen_evals = %d", r->seen_evals); - warn("\trefcnt = %d", r->refcnt); - -} -#endif - -regexp * -Plugin_comp(pTHX_ char *exp, char *xend, PMOP *pm) +REGEXP * +Plugin_comp(pTHX_ const SV * const pattern, const U32 flags) { dSP; - register regexp *r; - int count; - - /* - * Allocate a new regexp struct, we must only write to the intflags, - * engine and private members and the others must be populated, - * internals expect the regex to have certain values least our code - * blow up - */ - - Newxz(r,1,regexp); - - /* Set up the regex to be handled by this plugin */ - r->engine = &engine_plugin; + REGEXP * rx; + re__engine__Plugin re; + I32 count; + I32 buffers; - /* Store the initial flags */ - r->intflags = pm->op_pmflags; - r->pprivate = NULL; /* this is set to our object below */ + /* exp/xend version of the pattern & length */ + STRLEN plen; + char* exp = SvPV((SV*)pattern, plen); + char* xend = exp + plen; - /* - * Populate the regexp members for the engine - */ + /* The REGEXP structure to return to perl */ + Newxz(rx, 1, REGEXP); - /* Ref count of the pattern */ - r->refcnt = 1; - - /* Preserve a copy of the original pattern */ - r->prelen = xend - exp; - r->precomp = SAVEPVN(exp, r->prelen); + /* Our blessed object */ + SV *obj = newSV(0); + SvREFCNT_inc(obj); + Newxz(re, 1, struct replug); + sv_setref_pv(obj, "re::engine::Plugin", (void*)re); - /* these may be changed by accessors */ - r->minlen = 0; - r->minlenret = 0; - r->gofs = 0; - r->nparens = 0; + re->rx = rx; /* Make the rx accessible from self->rx */ + rx->refcnt = 1; /* Refcount so we won' be destroyed */ + rx->intflags = flags; /* Flags for internal use */ + rx->extflags = flags; /* Flags for perl to use */ + rx->engine = RE_ENGINE_PLUGIN; /* Compile to use this engine */ - /* Store the flags as perl expects them */ - r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; + /* Store a precompiled regexp for pp_regcomp to use */ + rx->prelen = plen; + rx->precomp = savepvn(exp, rx->prelen); - /* - * Construct a new B object that'll carry around - * our data inside C<< r->pprivate >>. The object is a blessed void* - * that points to our replug struct which holds any state we want to - * keep. + /* Set up qr// stringification to be equivalent to the supplied + * pattern, this should be done via overload eventually. */ - re__engine__Plugin re; - Newz(0, re, 1, struct replug); - - SV *obj = newSV(0); - SvREFCNT_inc(obj); - - /* Bless into this package; TODO: make it subclassable */ - const char * pkg = "re::engine::Plugin"; - /* bless it */ - sv_setref_pv(obj, pkg, (void*)re); + rx->wraplen = rx->prelen; + Newx(rx->wrapped, rx->wraplen, char); + Copy(rx->precomp, rx->wrapped, rx->wraplen, char); /* Store our private object */ - r->pprivate = obj; + rx->pprivate = obj; - re->pattern = newSVpvn(SAVEPVN(exp, xend - exp), xend - exp); + /* Store the pattern for ->pattern */ + re->pattern = (SV*)pattern; SvREFCNT_inc(re->pattern); - /* Concat [ec]gimosxp (egimosxp & cgimosxp into) the flags string as - * appropriate - */ - if (r->intflags & PMf_EVAL) { strcat(re->flags, "e"); } - if (r->intflags & PMf_CONTINUE) { strcat(re->flags, "c"); } - if (r->intflags & PMf_GLOBAL) { strcat(re->flags, "g"); } - if (r->intflags & PMf_FOLD) { strcat(re->flags, "i"); } - if (r->intflags & PMf_MULTILINE) { strcat(re->flags, "m"); } - if (r->intflags & PMf_ONCE) { strcat(re->flags, "o"); } - if (r->intflags & PMf_SINGLELINE) { strcat(re->flags, "s"); } - if (r->intflags & PMf_EXTENDED) { strcat(re->flags, "x"); } - if (((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY)) { - strcat(re->flags, "p"); - } - /* * Call our callback function if one was defined, if not we've * already set up all the stuff we're going to to need for @@ -182,11 +98,9 @@ Plugin_comp(pTHX_ char *exp, char *xend, PMOP *pm) PUSHMARK(SP); XPUSHs(obj); - XPUSHs(sv_2mortal(newSVpv(exp, xend - exp))); - PUTBACK; - call_sv(get_H_callback("comp"), G_DISCARD); + call_sv(callback, G_DISCARD); FREETMPS; LEAVE; @@ -195,129 +109,237 @@ Plugin_comp(pTHX_ char *exp, char *xend, PMOP *pm) /* If any of the comp-time accessors were called we'll have to * update the regexp struct with the new info. */ - if (re->minlen) r->minlen = re->minlen; - if (re->gofs) r->gofs = re->gofs; - if (re->gofs) r->gofs = re->gofs; - if (re->nparens) r->nparens = re->nparens; - int buffers = r->nparens; + buffers = rx->nparens; - //r->nparens = (buffers - 1); - Newxz(r->startp, buffers, I32); - Newxz(r->endp, buffers, I32); + Newxz(rx->offs, buffers, regexp_paren_pair); - /* return the regexp */ - return r; + return rx; } I32 -Plugin_exec(pTHX_ register regexp *r, char *stringarg, register char *strend, - char *strbeg, I32 minend, SV *sv, void *data, U32 flags) +Plugin_exec(pTHX_ REGEXP * const rx, char *stringarg, char *strend, + char *strbeg, I32 minend, SV *sv, void *data, U32 flags) { dSP; - I32 rc; - int *ovector; - I32 i; - int count; - int ret; - - /*Newx(ovector,r->nparens,int);*/ + I32 matched; + SV * callback = get_H_callback("exec"); + GET_SELF_FROM_PPRIVATE(rx->pprivate); - SV* callback = get_H_callback("exec"); + /* Store the current str for ->str */ + self->str = (SV*)sv; + SvREFCNT_inc(self->str); - ENTER; + ENTER; SAVETMPS; PUSHMARK(SP); - - XPUSHs(r->pprivate); + XPUSHs(rx->pprivate); XPUSHs(sv); - PUTBACK; - count = call_sv(callback, G_ARRAY); + call_sv(callback, G_SCALAR); SPAGAIN; - SV * SvRet = POPs; + SV * ret = POPs; - if (SvTRUE(SvRet)) { - /* Match vars */ - - /* - r->sublen = strend-strbeg; - r->subbeg = savepvn(strbeg,r->sublen); - r->startp[1] = 0; - r->endp[1] = 5; - */ - - ret = 1; - } else { - ret = 0; - } + if (SvTRUE(ret)) + matched = 1; + else + matched = 0; PUTBACK; FREETMPS; LEAVE; - return ret; + return matched; } char * -Plugin_intuit(pTHX_ regexp *prog, SV *sv, char *strpos, +Plugin_intuit(pTHX_ REGEXP * const rx, SV *sv, char *strpos, char *strend, U32 flags, re_scream_pos_data *data) { + PERL_UNUSED_ARG(rx); + PERL_UNUSED_ARG(sv); + PERL_UNUSED_ARG(strpos); + PERL_UNUSED_ARG(strend); + PERL_UNUSED_ARG(flags); + PERL_UNUSED_ARG(data); return NULL; } SV * -Plugin_checkstr(pTHX_ regexp *prog) +Plugin_checkstr(pTHX_ REGEXP * const rx) { + PERL_UNUSED_ARG(rx); return NULL; } void -Plugin_free(pTHX_ struct regexp *r) +Plugin_free(pTHX_ REGEXP * const rx) { - /*sv_2mortal(r->pprivate);*/ - /*PerlMemShared_free(r->pprivate);*/ + PERL_UNUSED_ARG(rx); +/* + dSP; + SV * callback; + GET_SELF_FROM_PPRIVATE(rx->pprivate); + + callback = self->cb_free; + + if (callback) { + ENTER; + SAVETMPS; + + PUSHMARK(SP); + XPUSHs(rx->pprivate); + PUTBACK; + + call_sv(callback, G_DISCARD); + + PUTBACK; + FREETMPS; + LEAVE; + } + return; +*/ } void * -Plugin_dupe(pTHX_ const regexp *r, CLONE_PARAMS *param) +Plugin_dupe(pTHX_ const REGEXP * rx, CLONE_PARAMS *param) { - return r->pprivate; + Perl_croak("dupe not supported yet"); + return rx->pprivate; } -SV* -Plugin_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv) + +void +Plugin_numbered_buff_FETCH(pTHX_ REGEXP * const rx, const I32 paren, + SV * const sv) { - return NULL; + dSP; + I32 items; + SV * callback; + GET_SELF_FROM_PPRIVATE(rx->pprivate); + + callback = self->cb_num_capture_buff_FETCH; + + if (callback) { + ENTER; + SAVETMPS; + + PUSHMARK(SP); + XPUSHs(rx->pprivate); + XPUSHs(sv_2mortal(newSViv(paren))); + PUTBACK; + + items = call_sv(callback, G_SCALAR); + + if (items == 1) { + SPAGAIN; + + SV * ret = POPs; + sv_setsv(sv, ret); + } else { + sv_setsv(sv, &PL_sv_undef); + } + + PUTBACK; + FREETMPS; + LEAVE; + } else { + sv_setsv(sv, &PL_sv_undef); + } +} + +void +Plugin_numbered_buff_STORE(pTHX_ REGEXP * const rx, const I32 paren, + SV const * const value) +{ + dSP; + I32 items; + SV * callback; + GET_SELF_FROM_PPRIVATE(rx->pprivate); + + callback = self->cb_num_capture_buff_STORE; + + if (callback) { + ENTER; + SAVETMPS; + + PUSHMARK(SP); + XPUSHs(rx->pprivate); + XPUSHs(sv_2mortal(newSViv(paren))); + XPUSHs(SvREFCNT_inc(value)); + PUTBACK; + + call_sv(callback, G_DISCARD); + + PUTBACK; + FREETMPS; + LEAVE; + } } +I32 +Plugin_numbered_buff_LENGTH(pTHX_ REGEXP * const rx, const SV * const sv, + const I32 paren) +{ + dSP; + I32 items; + SV * callback; + re__engine__Plugin self; + + SELF_FROM_PPRIVATE(self,rx->pprivate); + + callback = self->cb_num_capture_buff_LENGTH; + + if (callback) { + ENTER; + SAVETMPS; + + PUSHMARK(SP); + XPUSHs(rx->pprivate); + XPUSHs(sv_2mortal(newSViv(paren))); + PUTBACK; + + call_sv(callback, G_SCALAR); + + SPAGAIN; + + IV ret = POPi; + + PUTBACK; + FREETMPS; + LEAVE; + + return (I32)ret; + } else { + /* TODO: call FETCH and get the length on that value */ + return 0; + } +} + + SV* -Plugin_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags) +Plugin_named_buff_FETCH(pTHX_ REGEXP * const rx, SV * const key, U32 flags) { + PERL_UNUSED_ARG(rx); + PERL_UNUSED_ARG(key); + PERL_UNUSED_ARG(flags); + return NULL; } -/* - * The function pointers we're telling the regex engine to use - */ -const regexp_engine engine_plugin = { - Plugin_comp, - Plugin_exec, - Plugin_intuit, - Plugin_checkstr, - Plugin_free, - Plugin_numbered_buff_get, - Plugin_named_buff_get, -#if defined(USE_ITHREADS) - Plugin_dupe, -#endif -}; +SV* +Plugin_package(pTHX_ REGEXP * const rx) +{ + PERL_UNUSED_ARG(rx); + return newSVpvs("re::engine::Plugin"); +} MODULE = re::engine::Plugin PACKAGE = re::engine::Plugin +PROTOTYPES: ENABLE SV * pattern(re::engine::Plugin self, ...) @@ -327,13 +349,47 @@ CODE: OUTPUT: RETVAL -char* -flags(re::engine::Plugin self, ...) +SV * +str(re::engine::Plugin self, ...) CODE: - RETVAL = self->flags; + SvREFCNT_inc(self->str); + RETVAL = self->str; OUTPUT: RETVAL +char* +mod(re::engine::Plugin self, ...) +PPCODE: + /* /i */ + if (self->rx->intflags & PMf_FOLD) { + XPUSHs(sv_2mortal(newSVpvs("i"))); + XPUSHs(&PL_sv_yes); + } + + /* /m */ + if (self->rx->intflags & PMf_MULTILINE) { + XPUSHs(sv_2mortal(newSVpvs("m"))); + XPUSHs(&PL_sv_yes); + } + + /* /s */ + if (self->rx->intflags & PMf_SINGLELINE) { + XPUSHs(sv_2mortal(newSVpvs("s"))); + XPUSHs(&PL_sv_yes); + } + + /* /x */ + if (self->rx->intflags & PMf_EXTENDED) { + XPUSHs(sv_2mortal(newSVpvs("x"))); + XPUSHs(&PL_sv_yes); + } + + /* /p */ + if (self->rx->intflags & RXf_PMf_KEEPCOPY) { + XPUSHs(sv_2mortal(newSVpvs("p"))); + XPUSHs(&PL_sv_yes); + } + SV * stash(re::engine::Plugin self, ...) PREINIT: @@ -352,10 +408,10 @@ SV * minlen(re::engine::Plugin self, ...) CODE: if (items > 1) { - self->minlen = (I32)SvIV(ST(1)); + self->rx->minlen = (I32)SvIV(ST(1)); } - RETVAL = self->minlen ? newSViv(self->minlen) : &PL_sv_undef; + RETVAL = self->rx->minlen ? newSViv(self->rx->minlen) : &PL_sv_undef; OUTPUT: RETVAL @@ -363,9 +419,9 @@ SV * gofs(re::engine::Plugin self, ...) CODE: if (items > 1) { - self->gofs = (U32)SvIV(ST(1)); + self->rx->gofs = (U32)SvIV(ST(1)); } - RETVAL = self->gofs ? newSVuv(self->gofs) : &PL_sv_undef; + RETVAL = self->rx->gofs ? newSVuv(self->rx->gofs) : &PL_sv_undef; OUTPUT: RETVAL @@ -373,22 +429,37 @@ SV * nparens(re::engine::Plugin self, ...) CODE: if (items > 1) { - self->nparens = (U32)SvIV(ST(1)); + self->rx->nparens = (U32)SvIV(ST(1)); } - RETVAL = self->gofs ? newSVuv(self->gofs) : &PL_sv_undef; + RETVAL = self->rx->nparens ? newSVuv(self->rx->nparens) : &PL_sv_undef; OUTPUT: RETVAL void -captures(re::engine::Plugin self, ...) +_num_capture_buff_FETCH(re::engine::Plugin self, ...) +PPCODE: + if (items > 1) { + self->cb_num_capture_buff_FETCH = ST(1); + SvREFCNT_inc(self->cb_num_capture_buff_FETCH); + } + +void +_num_capture_buff_STORE(re::engine::Plugin self, ...) +PPCODE: + if (items > 1) { + self->cb_num_capture_buff_STORE = ST(1); + SvREFCNT_inc(self->cb_num_capture_buff_STORE); + } + +void +_num_capture_buff_LENGTH(re::engine::Plugin self, ...) PPCODE: if (items > 1) { - self->minlen = (I32)SvIV(ST(1)); + self->cb_num_capture_buff_LENGTH = ST(1); + SvREFCNT_inc(self->cb_num_capture_buff_LENGTH); } - XPUSHs(sv_2mortal(newSViv(5))); - XPUSHs(sv_2mortal(newSViv(10))); void -get_engine_plugin() +ENGINE() PPCODE: XPUSHs(sv_2mortal(newSViv(PTR2IV(&engine_plugin)))); diff --git a/t/Example.pm b/t/Example.pm new file mode 100644 index 0000000..efe9bab --- /dev/null +++ b/t/Example.pm @@ -0,0 +1,31 @@ +package Example; +use strict; + +# Note the (), doesn't call ->import +use re::engine::Plugin (); + +sub import { + # Populates %^H with re::engine::Plugin hooks + re::engine::Plugin->import( + exec => \&exec, + ); +} + +*unimport = \&re::engine::Plugin::unimport; + +sub exec +{ + my ($re, $str) = @_; + + $re->num_captures( + FETCH => sub { + my ($re, $paren) = @_; + + $str . "_" . $paren; + } + ); + + 1; +} + +1; diff --git a/t/Example.t b/t/Example.t new file mode 100644 index 0000000..f9080c0 --- /dev/null +++ b/t/Example.t @@ -0,0 +1,9 @@ +use strict; +use lib 't'; +use Test::More tests => 1; + +use Example; + +"str" =~ /pattern/; + +is($1, "str_1"); diff --git a/t/eval-comp.t b/t/eval-comp.t index deebdd3..104f111 100644 --- a/t/eval-comp.t +++ b/t/eval-comp.t @@ -1,27 +1,21 @@ =pod -Having C catch C in one of the callbacks when called -indirectly as C doesn't work. This is not at all surprising -since the eventual call to the callback sub is not exectuted within -the context that C appeared in, but there's a test for it -anyway. +This will always die, see L for why. -The other eval tests are just copies of this one made because the -interpreter can only die so many times per process. +This can be made to live regardless of C by adding C<| G_EVAL> +to C in C. =cut use strict; -use Test::More skip_all => 'TODO: make this work'; -#use Test::More tests => 1; +use Test::More skip_all => 'Always dies'; use re::engine::Plugin ( comp => sub { die "died at comp time" }, - exec => sub { 1 }, ); -eval { /noes/ }; +eval { "str" ~~ /noes/ }; TODO: { local $TODO = 'passing tests for known bug with how we handle eval'; diff --git a/t/eval-exec.t b/t/eval-exec.t index 426be1e..4ec6475 100644 --- a/t/eval-exec.t +++ b/t/eval-exec.t @@ -9,7 +9,6 @@ use strict; use Test::More tests => 1; use re::engine::Plugin ( - comp => sub {}, exec => sub { die "died at exec time" }, ); diff --git a/t/import.t b/t/import.t index 04cb7f2..f2eae22 100644 --- a/t/import.t +++ b/t/import.t @@ -5,18 +5,13 @@ Test that lexical importing works, check BEGIN-ish stuff etc. =cut use strict; - -use Data::Dumper; - -use Test::More tests => 7; - +use Test::More tests => 8; use re::engine::Plugin (); like "a", qr/^a$/, "import didn't run, perl's regex engine in effect"; BEGIN { re::engine::Plugin->import( - comp => sub {}, # TODO: remove when this can be omitted exec => sub { $_[0]->pattern eq $_[1] } ); } @@ -26,7 +21,6 @@ ok "^hello" =~ /^hello/ => "regex modified to match a literal pattern"; { BEGIN { re::engine::Plugin->import( - comp => sub {}, # TODO: remove when this can be omitted exec => sub { $_[0]->pattern ne $_[1] } ); } @@ -35,7 +29,6 @@ ok "^hello" =~ /^hello/ => "regex modified to match a literal pattern"; { BEGIN { re::engine::Plugin->import( - comp => sub {}, # TODO: remove when this can be omitted exec => sub { $_[0]->pattern eq '^[abc]$' } ); } @@ -48,15 +41,11 @@ ok "^hello" =~ /^hello/ => "regex modified to match a literal pattern"; ok "^hello" =~ /^hello/ => "regex modified to match a literal pattern"; +# Another import at the same scope +BEGIN { + re::engine::Plugin->import( + exec => sub { $_[0]->pattern ne $_[1] } + ); +} - - - - - - - - - - - +ok "^hello" !~ /^hello/ => "regex modified not to match a literal pattern"; diff --git a/t/methods.t b/t/methods.t index 56de947..6df32fa 100644 --- a/t/methods.t +++ b/t/methods.t @@ -8,7 +8,7 @@ use strict; use feature ':5.10'; -use Test::More 'no_plan';#tests => 6; +use Test::More tests => 9; use re::engine::Plugin ( comp => sub { @@ -27,11 +27,11 @@ use re::engine::Plugin ( # pattern cmp_ok($re->pattern, 'eq', ' foobar zoobar ' => '->pattern ok'); - # flags - my $f = $re->flags; - like $f, qr/i/, 'str flags /i'; - like $f, qr/x/, 'str flags /x'; - like $f, qr/^[cgimosx]+$/, 'flags contain all-good characters'; + # modifiers + my %mod = $re->mod; + ok(exists $mod{i}, 'str flags /i'); + ok(exists $mod{x}, 'str flags /i'); + like(join('', keys %mod), qr/^[cgimosx]+$/, 'flags contain all-good characters'); # stash cmp_ok($re->stash->{"x"}, '==', 5, "data correct in stash"); @@ -39,14 +39,6 @@ use re::engine::Plugin ( cmp_ok(ref $re->stash->{"y"}, 'eq', 'CODE', "data correct in stash"); cmp_ok($re->stash->{"y"}->(), '==', 6, "data correct in stash"); - # This needs a less sucky name - # - # Pattern: ' foobar zoobar ', set $1 to "foobar" (if I counted this right:) -# $re->offset_captures( [1, 7], ... ); - - # This name sucks as well -# $re->named_captures2offset_captures( myNameIs => 0 ): # $+{myNameIs} = $1 - # Pattern contains "foo", "bar" and "zoo", return a true return $re->pattern =~ /zoo/; } diff --git a/t/methods/free.t b/t/methods/free.t new file mode 100644 index 0000000..61c5dff --- /dev/null +++ b/t/methods/free.t @@ -0,0 +1,18 @@ +=pod + +Test the C method + +=cut + +use strict; +use Test::More skip_all => "Doesn't work currently (where did my scope go?!)"; + +use re::engine::Plugin ( + comp => sub { + my ($re) = @_; + + $re->free( sub { pass "ran free" } ); + } +); + +"str" ~~ /pattern/; diff --git a/t/methods/minlen/bytes.t b/t/methods/minlen/bytes.t new file mode 100644 index 0000000..643f118 --- /dev/null +++ b/t/methods/minlen/bytes.t @@ -0,0 +1,23 @@ +=pod + +C speaks bytes, not characters. + +=cut + +use strict; +use Test::More tests => 3; +use re::engine::Plugin ( + comp => sub { shift->minlen(5) }, + exec => sub { + my ($re, $str) = @_; + pass "Called with $str"; + }, +); + +my $str = "ævar"; +is(length $str, 5, "$str is 5 char long"); # Chars +$str =~ /pattern/; # no ->exec + +chop $str; +is(length $str, 4, "$str is 4 char long"); # Chars +$str =~ /pattern/; # yes ->exec diff --git a/t/methods/minlen/get.t b/t/methods/minlen/get.t new file mode 100644 index 0000000..80f7f80 --- /dev/null +++ b/t/methods/minlen/get.t @@ -0,0 +1,20 @@ +use strict; + +use Test::More tests => 3; + +use re::engine::Plugin ( + comp => sub { + my $re = shift; + $re->minlen(2); + }, + exec => sub { + my $re = shift; + my $minlen = $re->minlen; + cmp_ok $minlen, '==', 2, 'minlen accessor'; + }, +); + +pass "making match"; +"s" =~ /pattern/; +"st" =~ /pattern/; +"str" =~ /pattern/; diff --git a/t/methods/minlen/set.t b/t/methods/minlen/set.t new file mode 100644 index 0000000..51fa823 --- /dev/null +++ b/t/methods/minlen/set.t @@ -0,0 +1,14 @@ +use strict; + +use Test::More tests => 1; + +use re::engine::Plugin ( + comp => sub { + my $re = shift; + $re->minlen(length("str") + 1); # make "str" too short + }, + exec => sub { fail "exec called" }, +); + +pass "making match"; +"str" =~ /pattern/; diff --git a/t/methods/mod.t b/t/methods/mod.t new file mode 100644 index 0000000..426182d --- /dev/null +++ b/t/methods/mod.t @@ -0,0 +1,76 @@ +=pod + +Test the C or C method + +=cut + +use strict; +use feature ':5.10'; +use Test::More tests => 25; + +my @tests = ( + sub { cmp_ok shift, 'eq', '', => 'no flags' }, + sub { cmp_ok shift, 'eq', '', => '/c' }, + sub { cmp_ok shift, 'eq', '' => '/g' }, + sub { cmp_ok shift, 'eq', 'i' => '/i' }, + sub { cmp_ok shift, 'eq', 'm' => '/m' }, + sub { cmp_ok shift, 'eq', '' => '/o' }, + sub { cmp_ok shift, 'eq', 's' => '/s' }, + sub { cmp_ok shift, 'eq', 'x' => '/x' }, + sub { cmp_ok shift, 'eq', 'p' => '/p' }, + sub { like $_[0], qr/$_/ => "/$_ in $_[0]" for unpack "(Z)*", "xi" }, + sub { like $_[0], qr/$_/ => "/$_ in $_[0]" for unpack "(Z)*", "xs" }, + sub { + for (unpack "(Z)*", "cgimsxp") { + /[cg]/ and next; + like $_[0], qr/$_/ => "/$_ in $_[0]" + } + }, + sub { cmp_ok shift, 'eq', '', => '/e' }, + sub { + for (unpack "(Z)*", "egimsxp") { + /[ge]/ and next; + like $_[0], qr/$_/ => "/$_ in $_[0]"; + } + }, + + sub { cmp_ok shift, 'eq', '' => '??' }, + # Leave this as the last + ,sub { die "add more tests" } +); + +use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; + + my $t = shift @tests; + + my %mod = $re->mod; + + my $mod_str = join '', keys %mod; + + $t->($mod_str); + } +); + +# Provide a pattern that can match to avoid running into regexp +# optimizations that won't call exec on C<"" =~ //>; + +"" =~ /x/; +"" =~ /x/cg; # meaningless without /g +"" =~ /x/g; +"" =~ /x/i; +"" =~ /x/m; +"" =~ /x/o; +"" =~ /x/s; +"" =~ /x/x; +"" =~ /x/p; +"" =~ /x/xi; +"" =~ /x/xs; +"" =~ /x/cgimosxp; + +my $_ = ""; + +$_ =~ s/1/2/e; +$_ =~ s/1/2/egimosxp; +$_ =~ ??; diff --git a/t/methods/pattern/modify.t b/t/methods/pattern/modify.t new file mode 100644 index 0000000..f5eb45e --- /dev/null +++ b/t/methods/pattern/modify.t @@ -0,0 +1,18 @@ +use strict; +use Test::More tests => 1; +use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; + my $pattern = $re->pattern; + + $$pattern = "eek"; + + return 1; + }, +); + +my $sv = "ook"; +if ("ook" =~ \$sv) { + is($sv, "eek"); +} + diff --git a/t/methods/pattern/types.t b/t/methods/pattern/types.t new file mode 100644 index 0000000..8675336 --- /dev/null +++ b/t/methods/pattern/types.t @@ -0,0 +1,19 @@ +use strict; +use Test::More tests => 7; +use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; + my $pattern = $re->pattern; + + isa_ok($pattern, $str); + }, +); + +my $sv; +"SCALAR" =~ \$sv; +"REF" =~ \\$sv; +"ARRAY" =~ []; +"HASH" =~ {}; +"GLOB" =~ \*STDIN; +"CODE" =~ sub {}; +"main" =~ bless {} => "main"; diff --git a/t/methods/stash.t b/t/methods/stash.t new file mode 100644 index 0000000..b719753 --- /dev/null +++ b/t/methods/stash.t @@ -0,0 +1,29 @@ +=pod + +Test the C method + +=cut + +use strict; +use Test::More tests => 4; + +use re::engine::Plugin ( + comp => sub { + my ($re) = @_; + + my $sv = [ qw( a o e u ) ]; + + $re->stash( $sv ); + }, + exec => sub { + my ($re, $str) = @_; + + my $stash = $re->stash; + my %h = qw( 0 a 1 o 2 e 3 u ); + for (keys %h) { + is($h{$_}, $stash->[$_]); + } + } +); + +"ook" =~ /eek/; diff --git a/t/methods/str/types.t b/t/methods/str/types.t new file mode 100644 index 0000000..f79ba36 --- /dev/null +++ b/t/methods/str/types.t @@ -0,0 +1,21 @@ +use strict; +use Test::More tests => 7; +use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; + + is_deeply($str, $re->str); + + return 1; + }, +); + +my $sv; +"SCALAR" =~ \$sv; +"REF" =~ \\$sv; +"ARRAY" =~ []; +"HASH" =~ {}; +"GLOB" =~ \*STDIN; +"CODE" =~ sub {}; +"main" =~ bless {} => "main"; + diff --git a/t/methods/str/undef.t b/t/methods/str/undef.t new file mode 100644 index 0000000..34e24b7 --- /dev/null +++ b/t/methods/str/undef.t @@ -0,0 +1,14 @@ +use strict; +use Test::More tests => 3; +use re::engine::Plugin ( + comp => sub { + my ($re, $str) = @_; + + # Runs three times apperently. + is($re->str, undef, 'str is undef'); + + return; + }, +); + +qr/pattern/; diff --git a/t/num_buff/FETCH.t b/t/num_buff/FETCH.t new file mode 100644 index 0000000..39d3f6c --- /dev/null +++ b/t/num_buff/FETCH.t @@ -0,0 +1,35 @@ +use strict; +use Test::More tests => 7; + +use re::engine::Plugin ( + exec => sub { + my $re = shift; + + $re->num_captures( + FETCH => sub { + my ($re, $paren) = @_; + + my %ret = ( + -2 => 10, + -1 => 20, + 0 => 30, + 1 => 40, + ); + + $ret{$paren}; + } + ); + + 1; + }, +); + +"a" =~ /a/; + +is($`, 10, '$`'); +is(${^PREMATCH}, 10, '${^PREMATCH}'); +is($', 20, q($')); +is(${^POSTMATCH}, 20, '${^POSTMATCH}'); +is($&, 30, '$&'); +is(${^MATCH}, 30, '${^MATCH}'); +is($1, 40, '$1'); diff --git a/t/num_buff/LENGTH.t b/t/num_buff/LENGTH.t new file mode 100644 index 0000000..7e8afdd --- /dev/null +++ b/t/num_buff/LENGTH.t @@ -0,0 +1,35 @@ +use strict; +use Test::More tests => 7; + +use re::engine::Plugin ( + exec => sub { + my $re = shift; + + $re->stash( [ + 10, 10, + 20, 20, + 30, 30, + 40, + ]); + + $re->num_captures( + LENGTH => sub { + my ($re, $paren) = @_; + + shift @{ $re->stash }; + }, + ); + + 1; + }, +); + +"a" =~ /a/; + +is(length $`, 10); +is(length ${^PREMATCH}, 10); +is(length $', 20); +is(length ${^POSTMATCH}, 20); +is(length $&, 30); +is(length ${^MATCH}, 30); +is(length $1, 40); diff --git a/t/num_buff/STORE.t b/t/num_buff/STORE.t new file mode 100644 index 0000000..3d693a0 --- /dev/null +++ b/t/num_buff/STORE.t @@ -0,0 +1,40 @@ +use strict; +use Test::More tests => 14; + +use re::engine::Plugin ( + exec => sub { + my $re = shift; + + $re->stash( [ + [ -2, "a" ], + [ -2, "a" ], + [ -1, "o" ], + [ -1, "o" ], + [ 0, "e" ], + [ 0, "e" ], + [ 1, "u" ], + ]); + + $re->num_captures( + STORE => sub { + my ($re, $paren, $sv) = @_; + my $test = shift @{ $re->stash }; + + is($paren, $test->[0]); + is($sv, $test->[1]); + }, + ); + + 1; + }, +); + +"a" =~ /a/; + +$` = "a"; +${^PREMATCH} = "a"; +$' = "o"; +${^POSTMATCH} = "o"; +$& = "e"; +${^MATCH} = "e"; +$1 = "u"; diff --git a/t/taint/rx.t b/t/taint/rx.t new file mode 100644 index 0000000..acaf6df --- /dev/null +++ b/t/taint/rx.t @@ -0,0 +1,58 @@ +#!/usr/bin/env perl -T + +=pod + +Equivalent to F but uses perl regexes to untaint. + +=cut + +use strict; +use Test::More; + +BEGIN { + eval { + require Taint::Util; + Taint::Util->import; + }; + + plan $@ + ? (skip_all => "Taint::Util required for taint tests") + : (tests => 8); +} + +use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; + + ok(tainted($str) => 'matched string tainted'); + + my $one = $str; + my $two = $str; $two ~~ /(.*)/; $two = $1; + + ok(tainted($one)); + ok(!tainted($two)); + + $re->num_captures( + FETCH => sub { + my ($re, $p) = @_; + + return $one if $p == 1; + return $two if $p == 2; + }, + ); + + 1; + } +); + +my $str = "string"; +taint($str); +ok(tainted($str)); + +if ($str =~ /pattern/) { + cmp_ok $1, 'eq', $str; + ok(tainted($1) => '$1 is tainted'); + + cmp_ok $2, 'eq', $str; + ok(!tainted($2) => '$2 is untainted'); +} diff --git a/t/taint/util.t b/t/taint/util.t new file mode 100644 index 0000000..11905b3 --- /dev/null +++ b/t/taint/util.t @@ -0,0 +1,51 @@ +#!/usr/bin/env perl -T +use strict; +use Test::More; + +BEGIN { + eval { + require Taint::Util; + Taint::Util->import; + }; + + plan $@ + ? (skip_all => "Taint::Util required for taint tests") + : (tests => 8); +} + +use re::engine::Plugin ( + exec => sub { + my ($re, $str) = @_; + + ok(tainted($str) => 'matched string tainted'); + + my $one = $str; + my $two = $str; untaint($two); + + ok(tainted($one)); + ok(!tainted($two)); + + $re->num_captures( + FETCH => sub { + my ($re, $p) = @_; + + return $one if $p == 1; + return $two if $p == 2; + }, + ); + + 1; + } +); + +my $str = "string"; +taint($str); +ok(tainted($str)); + +if ($str =~ /pattern/) { + cmp_ok $1, 'eq', $str; + ok(tainted($1) => '$1 is tainted'); + + cmp_ok $2, 'eq', $str; + ok(!tainted($2) => '$2 is untainted'); +}