--- /dev/null
+2007-05-17 Ævar Arnfjörð Bjarmason <avar@cpan.org> (0.02)
+
+ * Brought up to date with current blead after lots of hacking on
+ blead itself, too many changes to list
+
+2007-02-18 Ævar Arnfjörð Bjarmason <avar@cpan.org> (0.01)
+
+ * Initial release
-Changes
-contrib/capture_hook.patch
+ChangeLog
inc/Module/AutoInstall.pm
inc/Module/Install.pm
inc/Module/Install/AutoInstall.pm
Makefile.PL
MANIFEST This list of files
META.yml
+Plugin.h
Plugin.pm
Plugin.pod
Plugin.xs
t/00-compile.t
-t/captures.t
t/eval-comp.t
t/eval-exec.t
-t/flags.t
+t/Example.pm
+t/Example.t
t/import.t
t/methods.t
-t/minlen-get.t
-t/minlen-set.t
-TODO
+t/methods/free.t
+t/methods/minlen/bytes.t
+t/methods/minlen/get.t
+t/methods/minlen/set.t
+t/methods/mod.t
+t/methods/pattern/modify.t
+t/methods/pattern/types.t
+t/methods/stash.t
+t/methods/str/types.t
+t/methods/str/undef.t
+t/num_buff/FETCH.t
+t/num_buff/LENGTH.t
+t/num_buff/STORE.t
+t/taint/rx.t
+t/taint/util.t
typemap
-abstract: Pure-Perl regular expression engine plugin interface
+abstract: Regular Expression engine API
author: "\xC3\x86var Arnfj\xC3\xB6r\xC3\xB0 Bjarmason <avar@cpan.org>"
build_requires:
Test::More: 0
-\r
-=pod\r
-\r
-L<Module::Install> file for L<re::engine::Plugin>. M::I knows how to\r
-build our XS stuff automatically.\r
-\r
-=cut\r
-\r
use strict;\r
use inc::Module::Install;\r
\r
name 're-engine-Plugin';\r
\r
-# Requires the unreleased match vars features in (as of writing)\r
-# unreleased 5.9.6, but 5.9.5 is the version of blead\r
perl_version '5.009005';\r
\r
author 'Ævar Arnfjörð Bjarmason <avar@cpan.org>';\r
# t/\r
build_requires 'Test::More' => 0; # 5.007003\r
\r
-tests 't/*.t';\r
+tests 't/*.t t/*/*.t t/*/*/*.t';\r
\r
auto_install;\r
WriteAll;\r
--- /dev/null
+
+#define GET_SELF_FROM_PPRIVATE(pprivate) \
+ re__engine__Plugin self; \
+ SELF_FROM_PPRIVATE(self,pprivate);
+
+/* re__engine__Plugin self; SELF_FROM_PPRIVATE(self,rx->pprivate) */
+#define SELF_FROM_PPRIVATE(self, pprivate) \
+ if (sv_isobject(pprivate)) { \
+ SV * ref = SvRV((SV*)pprivate); \
+ IV tmp = SvIV((SV*)ref); \
+ self = INT2PTR(re__engine__Plugin,tmp); \
+ } else { \
+ Perl_croak(aTHX_ "Not an object"); \
+ }
+
+START_EXTERN_C
+EXTERN_C const regexp_engine engine_plugin;
+EXTERN_C REGEXP * Plugin_comp(pTHX_ const SV const *, const U32);
+EXTERN_C I32 Plugin_exec(pTHX_ REGEXP * const, char *, char *,
+ char *, I32, SV *, void *, U32);
+EXTERN_C char * Plugin_intuit(pTHX_ REGEXP * const, SV *, char *,
+ char *, U32, re_scream_pos_data *);
+EXTERN_C SV * Plugin_checkstr(pTHX_ REGEXP * const);
+EXTERN_C void Plugin_free(pTHX_ REGEXP * const);
+EXTERN_C void Plugin_numbered_buff_FETCH(pTHX_ REGEXP * const,
+ const I32, SV * const);
+EXTERN_C void Plugin_numbered_buff_STORE(pTHX_ REGEXP * const,
+ const I32, SV const * const);
+EXTERN_C I32 Plugin_numbered_buff_LENGTH(pTHX_ REGEXP * const,
+ const SV * const, const I32);
+EXTERN_C SV * Plugin_named_buff_FETCH(pTHX_ REGEXP * const, SV * const,
+ const U32);
+EXTERN_C SV * Plugin_package(pTHX_ REGEXP * const);
+#ifdef USE_ITHREADS
+EXTERN_C void * Plugin_dupe(pTHX_ REGEXP * const, CLONE_PARAMS *);
+#endif
+END_EXTERN_C
+
+START_EXTERN_C
+EXTERN_C const regexp_engine engine_plugin;
+END_EXTERN_C
+
+#define RE_ENGINE_PLUGIN (&engine_plugin)
+const regexp_engine engine_plugin = {
+ Plugin_comp,
+ Plugin_exec,
+ Plugin_intuit,
+ Plugin_checkstr,
+ Plugin_free,
+ Plugin_numbered_buff_FETCH,
+ Plugin_numbered_buff_STORE,
+ Plugin_numbered_buff_LENGTH,
+ Plugin_named_buff_FETCH,
+ Plugin_package,
+#if defined(USE_ITHREADS)
+ Plugin_dupe,
+#endif
+};
+
+typedef struct replug {
+ /* Pointer back to the containing REGEXP struct so that accessors
+ * can modify nparens, gofs etc. */
+ REGEXP * rx;
+
+ /* A copy of the pattern given to comp, for ->pattern */
+ SV * pattern;
+
+ /* A copy of the string being matched against, for ->str */
+ SV * str;
+
+ /* The ->stash */
+ SV * stash;
+
+ /*
+ * Callbacks
+ */
+
+ SV * cb_free;
+
+ /* ->num_captures */
+ SV * cb_num_capture_buff_FETCH;
+ SV * cb_num_capture_buff_STORE;
+ SV * cb_num_capture_buff_LENGTH;
+} *re__engine__Plugin;
# See Plugin.pod for documentation\r
package re::engine::Plugin;\r
use 5.009005;\r
+use base 'Regexp';\r
use strict;\r
-use Carp 'croak';\r
-use Scalar::Util 'weaken';\r
use XSLoader ();\r
\r
-our $VERSION = '0.01';\r
+our $VERSION = '0.02';\r
\r
XSLoader::load __PACKAGE__, $VERSION;\r
\r
-my $RE_ENGINE_PLUGIN = get_engine_plugin();\r
-my $NULL = 0;\r
+my $RE_ENGINE_PLUGIN = ENGINE();\r
\r
# How many? Used to cheat %^H\r
-my $callback = 0;\r
-# Valid callbacks\r
-my @callback = qw(comp exec intuit checkstr free dupe);\r
+my $callback = 1;\r
+\r
# Where we store our CODE refs\r
my %callback;\r
\r
+# Generate a key to use in the %^H hash from a string, prefix the\r
+# package name like L<pragma> does\r
+my $key = sub { __PACKAGE__ . "::" . $_[0] };\r
+\r
sub import\r
{\r
my ($pkg, %sub) = @_;\r
\r
- #$sub{$_} = sub {}\r
+ # Valid callbacks\r
+ my @callback = qw(comp exec intuit checkstr free dupe);\r
\r
for (@callback) {\r
next unless exists $sub{$_};\r
my $cb = delete $sub{$_};\r
\r
- # Convert "package::sub" to CODE if it isn't CODE already\r
unless (ref $cb eq 'CODE') {\r
- no strict 'refs';\r
- $cb = *{$cb}{CODE};\r
+ require Carp;\r
+ Carp::croak("'$_' is not CODE");\r
}\r
\r
- # Whine if we don't get a CODE ref or a valid package::sub name\r
- croak "'$_' is not CODE and neither is the *{$cb}{CODE} fallback"\r
- unless ref $cb eq 'CODE';\r
-\r
# Get an ID to use\r
my $id = $callback ++;\r
\r
# Insert into our callback storage,\r
$callback{$_}->{$id} = $cb;\r
\r
- # Weaken it so we don't end up hanging on to something the\r
- # caller doesn't care about anymore\r
- #weaken($callback{$_}->{$id}); # EEK, too weak!\r
-\r
# Instert into our cache with a key we can retrive later\r
# knowing the ID in %^H and what callback we're getting\r
- my $key = callback_key($_);\r
- $^H{$key} = $id;\r
+ $^H{ $key->($_) } = $id;\r
}\r
\r
$^H{regcomp} = $RE_ENGINE_PLUGIN;\r
\r
sub unimport\r
{\r
- my ($pkg) = @_;\r
-\r
# Delete the regcomp hook\r
- delete $^H{regcomp} if $^H{regcomp} == $RE_ENGINE_PLUGIN;\r
-}\r
-\r
-sub callback_key\r
-{\r
- my ($name) = @_;\r
-\r
- sprintf "rep_%s", $name;\r
+ delete $^H{regcomp}\r
+ if $^H{regcomp} == $RE_ENGINE_PLUGIN;\r
}\r
\r
-# Minimal function to be called from the XS\r
-sub get_callback\r
+# Minimal function to get CODE for a given key to be called by the\r
+# get_H_callback C function.\r
+sub _get_callback\r
{\r
my ($name) = @_; # 'comp', 'exec', ...\r
\r
- my $key = callback_key($name);\r
my $h = (caller(0))[10];\r
- my $id = $h->{$key};\r
+ my $id = $h->{ $key->($name) };\r
\r
my $cb = defined $id ? $callback{$name}->{$id} : 0;\r
\r
return $cb;\r
}\r
\r
+sub num_captures\r
+{\r
+ my ($re, %callback) = @_;\r
+\r
+ for my $key (keys %callback) {\r
+ $key =~ y/a-z/A-Z/; # ASCII uc\r
+ my $name = '_num_capture_buff_' . $key;\r
+ $re->$name( $callback{$key} );\r
+ }\r
+}\r
+\r
1;\r
=head1 NAME
-re::engine::Plugin - Pure-Perl regular expression engine plugin interface
+re::engine::Plugin - API to write custom regex engines
-=head1 SYNOPSIS
+=head1 DESCRIPTION
- use feature ':5.10';
- use re::engine::Plugin (
- comp => sub {
- my ($re) = @_; # A re::engine::Plugin object
+As of perl 5.9.5 it's possible to lexically replace perl's built-in
+regular expression engine with your own (see L<perlreapi> and
+L<perlpragma>). This module provides a glue interface to the relevant
+parts of the perl C API enabling you to write an engine in Perl
+instead of the C/XS interface provided by the core.
- # return value ignored
- },
- exec => sub {
- my ($re, $str) = @_;
+=head2 The gory details
+
+Each regex in perl is compiled into an internal C<REGEXP> structure
+(see L<perlreapi|perlreapi/The REGEXP structure>), this can happen
+either during compile time in the case of patterns in the format
+C</pattern/> or runtime for C<qr//> patterns, or something inbetween
+depending on variable interpolation etc.
+
+When this module is loaded into a scope it inserts a hook into
+C<$^H{regcomp}> (as described in L<perlreapi>) to have each regexp
+constructed in its lexical scope handled by this engine, but it
+differs from other engines in that it also inserts other hooks into
+C<%^H> in the same scope that point to user-defined subroutines to use
+during compilation, execution etc, these are described in
+L</CALLBACKS> below.
+
+The callbacks (e.g. L</comp>) then get called with a
+L<re::engine::Plugin> object as their first argument. This object
+provies access to perl's internal REGEXP struct in addition to its own
+state (e.g. a L<stash|/stash>). The L<methods|/METHODS> on this object
+allow for altering the C<REGEXP> struct's internal state, adding new
+callbacks, etc.
- # We always like ponies!
- return 1 if $str eq 'pony';
- return;
- }
+=head1 CALLBACKS
+
+Callbacks are specified in the C<re::engine::Plugin> import list as
+key-value pairs of names and subroutine references:
+
+ use re::engine::Plugin (
+ comp => sub {},
+ exec => sub {},
);
- "pony" =~ /yummie/;
+To write a custom engine which imports your functions into the
+caller's scope use use the following snippet:
-=head1 DESCRIPTION
+ package re::engine::Example;
+ use re::engine::Plugin ();
+
+ sub import
+ {
+ # Populates the caller's %^H with our callbacks
+ re::engine::Plugin->import(
+ comp => \&comp,
+ exec => \&exec,
+ );
+ }
+
+ *unimport = \&re::engine::Plugin::unimport;
-As of perl 5.9.5 it's possible lexically replace perl's built-in
-regular expression engine (see L<perlreguts|perlreguts/"Pluggable
-Interface">). This module provides glue for writing such a wrapper in
-Perl instead of the provided C/XS interface.
+ # Implementation of the engine
+ sub comp { ... }
+ sub exec { ... }
-B<NOTE>: This module is a development release that does not work with
-any version of perl other than the current (as of February 2007)
-I<blead>. The provided interface is not a complete wrapper around the
-native interface (yet!) but the parts that are left can be implemented
-with additional methods so the completed API shouldn't have any major
-changes.
+ 1;
+
+=head2 comp
+
+ comp => sub {
+ my ($rx) = @_;
+
+ # return value discarded
+ }
+
+Called when a regex is compiled by perl, this is always the first
+callback to be called and may be called multiple times or not at all
+depending on what perl sees fit at the time.
+
+The first argument will be a freshly constructed C<re::engine::Plugin>
+object (think of it as C<$self>) which you can interact with using the
+L<methods|/METHODS> below, this object will be passed around the other
+L<callbacks|/CALLBACKS> and L<methods|/METHODS> for the lifetime of
+the regex.
+
+Calling C<die> or anything that uses it (such as C<carp>) here will
+not be trapped by an C<eval> block that the pattern is in, i.e.
+
+ use Carp 'croak';
+ use re::engine::Plugin(
+ comp => sub {
+ my $rx = shift;
+ croak "Your pattern is invalid"
+ unless $rx->pattern ~~ /pony/;
+ }
+ );
+
+ # Ignores the eval block
+ eval { /you die in C<eval>, you die for real/ };
+
+This happens because the real subroutine call happens indirectly at
+compile time and not in the scope of the C<eval> block. This is how
+perl's own engine would behave in the same situation if given an
+invalid pattern such as C</(/>.
+
+=head2 exec
+
+ exec => sub {
+ my ($rx, $str) = @_;
+
+ # We always like ponies!
+ return 1 if $str ~~ /pony/;
+
+ # Failed to match
+ return;
+ }
+
+Called when a regex is being executed, i.e. when it's being matched
+against something. The scalar being matched against the pattern is
+available as the second argument (C<$str>) and through the L<str|/str>
+method. The routine should return a true value if the match was
+successful, and a false one if it wasn't.
=head1 METHODS
-=head2 import
+=head2 str
-Takes a list of key-value pairs with the only mandatory pair being
-L</exec> and its callback routine. Both subroutine references and the
-string name of a subroutine (e.g. C<"main::exec">) can be
-specified. The real CODE ref is currently looked up in the symbol
-table in the latter case.
+ "str" ~~ /pattern/;
+ # in comp/exec/methods:
+ my $str = $rx->str;
-=over 4
+The last scalar to be matched against the L<pattern|/pattern> or
+C<undef> if there hasn't been a match yet.
-=item comp
+perl's own engine always stringifies the scalar being matched against
+a given pattern, however a custom engine need not have such
+restrictions. One could write a engine that matched a file handle
+against a pattern or any other complex data structure.
-An optional sub to be called when a pattern is being compiled, note
-that a single pattern may be compiled more than once by perl.
+=head2 pattern
-The subroutine will be called with a regexp object (see L</Regexp
-object>). The regexp object will be stored internally along with the
-pattern and provided as the first argument for the other callback
-routines (think of it as C<$self>).
+The pattern that the engine was asked to compile, this can be either a
+classic Perl pattern with modifiers like C</pat/ix> or C<qr/pat/ix> or
+an arbitary scalar. The latter allows for passing anything that
+doesn't fit in a string and five L<modifier|/mod> characters, such as
+hashrefs, objects, etc.
-If your regex implementation needs to validate its pattern this is the
-right place to B<croak> on an invalid one (but see L</BUGS>).
+=head2 mod
-The return value of this subroutine is discarded.
+ my %mod = $rx->mod;
+ say "has /ix" if $mod{i} and $mod{x};
-=item exec
+A key-value pair list of the modifiers the pattern was compiled with.
+The keys will zero or more of C<imsxp> and the values will be true
+values (so that you don't have to write C<exists>).
-Called when a given pattern is being executed, the first argument is
-the regexp object and the second is the string being matched. The
-routine should return true if the pattern matched and false if it
-didn't.
+You don't get to know if the C<eogc> modifiers were attached to the
+pattern since these are internal to perl and shouldn't matter to
+regexp engines.
-=item intuit
+=head2 stash
-TODO: implement
+ comp => sub { shift->stash( [ 1 .. 5 ) },
+ exec => sub { shift->stash }, # Get [ 1 .. 5 ]
-=item checkstr
+Returns or sets a user defined stash that's passed around as part of
+the C<$rx> object, useful for passing around all sorts of data between
+the callback routines and methods.
-TODO: implement
+=head2 minlen
-=item free
+ $rx->minlen($num);
+ my $minlen = $rx->minlen // "not set";
-TODO: implement
+The minimum C<length> a string must be to match the pattern, perl will
+use this internally during matching to check whether the stringified
+form of the string (or other object) being matched is at least this
+long, if not the regexp engine in effect (that means you!) will not be
+called at all.
-=item dupe
+The length specified will be used as a a byte length (using
+L<SvPV|perlapi/SvPV>), not a character length.
-TODO: implement
+=head2 num_captures
-=item numbered_buff_get
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $paren) = @_;
-TODO: implement
+ return "value";
+ },
+ STORE => sub {
+ my ($re, $paren, $rhs) = @_;
-=item named_buff_get
+ # return value discarded
+ },
+ LENGTH => sub {
+ my ($re, $paren) = @_;
-TODO: implement
+ return 123;
+ },
+ );
-=back
+Takes a list of key-value pairs of names and subroutines that
+implement numbered capture variables. C<FETCH> will be called on value
+retrieval (C<say $1>), C<STORE> on assignment (C<$1 = "ook">) and
+C<LENGTH> on C<length $1>.
-=head2 flags
+The second paramater of each routine is the paren number being
+requested/stored, the following mapping applies for those numbers:
-L<perlop/"/PATTERN/cgimosx">
+ -2 => $` or ${^PREMATCH}
+ -1 => $' or ${^POSTMATCH}
+ 0 => $& or ${^MATCH}
+ 1 => $1
+ # ...
-=head1 TODO
+Assignment to capture variables makes it possible to implement
+something like Perl 6 C<:rw> semantics, and since it's possible to
+make the capture variables return any scalar instead of just a string
+it becomes possible to implement Perl 6 match object semantics (to
+name an example).
-=over
+=head2 named_captures
-=item *
+B<TODO>: implement
-Provide an API for named (C<$+{name}>) and unnamed (C<$1, $2, ...>)
-match variables, allow specifying both offsets into the pattern and
-any given scalar.
+perl internals still needs to be changed to support this but when it's
+done it'll allow the binding of C<%+> and C<%-> and support the
+L<Tie::Hash> methods FETCH, STORE, DELETE, CLEAR, EXISTS, FIRSTKEY,
+NEXTKEY and SCALAR.
-=item *
+=head1 Tainting
-Find some neat example for the L</SYNOPSIS>, suggestions welcome.
+The only way to untaint an existing variable in Perl is to use it as a
+hash key or referencing subpatterns from a regular expression match
+(see L<perlsec|perlsec/Laundering and Detecting Tainted Data>), the
+latter only works in perl's regex engine because it explicitly
+untaints capture variables which a custom engine will also need to do
+if it wants its capture variables to be untanted.
-=back
+There are basically two ways to go about this, the first and obvious
+one is to make use of Perl'l lexical scoping which enables the use of
+its built-in regex engine in the scope of the overriding engine's
+callbacks:
-=head1 BUGS
+ use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_; # $str is tainted
-Please report any bugs that aren't already listed at
-L<http://rt.cpan.org/Dist/Display.html?Queue=re-engine-Plugin> to
-L<http://rt.cpan.org/Public/Bug/Report.html?Queue=re-engine-Plugin>
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $paren) = @_;
+
+ # This is perl's engine doing the match
+ $str ~~ /(.*)/;
+
+ # $1 has been untainted
+ return $1;
+ },
+ );
+ },
+ );
-=over 1
+The second is to use something like L<Taint::Util> which flips the
+taint flag on the scalar without invoking the perl's regex engine:
-=item
+ use Taint::Util;
+ use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_; # $str is tainted
-Calling C<die> or anything that uses it (such as C<carp>) in the
-L</comp> callback routines will not be trapped by an C<eval> block
-that the pattern is in, i.e.
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $paren) = @_;
- use Carp qw(croak);
- use re::engine::Plugin(
- comp => sub {
- my $re = shift;
- croak "Your pattern is invalid"
- unless $re->pattern =~ /pony/;
- }
- );
+ # Copy $str and untaint the copy
+ untaint(my $ret = $str);
- # Ignores the eval block
- eval { /you die in C<eval>, you die for real/ };
+ # Return the untainted value
+ return $ret;
+ },
+ );
+ },
+ );
-Simply put this happens because the real subroutine call happens
-indirectly and not in the scope of the C<eval> block.
+In either case a regex engine using perl's L<regex api|perlapi> or
+this module is responsible for how and if it untaints its variables.
-=back
+=head1 SEE ALSO
-=head1 Regexp object
+L<perlreapi>, L<Taint::Util>
-The regexp object is passed around as the first argument to all the
-callback routines, it supports the following method calls (with more
-to come!).
+=head1 TODO / CAVEATS
+
+I<here be dragons>
=over
-=item pattern
+=item *
-Returns the pattern this regexp was compiled with.
+Engines implemented with this module don't support C<s///> and C<split
+//>, the appropriate parts of the C<REGEXP> struct need to be wrapped
+and documented.
-=item flags
+=item *
-Returns a string of flags the pattern was compiled
-with. (e.g. C<"xs">). The flags are not guarenteed to be in any
-particular order, so don't depend on the current one.
+Still not a complete wrapper for L<perlreapi> in other ways, needs
+methods for some C<REGEXP> struct members, some callbacks aren't
+implemented etc.
-=item stash
+=item *
-Returns or sets a user-defined stash that's passed around with the
-pattern, this is useful for passing around an arbitary scalar between
-callback routines, example:
+Support overloading operations on the C<qr//> object, this allow
+control over the of C<qr//> objects in a manner that isn't limited by
+C<wrapped>/C<wraplen>.
- use re::engine::Plugin (
- comp => sub { $_[0]->stash( [ 1 .. 5 ] ) },
- comp => sub { $_[0]->stash }, # Get [ 1 .. 5]
+ $re->overload(
+ '""' => sub { ... },
+ '@{}' => sub { ... },
+ ...
);
-=item minlen
+=item *
-The minimum length a given string must be to match the pattern, set
-this to an integer in B<comp> and perl will not call your B<exec>
-routine unless the string being matched as at least that long. Returns
-the currently set length if not called with any arguments or C<undef>
-if no length has been set.
+Support the dispatch of arbitary methods from the re::engine::Plugin
+qr// object to user defined subroutines via AUTOLOAD;
-=back
+ package re::engine::Plugin;
+ sub AUTOLOAD
+ {
+ our $AUTOLOAD;
+ my ($name) = $AUTOLOAD =~ /.*::(.*?)/;
+ my $cv = getmeth($name); # or something like that
+ goto &$cv;
+ }
-=head1 SEE ALSO
+ package re::engine::SomeEngine;
-L<perlreguts/Pluggable Interface>
+ sub comp
+ {
+ my $re = shift;
-=head1 THANKS
+ $re->add_method( # or something like that
+ foshizzle => sub {
+ my ($re, @arg) = @_; # re::engine::Plugin, 1..5
+ },
+ );
+ }
-Yves explaining why I made the regexp engine a sad panda.
+ package main;
+ use re::engine::SomeEngine;
+ later:
+
+ my $re = qr//;
+ $re->foshizzle(1..5);
+
+=item *
+
+Implement the dupe callback, test this on a threaded perl (and learn
+how to use threads and how they break the current model).
+
+=item *
+
+Allow the user to specify ->offs either as an array or a packed
+string. Can pack() even pack I32? Only IV? int?
+
+=item *
+
+Add tests that check for different behavior when curpm is and is not
+set.
+
+=item *
+
+Add tests that check the refcount of the stash and other things I'm
+mucking with, run valgrind and make sure everything is destroyed when
+it should.
+
+=item *
+
+Run the debugger on the testsuite and find cases when the intuit and
+checkstr callbacks are called. Write wrappers around them and add
+tests.
+
+=back
+
+=head1 BUGS
+
+Please report any bugs that aren't already listed at
+L<http://rt.cpan.org/Dist/Display.html?Queue=re-engine-Plugin> to
+L<http://rt.cpan.org/Public/Bug/Report.html?Queue=re-engine-Plugin>
=head1 AUTHOR
=head1 LICENSE
+Copyright 2007 E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason.
+
This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.
-Copyright 2007 E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason.
-
=cut
#include "EXTERN.h"
#include "perl.h"
#include "XSUB.h"
-
-#define SAVEPVN(p,n) ((p) ? savepvn(p,n) : NULL)
-
-START_EXTERN_C
-
-EXTERN_C const regexp_engine engine_plugin;
-
-END_EXTERN_C
-
-/*
- * Our struct which gets initiated and used as our object
- * ($re). Since we can't count on the regexp structure provided by
- * perl to be alive between comp/exec etc. we pull stuff from it and
- * save it in our own structure.
- *
- * Besides, creating Perl accessors which directly muck with perl's
- * own regexp structures in different phases of regex execution would
- * be a little too evil.
- */
-typedef struct replug {
- SV * pattern;
- char flags[sizeof("ecgimsxp")];
-
- I32 minlen;
- U32 gofs;
-
- SV * stash;
-
- U32 nparens;
- AV * captures; /* Array of SV* that'll become $1, $2, ... */
-} *re__engine__Plugin;
+#include "Plugin.h"
SV*
get_H_callback(const char* key)
SV * callback;
- ENTER;
+ ENTER;
SAVETMPS;
PUSHMARK(SP);
XPUSHs(sv_2mortal(newSVpv(key, 0)));
PUTBACK;
- call_pv("re::engine::Plugin::get_callback", G_SCALAR);
+ call_pv("re::engine::Plugin::_get_callback", G_SCALAR);
SPAGAIN;
callback = POPs;
- SvREFCNT_inc(callback);
+ SvREFCNT_inc(callback); /* refcount++ or FREETMPS below will collect us */
- if (!SvROK(callback)) { callback = NULL; }// croak("ret value not a ref"); }
+ /* If we don't get a valid CODE value return a NULL callback, in
+ * that case the hooks won't call back into Perl space */
+ if (!SvROK(callback) || SvTYPE(SvRV(callback)) != SVt_PVCV) {
+ callback = NULL;
+ }
PUTBACK;
FREETMPS;
return callback;
}
-/* just learn to use gdb you lazy bum! */
-#if 0
-void
-dump_r_info(const char* id, regexp *r)
-{
- warn("%s:", id);
- warn("\textflags = %d", r->extflags);
- warn("\tminlen = %d", r->minlen);
- warn("\tminlenren = %d", r->minlenret);
- warn("\tgofs = %d", r->gofs);
- warn("\tnparens = %d", r->nparens);
- warn("\tpprivate = %p", r->pprivate);
- warn("\tsubbeg = %s", r->subbeg);
- warn("\tsublen = %d", r->sublen);
- warn("\tprecomp = %s", r->precomp);
- warn("\tprelen = %d", r->prelen);
- warn("\twrapped = %s", r->wrapped);
- warn("\twraplen = %d", r->wraplen);
- warn("\tseen_evals = %d", r->seen_evals);
- warn("\trefcnt = %d", r->refcnt);
-
-}
-#endif
-
-regexp *
-Plugin_comp(pTHX_ char *exp, char *xend, PMOP *pm)
+REGEXP *
+Plugin_comp(pTHX_ const SV * const pattern, const U32 flags)
{
dSP;
- register regexp *r;
- int count;
-
- /*
- * Allocate a new regexp struct, we must only write to the intflags,
- * engine and private members and the others must be populated,
- * internals expect the regex to have certain values least our code
- * blow up
- */
-
- Newxz(r,1,regexp);
-
- /* Set up the regex to be handled by this plugin */
- r->engine = &engine_plugin;
+ REGEXP * rx;
+ re__engine__Plugin re;
+ I32 count;
+ I32 buffers;
- /* Store the initial flags */
- r->intflags = pm->op_pmflags;
- r->pprivate = NULL; /* this is set to our object below */
+ /* exp/xend version of the pattern & length */
+ STRLEN plen;
+ char* exp = SvPV((SV*)pattern, plen);
+ char* xend = exp + plen;
- /*
- * Populate the regexp members for the engine
- */
+ /* The REGEXP structure to return to perl */
+ Newxz(rx, 1, REGEXP);
- /* Ref count of the pattern */
- r->refcnt = 1;
-
- /* Preserve a copy of the original pattern */
- r->prelen = xend - exp;
- r->precomp = SAVEPVN(exp, r->prelen);
+ /* Our blessed object */
+ SV *obj = newSV(0);
+ SvREFCNT_inc(obj);
+ Newxz(re, 1, struct replug);
+ sv_setref_pv(obj, "re::engine::Plugin", (void*)re);
- /* these may be changed by accessors */
- r->minlen = 0;
- r->minlenret = 0;
- r->gofs = 0;
- r->nparens = 0;
+ re->rx = rx; /* Make the rx accessible from self->rx */
+ rx->refcnt = 1; /* Refcount so we won' be destroyed */
+ rx->intflags = flags; /* Flags for internal use */
+ rx->extflags = flags; /* Flags for perl to use */
+ rx->engine = RE_ENGINE_PLUGIN; /* Compile to use this engine */
- /* Store the flags as perl expects them */
- r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME;
+ /* Store a precompiled regexp for pp_regcomp to use */
+ rx->prelen = plen;
+ rx->precomp = savepvn(exp, rx->prelen);
- /*
- * Construct a new B<re::engine::Plugin> object that'll carry around
- * our data inside C<< r->pprivate >>. The object is a blessed void*
- * that points to our replug struct which holds any state we want to
- * keep.
+ /* Set up qr// stringification to be equivalent to the supplied
+ * pattern, this should be done via overload eventually.
*/
- re__engine__Plugin re;
- Newz(0, re, 1, struct replug);
-
- SV *obj = newSV(0);
- SvREFCNT_inc(obj);
-
- /* Bless into this package; TODO: make it subclassable */
- const char * pkg = "re::engine::Plugin";
- /* bless it */
- sv_setref_pv(obj, pkg, (void*)re);
+ rx->wraplen = rx->prelen;
+ Newx(rx->wrapped, rx->wraplen, char);
+ Copy(rx->precomp, rx->wrapped, rx->wraplen, char);
/* Store our private object */
- r->pprivate = obj;
+ rx->pprivate = obj;
- re->pattern = newSVpvn(SAVEPVN(exp, xend - exp), xend - exp);
+ /* Store the pattern for ->pattern */
+ re->pattern = (SV*)pattern;
SvREFCNT_inc(re->pattern);
- /* Concat [ec]gimosxp (egimosxp & cgimosxp into) the flags string as
- * appropriate
- */
- if (r->intflags & PMf_EVAL) { strcat(re->flags, "e"); }
- if (r->intflags & PMf_CONTINUE) { strcat(re->flags, "c"); }
- if (r->intflags & PMf_GLOBAL) { strcat(re->flags, "g"); }
- if (r->intflags & PMf_FOLD) { strcat(re->flags, "i"); }
- if (r->intflags & PMf_MULTILINE) { strcat(re->flags, "m"); }
- if (r->intflags & PMf_ONCE) { strcat(re->flags, "o"); }
- if (r->intflags & PMf_SINGLELINE) { strcat(re->flags, "s"); }
- if (r->intflags & PMf_EXTENDED) { strcat(re->flags, "x"); }
- if (((r->extflags & RXf_PMf_KEEPCOPY) == RXf_PMf_KEEPCOPY)) {
- strcat(re->flags, "p");
- }
-
/*
* Call our callback function if one was defined, if not we've
* already set up all the stuff we're going to to need for
PUSHMARK(SP);
XPUSHs(obj);
- XPUSHs(sv_2mortal(newSVpv(exp, xend - exp)));
-
PUTBACK;
- call_sv(get_H_callback("comp"), G_DISCARD);
+ call_sv(callback, G_DISCARD);
FREETMPS;
LEAVE;
/* If any of the comp-time accessors were called we'll have to
* update the regexp struct with the new info.
*/
- if (re->minlen) r->minlen = re->minlen;
- if (re->gofs) r->gofs = re->gofs;
- if (re->gofs) r->gofs = re->gofs;
- if (re->nparens) r->nparens = re->nparens;
- int buffers = r->nparens;
+ buffers = rx->nparens;
- //r->nparens = (buffers - 1);
- Newxz(r->startp, buffers, I32);
- Newxz(r->endp, buffers, I32);
+ Newxz(rx->offs, buffers, regexp_paren_pair);
- /* return the regexp */
- return r;
+ return rx;
}
I32
-Plugin_exec(pTHX_ register regexp *r, char *stringarg, register char *strend,
- char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
+Plugin_exec(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
+ char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
{
dSP;
- I32 rc;
- int *ovector;
- I32 i;
- int count;
- int ret;
-
- /*Newx(ovector,r->nparens,int);*/
+ I32 matched;
+ SV * callback = get_H_callback("exec");
+ GET_SELF_FROM_PPRIVATE(rx->pprivate);
- SV* callback = get_H_callback("exec");
+ /* Store the current str for ->str */
+ self->str = (SV*)sv;
+ SvREFCNT_inc(self->str);
- ENTER;
+ ENTER;
SAVETMPS;
PUSHMARK(SP);
-
- XPUSHs(r->pprivate);
+ XPUSHs(rx->pprivate);
XPUSHs(sv);
-
PUTBACK;
- count = call_sv(callback, G_ARRAY);
+ call_sv(callback, G_SCALAR);
SPAGAIN;
- SV * SvRet = POPs;
+ SV * ret = POPs;
- if (SvTRUE(SvRet)) {
- /* Match vars */
-
- /*
- r->sublen = strend-strbeg;
- r->subbeg = savepvn(strbeg,r->sublen);
- r->startp[1] = 0;
- r->endp[1] = 5;
- */
-
- ret = 1;
- } else {
- ret = 0;
- }
+ if (SvTRUE(ret))
+ matched = 1;
+ else
+ matched = 0;
PUTBACK;
FREETMPS;
LEAVE;
- return ret;
+ return matched;
}
char *
-Plugin_intuit(pTHX_ regexp *prog, SV *sv, char *strpos,
+Plugin_intuit(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
char *strend, U32 flags, re_scream_pos_data *data)
{
+ PERL_UNUSED_ARG(rx);
+ PERL_UNUSED_ARG(sv);
+ PERL_UNUSED_ARG(strpos);
+ PERL_UNUSED_ARG(strend);
+ PERL_UNUSED_ARG(flags);
+ PERL_UNUSED_ARG(data);
return NULL;
}
SV *
-Plugin_checkstr(pTHX_ regexp *prog)
+Plugin_checkstr(pTHX_ REGEXP * const rx)
{
+ PERL_UNUSED_ARG(rx);
return NULL;
}
void
-Plugin_free(pTHX_ struct regexp *r)
+Plugin_free(pTHX_ REGEXP * const rx)
{
- /*sv_2mortal(r->pprivate);*/
- /*PerlMemShared_free(r->pprivate);*/
+ PERL_UNUSED_ARG(rx);
+/*
+ dSP;
+ SV * callback;
+ GET_SELF_FROM_PPRIVATE(rx->pprivate);
+
+ callback = self->cb_free;
+
+ if (callback) {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(SP);
+ XPUSHs(rx->pprivate);
+ PUTBACK;
+
+ call_sv(callback, G_DISCARD);
+
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ }
+ return;
+*/
}
void *
-Plugin_dupe(pTHX_ const regexp *r, CLONE_PARAMS *param)
+Plugin_dupe(pTHX_ const REGEXP * rx, CLONE_PARAMS *param)
{
- return r->pprivate;
+ Perl_croak("dupe not supported yet");
+ return rx->pprivate;
}
-SV*
-Plugin_numbered_buff_get(pTHX_ const REGEXP * const rx, I32 paren, SV* usesv)
+
+void
+Plugin_numbered_buff_FETCH(pTHX_ REGEXP * const rx, const I32 paren,
+ SV * const sv)
{
- return NULL;
+ dSP;
+ I32 items;
+ SV * callback;
+ GET_SELF_FROM_PPRIVATE(rx->pprivate);
+
+ callback = self->cb_num_capture_buff_FETCH;
+
+ if (callback) {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(SP);
+ XPUSHs(rx->pprivate);
+ XPUSHs(sv_2mortal(newSViv(paren)));
+ PUTBACK;
+
+ items = call_sv(callback, G_SCALAR);
+
+ if (items == 1) {
+ SPAGAIN;
+
+ SV * ret = POPs;
+ sv_setsv(sv, ret);
+ } else {
+ sv_setsv(sv, &PL_sv_undef);
+ }
+
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ } else {
+ sv_setsv(sv, &PL_sv_undef);
+ }
+}
+
+void
+Plugin_numbered_buff_STORE(pTHX_ REGEXP * const rx, const I32 paren,
+ SV const * const value)
+{
+ dSP;
+ I32 items;
+ SV * callback;
+ GET_SELF_FROM_PPRIVATE(rx->pprivate);
+
+ callback = self->cb_num_capture_buff_STORE;
+
+ if (callback) {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(SP);
+ XPUSHs(rx->pprivate);
+ XPUSHs(sv_2mortal(newSViv(paren)));
+ XPUSHs(SvREFCNT_inc(value));
+ PUTBACK;
+
+ call_sv(callback, G_DISCARD);
+
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+ }
}
+I32
+Plugin_numbered_buff_LENGTH(pTHX_ REGEXP * const rx, const SV * const sv,
+ const I32 paren)
+{
+ dSP;
+ I32 items;
+ SV * callback;
+ re__engine__Plugin self;
+
+ SELF_FROM_PPRIVATE(self,rx->pprivate);
+
+ callback = self->cb_num_capture_buff_LENGTH;
+
+ if (callback) {
+ ENTER;
+ SAVETMPS;
+
+ PUSHMARK(SP);
+ XPUSHs(rx->pprivate);
+ XPUSHs(sv_2mortal(newSViv(paren)));
+ PUTBACK;
+
+ call_sv(callback, G_SCALAR);
+
+ SPAGAIN;
+
+ IV ret = POPi;
+
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+
+ return (I32)ret;
+ } else {
+ /* TODO: call FETCH and get the length on that value */
+ return 0;
+ }
+}
+
+
SV*
-Plugin_named_buff_get(pTHX_ const REGEXP * const rx, SV* namesv, U32 flags)
+Plugin_named_buff_FETCH(pTHX_ REGEXP * const rx, SV * const key, U32 flags)
{
+ PERL_UNUSED_ARG(rx);
+ PERL_UNUSED_ARG(key);
+ PERL_UNUSED_ARG(flags);
+
return NULL;
}
-/*
- * The function pointers we're telling the regex engine to use
- */
-const regexp_engine engine_plugin = {
- Plugin_comp,
- Plugin_exec,
- Plugin_intuit,
- Plugin_checkstr,
- Plugin_free,
- Plugin_numbered_buff_get,
- Plugin_named_buff_get,
-#if defined(USE_ITHREADS)
- Plugin_dupe,
-#endif
-};
+SV*
+Plugin_package(pTHX_ REGEXP * const rx)
+{
+ PERL_UNUSED_ARG(rx);
+ return newSVpvs("re::engine::Plugin");
+}
MODULE = re::engine::Plugin PACKAGE = re::engine::Plugin
+PROTOTYPES: ENABLE
SV *
pattern(re::engine::Plugin self, ...)
OUTPUT:
RETVAL
-char*
-flags(re::engine::Plugin self, ...)
+SV *
+str(re::engine::Plugin self, ...)
CODE:
- RETVAL = self->flags;
+ SvREFCNT_inc(self->str);
+ RETVAL = self->str;
OUTPUT:
RETVAL
+char*
+mod(re::engine::Plugin self, ...)
+PPCODE:
+ /* /i */
+ if (self->rx->intflags & PMf_FOLD) {
+ XPUSHs(sv_2mortal(newSVpvs("i")));
+ XPUSHs(&PL_sv_yes);
+ }
+
+ /* /m */
+ if (self->rx->intflags & PMf_MULTILINE) {
+ XPUSHs(sv_2mortal(newSVpvs("m")));
+ XPUSHs(&PL_sv_yes);
+ }
+
+ /* /s */
+ if (self->rx->intflags & PMf_SINGLELINE) {
+ XPUSHs(sv_2mortal(newSVpvs("s")));
+ XPUSHs(&PL_sv_yes);
+ }
+
+ /* /x */
+ if (self->rx->intflags & PMf_EXTENDED) {
+ XPUSHs(sv_2mortal(newSVpvs("x")));
+ XPUSHs(&PL_sv_yes);
+ }
+
+ /* /p */
+ if (self->rx->intflags & RXf_PMf_KEEPCOPY) {
+ XPUSHs(sv_2mortal(newSVpvs("p")));
+ XPUSHs(&PL_sv_yes);
+ }
+
SV *
stash(re::engine::Plugin self, ...)
PREINIT:
minlen(re::engine::Plugin self, ...)
CODE:
if (items > 1) {
- self->minlen = (I32)SvIV(ST(1));
+ self->rx->minlen = (I32)SvIV(ST(1));
}
- RETVAL = self->minlen ? newSViv(self->minlen) : &PL_sv_undef;
+ RETVAL = self->rx->minlen ? newSViv(self->rx->minlen) : &PL_sv_undef;
OUTPUT:
RETVAL
gofs(re::engine::Plugin self, ...)
CODE:
if (items > 1) {
- self->gofs = (U32)SvIV(ST(1));
+ self->rx->gofs = (U32)SvIV(ST(1));
}
- RETVAL = self->gofs ? newSVuv(self->gofs) : &PL_sv_undef;
+ RETVAL = self->rx->gofs ? newSVuv(self->rx->gofs) : &PL_sv_undef;
OUTPUT:
RETVAL
nparens(re::engine::Plugin self, ...)
CODE:
if (items > 1) {
- self->nparens = (U32)SvIV(ST(1));
+ self->rx->nparens = (U32)SvIV(ST(1));
}
- RETVAL = self->gofs ? newSVuv(self->gofs) : &PL_sv_undef;
+ RETVAL = self->rx->nparens ? newSVuv(self->rx->nparens) : &PL_sv_undef;
OUTPUT:
RETVAL
void
-captures(re::engine::Plugin self, ...)
+_num_capture_buff_FETCH(re::engine::Plugin self, ...)
+PPCODE:
+ if (items > 1) {
+ self->cb_num_capture_buff_FETCH = ST(1);
+ SvREFCNT_inc(self->cb_num_capture_buff_FETCH);
+ }
+
+void
+_num_capture_buff_STORE(re::engine::Plugin self, ...)
+PPCODE:
+ if (items > 1) {
+ self->cb_num_capture_buff_STORE = ST(1);
+ SvREFCNT_inc(self->cb_num_capture_buff_STORE);
+ }
+
+void
+_num_capture_buff_LENGTH(re::engine::Plugin self, ...)
PPCODE:
if (items > 1) {
- self->minlen = (I32)SvIV(ST(1));
+ self->cb_num_capture_buff_LENGTH = ST(1);
+ SvREFCNT_inc(self->cb_num_capture_buff_LENGTH);
}
- XPUSHs(sv_2mortal(newSViv(5)));
- XPUSHs(sv_2mortal(newSViv(10)));
void
-get_engine_plugin()
+ENGINE()
PPCODE:
XPUSHs(sv_2mortal(newSViv(PTR2IV(&engine_plugin))));
--- /dev/null
+package Example;
+use strict;
+
+# Note the (), doesn't call ->import
+use re::engine::Plugin ();
+
+sub import {
+ # Populates %^H with re::engine::Plugin hooks
+ re::engine::Plugin->import(
+ exec => \&exec,
+ );
+}
+
+*unimport = \&re::engine::Plugin::unimport;
+
+sub exec
+{
+ my ($re, $str) = @_;
+
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $paren) = @_;
+
+ $str . "_" . $paren;
+ }
+ );
+
+ 1;
+}
+
+1;
--- /dev/null
+use strict;
+use lib 't';
+use Test::More tests => 1;
+
+use Example;
+
+"str" =~ /pattern/;
+
+is($1, "str_1");
=pod
-Having C<eval> catch C<die> in one of the callbacks when called
-indirectly as C</pattern/> doesn't work. This is not at all surprising
-since the eventual call to the callback sub is not exectuted within
-the context that C</pattern> appeared in, but there's a test for it
-anyway.
+This will always die, see L<re::engine::Plugin/comp> for why.
-The other eval tests are just copies of this one made because the
-interpreter can only die so many times per process.
+This can be made to live regardless of C<eval> by adding C<| G_EVAL>
+to C<call_sv()> in C<Plugin_comp>.
=cut
use strict;
-use Test::More skip_all => 'TODO: make this work';
-#use Test::More tests => 1;
+use Test::More skip_all => 'Always dies';
use re::engine::Plugin (
comp => sub { die "died at comp time" },
- exec => sub { 1 },
);
-eval { /noes/ };
+eval { "str" ~~ /noes/ };
TODO: {
local $TODO = 'passing tests for known bug with how we handle eval';
use Test::More tests => 1;
use re::engine::Plugin (
- comp => sub {},
exec => sub { die "died at exec time" },
);
=cut
use strict;
-
-use Data::Dumper;
-
-use Test::More tests => 7;
-
+use Test::More tests => 8;
use re::engine::Plugin ();
like "a", qr/^a$/, "import didn't run, perl's regex engine in effect";
BEGIN {
re::engine::Plugin->import(
- comp => sub {}, # TODO: remove when this can be omitted
exec => sub { $_[0]->pattern eq $_[1] }
);
}
{
BEGIN {
re::engine::Plugin->import(
- comp => sub {}, # TODO: remove when this can be omitted
exec => sub { $_[0]->pattern ne $_[1] }
);
}
{
BEGIN {
re::engine::Plugin->import(
- comp => sub {}, # TODO: remove when this can be omitted
exec => sub { $_[0]->pattern eq '^[abc]$' }
);
}
ok "^hello" =~ /^hello/ => "regex modified to match a literal pattern";
+# Another import at the same scope
+BEGIN {
+ re::engine::Plugin->import(
+ exec => sub { $_[0]->pattern ne $_[1] }
+ );
+}
-
-
-
-
-
-
-
-
-
-
-
+ok "^hello" !~ /^hello/ => "regex modified not to match a literal pattern";
use feature ':5.10';
-use Test::More 'no_plan';#tests => 6;
+use Test::More tests => 9;
use re::engine::Plugin (
comp => sub {
# pattern
cmp_ok($re->pattern, 'eq', ' foobar zoobar ' => '->pattern ok');
- # flags
- my $f = $re->flags;
- like $f, qr/i/, 'str flags /i';
- like $f, qr/x/, 'str flags /x';
- like $f, qr/^[cgimosx]+$/, 'flags contain all-good characters';
+ # modifiers
+ my %mod = $re->mod;
+ ok(exists $mod{i}, 'str flags /i');
+ ok(exists $mod{x}, 'str flags /i');
+ like(join('', keys %mod), qr/^[cgimosx]+$/, 'flags contain all-good characters');
# stash
cmp_ok($re->stash->{"x"}, '==', 5, "data correct in stash");
cmp_ok(ref $re->stash->{"y"}, 'eq', 'CODE', "data correct in stash");
cmp_ok($re->stash->{"y"}->(), '==', 6, "data correct in stash");
- # This needs a less sucky name
- #
- # Pattern: ' foobar zoobar ', set $1 to "foobar" (if I counted this right:)
-# $re->offset_captures( [1, 7], ... );
-
- # This name sucks as well
-# $re->named_captures2offset_captures( myNameIs => 0 ): # $+{myNameIs} = $1
-
# Pattern contains "foo", "bar" and "zoo", return a true
return $re->pattern =~ /zoo/;
}
--- /dev/null
+=pod
+
+Test the C<free> method
+
+=cut
+
+use strict;
+use Test::More skip_all => "Doesn't work currently (where did my scope go?!)";
+
+use re::engine::Plugin (
+ comp => sub {
+ my ($re) = @_;
+
+ $re->free( sub { pass "ran free" } );
+ }
+);
+
+"str" ~~ /pattern/;
--- /dev/null
+=pod
+
+C<minlen> speaks bytes, not characters.
+
+=cut
+
+use strict;
+use Test::More tests => 3;
+use re::engine::Plugin (
+ comp => sub { shift->minlen(5) },
+ exec => sub {
+ my ($re, $str) = @_;
+ pass "Called with $str";
+ },
+);
+
+my $str = "ævar";
+is(length $str, 5, "$str is 5 char long"); # Chars
+$str =~ /pattern/; # no ->exec
+
+chop $str;
+is(length $str, 4, "$str is 4 char long"); # Chars
+$str =~ /pattern/; # yes ->exec
--- /dev/null
+use strict;
+
+use Test::More tests => 3;
+
+use re::engine::Plugin (
+ comp => sub {
+ my $re = shift;
+ $re->minlen(2);
+ },
+ exec => sub {
+ my $re = shift;
+ my $minlen = $re->minlen;
+ cmp_ok $minlen, '==', 2, 'minlen accessor';
+ },
+);
+
+pass "making match";
+"s" =~ /pattern/;
+"st" =~ /pattern/;
+"str" =~ /pattern/;
--- /dev/null
+use strict;
+
+use Test::More tests => 1;
+
+use re::engine::Plugin (
+ comp => sub {
+ my $re = shift;
+ $re->minlen(length("str") + 1); # make "str" too short
+ },
+ exec => sub { fail "exec called" },
+);
+
+pass "making match";
+"str" =~ /pattern/;
--- /dev/null
+=pod
+
+Test the C<mod> or C<modifiers> method
+
+=cut
+
+use strict;
+use feature ':5.10';
+use Test::More tests => 25;
+
+my @tests = (
+ sub { cmp_ok shift, 'eq', '', => 'no flags' },
+ sub { cmp_ok shift, 'eq', '', => '/c' },
+ sub { cmp_ok shift, 'eq', '' => '/g' },
+ sub { cmp_ok shift, 'eq', 'i' => '/i' },
+ sub { cmp_ok shift, 'eq', 'm' => '/m' },
+ sub { cmp_ok shift, 'eq', '' => '/o' },
+ sub { cmp_ok shift, 'eq', 's' => '/s' },
+ sub { cmp_ok shift, 'eq', 'x' => '/x' },
+ sub { cmp_ok shift, 'eq', 'p' => '/p' },
+ sub { like $_[0], qr/$_/ => "/$_ in $_[0]" for unpack "(Z)*", "xi" },
+ sub { like $_[0], qr/$_/ => "/$_ in $_[0]" for unpack "(Z)*", "xs" },
+ sub {
+ for (unpack "(Z)*", "cgimsxp") {
+ /[cg]/ and next;
+ like $_[0], qr/$_/ => "/$_ in $_[0]"
+ }
+ },
+ sub { cmp_ok shift, 'eq', '', => '/e' },
+ sub {
+ for (unpack "(Z)*", "egimsxp") {
+ /[ge]/ and next;
+ like $_[0], qr/$_/ => "/$_ in $_[0]";
+ }
+ },
+
+ sub { cmp_ok shift, 'eq', '' => '??' },
+ # Leave this as the last
+ ,sub { die "add more tests" }
+);
+
+use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_;
+
+ my $t = shift @tests;
+
+ my %mod = $re->mod;
+
+ my $mod_str = join '', keys %mod;
+
+ $t->($mod_str);
+ }
+);
+
+# Provide a pattern that can match to avoid running into regexp
+# optimizations that won't call exec on C<"" =~ //>;
+
+"" =~ /x/;
+"" =~ /x/cg; # meaningless without /g
+"" =~ /x/g;
+"" =~ /x/i;
+"" =~ /x/m;
+"" =~ /x/o;
+"" =~ /x/s;
+"" =~ /x/x;
+"" =~ /x/p;
+"" =~ /x/xi;
+"" =~ /x/xs;
+"" =~ /x/cgimosxp;
+
+my $_ = "";
+
+$_ =~ s/1/2/e;
+$_ =~ s/1/2/egimosxp;
+$_ =~ ??;
--- /dev/null
+use strict;
+use Test::More tests => 1;
+use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_;
+ my $pattern = $re->pattern;
+
+ $$pattern = "eek";
+
+ return 1;
+ },
+);
+
+my $sv = "ook";
+if ("ook" =~ \$sv) {
+ is($sv, "eek");
+}
+
--- /dev/null
+use strict;
+use Test::More tests => 7;
+use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_;
+ my $pattern = $re->pattern;
+
+ isa_ok($pattern, $str);
+ },
+);
+
+my $sv;
+"SCALAR" =~ \$sv;
+"REF" =~ \\$sv;
+"ARRAY" =~ [];
+"HASH" =~ {};
+"GLOB" =~ \*STDIN;
+"CODE" =~ sub {};
+"main" =~ bless {} => "main";
--- /dev/null
+=pod
+
+Test the C<stash> method
+
+=cut
+
+use strict;
+use Test::More tests => 4;
+
+use re::engine::Plugin (
+ comp => sub {
+ my ($re) = @_;
+
+ my $sv = [ qw( a o e u ) ];
+
+ $re->stash( $sv );
+ },
+ exec => sub {
+ my ($re, $str) = @_;
+
+ my $stash = $re->stash;
+ my %h = qw( 0 a 1 o 2 e 3 u );
+ for (keys %h) {
+ is($h{$_}, $stash->[$_]);
+ }
+ }
+);
+
+"ook" =~ /eek/;
--- /dev/null
+use strict;
+use Test::More tests => 7;
+use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_;
+
+ is_deeply($str, $re->str);
+
+ return 1;
+ },
+);
+
+my $sv;
+"SCALAR" =~ \$sv;
+"REF" =~ \\$sv;
+"ARRAY" =~ [];
+"HASH" =~ {};
+"GLOB" =~ \*STDIN;
+"CODE" =~ sub {};
+"main" =~ bless {} => "main";
+
--- /dev/null
+use strict;
+use Test::More tests => 3;
+use re::engine::Plugin (
+ comp => sub {
+ my ($re, $str) = @_;
+
+ # Runs three times apperently.
+ is($re->str, undef, 'str is undef');
+
+ return;
+ },
+);
+
+qr/pattern/;
--- /dev/null
+use strict;
+use Test::More tests => 7;
+
+use re::engine::Plugin (
+ exec => sub {
+ my $re = shift;
+
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $paren) = @_;
+
+ my %ret = (
+ -2 => 10,
+ -1 => 20,
+ 0 => 30,
+ 1 => 40,
+ );
+
+ $ret{$paren};
+ }
+ );
+
+ 1;
+ },
+);
+
+"a" =~ /a/;
+
+is($`, 10, '$`');
+is(${^PREMATCH}, 10, '${^PREMATCH}');
+is($', 20, q($'));
+is(${^POSTMATCH}, 20, '${^POSTMATCH}');
+is($&, 30, '$&');
+is(${^MATCH}, 30, '${^MATCH}');
+is($1, 40, '$1');
--- /dev/null
+use strict;
+use Test::More tests => 7;
+
+use re::engine::Plugin (
+ exec => sub {
+ my $re = shift;
+
+ $re->stash( [
+ 10, 10,
+ 20, 20,
+ 30, 30,
+ 40,
+ ]);
+
+ $re->num_captures(
+ LENGTH => sub {
+ my ($re, $paren) = @_;
+
+ shift @{ $re->stash };
+ },
+ );
+
+ 1;
+ },
+);
+
+"a" =~ /a/;
+
+is(length $`, 10);
+is(length ${^PREMATCH}, 10);
+is(length $', 20);
+is(length ${^POSTMATCH}, 20);
+is(length $&, 30);
+is(length ${^MATCH}, 30);
+is(length $1, 40);
--- /dev/null
+use strict;
+use Test::More tests => 14;
+
+use re::engine::Plugin (
+ exec => sub {
+ my $re = shift;
+
+ $re->stash( [
+ [ -2, "a" ],
+ [ -2, "a" ],
+ [ -1, "o" ],
+ [ -1, "o" ],
+ [ 0, "e" ],
+ [ 0, "e" ],
+ [ 1, "u" ],
+ ]);
+
+ $re->num_captures(
+ STORE => sub {
+ my ($re, $paren, $sv) = @_;
+ my $test = shift @{ $re->stash };
+
+ is($paren, $test->[0]);
+ is($sv, $test->[1]);
+ },
+ );
+
+ 1;
+ },
+);
+
+"a" =~ /a/;
+
+$` = "a";
+${^PREMATCH} = "a";
+$' = "o";
+${^POSTMATCH} = "o";
+$& = "e";
+${^MATCH} = "e";
+$1 = "u";
--- /dev/null
+#!/usr/bin/env perl -T
+
+=pod
+
+Equivalent to F<taint/util.t> but uses perl regexes to untaint.
+
+=cut
+
+use strict;
+use Test::More;
+
+BEGIN {
+ eval {
+ require Taint::Util;
+ Taint::Util->import;
+ };
+
+ plan $@
+ ? (skip_all => "Taint::Util required for taint tests")
+ : (tests => 8);
+}
+
+use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_;
+
+ ok(tainted($str) => 'matched string tainted');
+
+ my $one = $str;
+ my $two = $str; $two ~~ /(.*)/; $two = $1;
+
+ ok(tainted($one));
+ ok(!tainted($two));
+
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $p) = @_;
+
+ return $one if $p == 1;
+ return $two if $p == 2;
+ },
+ );
+
+ 1;
+ }
+);
+
+my $str = "string";
+taint($str);
+ok(tainted($str));
+
+if ($str =~ /pattern/) {
+ cmp_ok $1, 'eq', $str;
+ ok(tainted($1) => '$1 is tainted');
+
+ cmp_ok $2, 'eq', $str;
+ ok(!tainted($2) => '$2 is untainted');
+}
--- /dev/null
+#!/usr/bin/env perl -T
+use strict;
+use Test::More;
+
+BEGIN {
+ eval {
+ require Taint::Util;
+ Taint::Util->import;
+ };
+
+ plan $@
+ ? (skip_all => "Taint::Util required for taint tests")
+ : (tests => 8);
+}
+
+use re::engine::Plugin (
+ exec => sub {
+ my ($re, $str) = @_;
+
+ ok(tainted($str) => 'matched string tainted');
+
+ my $one = $str;
+ my $two = $str; untaint($two);
+
+ ok(tainted($one));
+ ok(!tainted($two));
+
+ $re->num_captures(
+ FETCH => sub {
+ my ($re, $p) = @_;
+
+ return $one if $p == 1;
+ return $two if $p == 2;
+ },
+ );
+
+ 1;
+ }
+);
+
+my $str = "string";
+taint($str);
+ok(tainted($str));
+
+if ($str =~ /pattern/) {
+ cmp_ok $1, 'eq', $str;
+ ok(tainted($1) => '$1 is tainted');
+
+ cmp_ok $2, 'eq', $str;
+ ok(!tainted($2) => '$2 is untainted');
+}