src/5010001/orig/regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  *      One Ring to rule them all, One Ring to find them
   6  &
   7  *     [p.v of _The Lord of the Rings_, opening poem]
   8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10  */
  11
  12 /* This file contains functions for executing a regular expression.  See
  13  * also regcomp.c which funnily enough, contains functions for compiling
  14  * a regular expression.
  15  *
  16  * This file is also copied at build time to ext/re/re_exec.c, where
  17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18  * This causes the main functions to be compiled under new names and with
  19  * debugging support added, which makes "use re 'debug'" work.
  20  */
  21
  22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23  * confused with the original package (see point 3 below).  Thanks, Henry!
  24  */
  25
  26 /* Additional note: this code is very heavily munged from Henry's version
  27  * in places.  In some spots I've traded clarity for efficiency, so don't
  28  * blame Henry for some of the lack of readability.
  29  */
  30
  31 /* The names of the functions have been changed from regcomp and
  32  * regexec to  pregcomp and pregexec in order to avoid conflicts
  33  * with the POSIX routines of the same names.
  34 */
  35
  36 #ifdef PERL_EXT_RE_BUILD
  37 #include "re_top.h"
  38 #endif
  39
  40 /*
  41  * pregcomp and pregexec -- regsub and regerror are not used in perl
  42  *
  43  *      Copyright (c) 1986 by University of Toronto.
  44  *      Written by Henry Spencer.  Not derived from licensed software.
  45  *
  46  *      Permission is granted to anyone to use this software for any
  47  *      purpose on any computer system, and to redistribute it freely,
  48  *      subject to the following restrictions:
  49  *
  50  *      1. The author is not responsible for the consequences of use of
  51  *              this software, no matter how awful, even if they arise
  52  *              from defects in it.
  53  *
  54  *      2. The origin of this software must not be misrepresented, either
  55  *              by explicit claim or by omission.
  56  *
  57  *      3. Altered versions must be plainly marked as such, and must not
  58  *              be misrepresented as being the original software.
  59  *
  60  ****    Alterations to Henry's code are...
  61  ****
  62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64  ****    by Larry Wall and others
  65  ****
  66  ****    You may distribute under the terms of either the GNU General Public
  67  ****    License or the Artistic License, as specified in the README file.
  68  *
  69  * Beware that some of this code is subtly aware of the way operator
  70  * precedence is structured in regular expressions.  Serious changes in
  71  * regular-expression syntax might require a total rethink.
  72  */
  73 #include "EXTERN.h"
  74 #define PERL_IN_REGEXEC_C
  75 #include "perl.h"
  76
  77 #ifdef PERL_IN_XSUB_RE
  78 #  include "re_comp.h"
  79 #else
  80 #  include "regcomp.h"
  81 #endif
  82
  83 #define RF_tainted      1               /* tainted information used? */
  84 #define RF_warned       2               /* warned about big count? */
  85
  86 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  87
  88 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  89
  90 #define RS_init         1               /* eval environment created */
  91 #define RS_set          2               /* replsv value is set */
  92
  93 #ifndef STATIC
  94 #define STATIC  static
  95 #endif
  96
  97 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  98
  99 /*
 100  * Forwards.
 101  */
 102
 103 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 104 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 105
 106 #define HOPc(pos,off) \
 107         (char *)(PL_reg_match_utf8 \
 108             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 109             : (U8*)(pos + off))
 110 #define HOPBACKc(pos, off) \
 111         (char*)(PL_reg_match_utf8\
 112             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 113             : (pos - off >= PL_bostr)           \
 114                 ? (U8*)pos - off                \
 115                 : NULL)
 116
 117 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 118 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 119
 120 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 121     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 122 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 123 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 124 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 125 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 126
 127 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 128
 129 /* for use after a quantifier and before an EXACT-like node -- japhy */
 130 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 131 #define JUMPABLE(rn) (      \
 132     OP(rn) == OPEN ||       \
 133     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 134     OP(rn) == EVAL ||   \
 135     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 136     OP(rn) == PLUS || OP(rn) == MINMOD || \
 137     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 138     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 139 )
 140 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 141
 142 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 143
 144 #if 0
 145 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 146    we don't need this definition. */
 147 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 148 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 149 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 150
 151 #else
 152 /* ... so we use this as its faster. */
 153 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 154 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 155 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 156
 157 #endif
 158
 159 /*
 160   Search for mandatory following text node; for lookahead, the text must
 161   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 162 */
 163 #define FIND_NEXT_IMPT(rn) STMT_START { \
 164     while (JUMPABLE(rn)) { \
 165         const OPCODE type = OP(rn); \
 166         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 167             rn = NEXTOPER(NEXTOPER(rn)); \
 168         else if (type == PLUS) \
 169             rn = NEXTOPER(rn); \
 170         else if (type == IFMATCH) \
 171             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 172         else rn += NEXT_OFF(rn); \
 173     } \
 174 } STMT_END
 175
 176
 177 static void restore_pos(pTHX_ void *arg);
 178
 179 STATIC CHECKPOINT
 180 S_regcppush(pTHX_ I32 parenfloor)
 181 {
 182     dVAR;
 183     const int retval = PL_savestack_ix;
 184 #define REGCP_PAREN_ELEMS 4
 185     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 186     int p;
 187     GET_RE_DEBUG_FLAGS_DECL;
 188
 189     if (paren_elems_to_push < 0)
 190         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 191
 192 #define REGCP_OTHER_ELEMS 7
 193     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 194
 195     for (p = PL_regsize; p > parenfloor; p--) {
 196 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 197         SSPUSHINT(PL_regoffs[p].end);
 198         SSPUSHINT(PL_regoffs[p].start);
 199         SSPUSHPTR(PL_reg_start_tmp[p]);
 200         SSPUSHINT(p);
 201         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 202           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 203                       (UV)p, (IV)PL_regoffs[p].start,
 204                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 205                       (IV)PL_regoffs[p].end
 206         ));
 207     }
 208 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 209     SSPUSHPTR(PL_regoffs);
 210     SSPUSHINT(PL_regsize);
 211     SSPUSHINT(*PL_reglastparen);
 212     SSPUSHINT(*PL_reglastcloseparen);
 213     SSPUSHPTR(PL_reginput);
 214 #define REGCP_FRAME_ELEMS 2
 215 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 216  * are needed for the regexp context stack bookkeeping. */
 217     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 218     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 219
 220     return retval;
 221 }
 222
 223 /* These are needed since we do not localize EVAL nodes: */
 224 #define REGCP_SET(cp)                                           \
 225     DEBUG_STATE_r(                                              \
 226             PerlIO_printf(Perl_debug_log,                       \
 227                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 228                 (IV)PL_savestack_ix));                          \
 229     cp = PL_savestack_ix
 230
 231 #define REGCP_UNWIND(cp)                                        \
 232     DEBUG_STATE_r(                                              \
 233         if (cp != PL_savestack_ix)                              \
 234             PerlIO_printf(Perl_debug_log,                       \
 235                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 236                 (IV)(cp), (IV)PL_savestack_ix));                \
 237     regcpblow(cp)
 238
 239 STATIC char *
 240 S_regcppop(pTHX_ const regexp *rex)
 241 {
 242     dVAR;
 243     U32 i;
 244     char *input;
 245     GET_RE_DEBUG_FLAGS_DECL;
 246
 247     PERL_ARGS_ASSERT_REGCPPOP;
 248
 249     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 250     i = SSPOPINT;
 251     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 252     i = SSPOPINT; /* Parentheses elements to pop. */
 253     input = (char *) SSPOPPTR;
 254     *PL_reglastcloseparen = SSPOPINT;
 255     *PL_reglastparen = SSPOPINT;
 256     PL_regsize = SSPOPINT;
 257     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 258
 259
 260     /* Now restore the parentheses context. */
 261     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 262          i > 0; i -= REGCP_PAREN_ELEMS) {
 263         I32 tmps;
 264         U32 paren = (U32)SSPOPINT;
 265         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 266         PL_regoffs[paren].start = SSPOPINT;
 267         tmps = SSPOPINT;
 268         if (paren <= *PL_reglastparen)
 269             PL_regoffs[paren].end = tmps;
 270         DEBUG_BUFFERS_r(
 271             PerlIO_printf(Perl_debug_log,
 272                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 273                           (UV)paren, (IV)PL_regoffs[paren].start,
 274                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 275                           (IV)PL_regoffs[paren].end,
 276                           (paren > *PL_reglastparen ? "(no)" : ""));
 277         );
 278     }
 279     DEBUG_BUFFERS_r(
 280         if (*PL_reglastparen + 1 <= rex->nparens) {
 281             PerlIO_printf(Perl_debug_log,
 282                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 283                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 284         }
 285     );
 286 #if 1
 287     /* It would seem that the similar code in regtry()
 288      * already takes care of this, and in fact it is in
 289      * a better location to since this code can #if 0-ed out
 290      * but the code in regtry() is needed or otherwise tests
 291      * requiring null fields (pat.t#187 and split.t#{13,14}
 292      * (as of patchlevel 7877)  will fail.  Then again,
 293      * this code seems to be necessary or otherwise
 294      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 295      * --jhi updated by dapm */
 296     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 297         if (i > PL_regsize)
 298             PL_regoffs[i].start = -1;
 299         PL_regoffs[i].end = -1;
 300     }
 301 #endif
 302     return input;
 303 }
 304
 305 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 306
 307 /*
 308  * pregexec and friends
 309  */
 310
 311 #ifndef PERL_IN_XSUB_RE
 312 /*
 313  - pregexec - match a regexp against a string
 314  */
 315 I32
 316 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 317          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 318 /* strend: pointer to null at end of string */
 319 /* strbeg: real beginning of string */
 320 /* minend: end of match must be >=minend after stringarg. */
 321 /* nosave: For optimizations. */
 322 {
 323     PERL_ARGS_ASSERT_PREGEXEC;
 324
 325     return
 326         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 327                       nosave ? 0 : REXEC_COPY_STR);
 328 }
 329 #endif
 330
 331 /*
 332  * Need to implement the following flags for reg_anch:
 333  *
 334  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 335  * USE_INTUIT_ML
 336  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 337  * INTUIT_AUTORITATIVE_ML
 338  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 339  * INTUIT_ONCE_ML
 340  *
 341  * Another flag for this function: SECOND_TIME (so that float substrs
 342  * with giant delta may be not rechecked).
 343  */
 344
 345 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 346
 347 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 348    Otherwise, only SvCUR(sv) is used to get strbeg. */
 349
 350 /* XXXX We assume that strpos is strbeg unless sv. */
 351
 352 /* XXXX Some places assume that there is a fixed substring.
 353         An update may be needed if optimizer marks as "INTUITable"
 354         RExen without fixed substrings.  Similarly, it is assumed that
 355         lengths of all the strings are no more than minlen, thus they
 356         cannot come from lookahead.
 357         (Or minlen should take into account lookahead.)
 358   NOTE: Some of this comment is not correct. minlen does now take account
 359   of lookahead/behind. Further research is required. -- demerphq
 360
 361 */
 362
 363 /* A failure to find a constant substring means that there is no need to make
 364    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 365    finding a substring too deep into the string means that less calls to
 366    regtry() should be needed.
 367
 368    REx compiler's optimizer found 4 possible hints:
 369         a) Anchored substring;
 370         b) Fixed substring;
 371         c) Whether we are anchored (beginning-of-line or \G);
 372         d) First node (of those at offset 0) which may distingush positions;
 373    We use a)b)d) and multiline-part of c), and try to find a position in the
 374    string which does not contradict any of them.
 375  */
 376
 377 /* Most of decisions we do here should have been done at compile time.
 378    The nodes of the REx which we used for the search should have been
 379    deleted from the finite automaton. */
 380
 381 char *
 382 Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
 383                      char *strend, const U32 flags, re_scream_pos_data *data)
 384 {
 385     dVAR;
 386     register I32 start_shift = 0;
 387     /* Should be nonnegative! */
 388     register I32 end_shift   = 0;
 389     register char *s;
 390     register SV *check;
 391     char *strbeg;
 392     char *t;
 393     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 394     I32 ml_anch;
 395     register char *other_last = NULL;   /* other substr checked before this */
 396     char *check_at = NULL;              /* check substr found at this pos */
 397     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 398     RXi_GET_DECL(prog,progi);
 399 #ifdef DEBUGGING
 400     const char * const i_strpos = strpos;
 401 #endif
 402     GET_RE_DEBUG_FLAGS_DECL;
 403
 404     PERL_ARGS_ASSERT_RE_INTUIT_START;
 405
 406     RX_MATCH_UTF8_set(prog,do_utf8);
 407
 408     if (RX_UTF8(prog)) {
 409         PL_reg_flags |= RF_utf8;
 410     }
 411     DEBUG_EXECUTE_r(
 412         debug_start_match(prog, do_utf8, strpos, strend,
 413             sv ? "Guessing start of match in sv for"
 414                : "Guessing start of match in string for");
 415               );
 416
 417     /* CHR_DIST() would be more correct here but it makes things slow. */
 418     if (prog->minlen > strend - strpos) {
 419         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 420                               "String too short... [re_intuit_start]\n"));
 421         goto fail;
 422     }
 423
 424     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 425     PL_regeol = strend;
 426     if (do_utf8) {
 427         if (!prog->check_utf8 && prog->check_substr)
 428             to_utf8_substr(prog);
 429         check = prog->check_utf8;
 430     } else {
 431         if (!prog->check_substr && prog->check_utf8)
 432             to_byte_substr(prog);
 433         check = prog->check_substr;
 434     }
 435     if (check == &PL_sv_undef) {
 436         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 437                 "Non-utf8 string cannot match utf8 check string\n"));
 438         goto fail;
 439     }
 440     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 441         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 442                      || ( (prog->extflags & RXf_ANCH_BOL)
 443                           && !multiline ) );    /* Check after \n? */
 444
 445         if (!ml_anch) {
 446           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 447                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 448                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 449                && sv && !SvROK(sv)
 450                && (strpos != strbeg)) {
 451               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 452               goto fail;
 453           }
 454           if (prog->check_offset_min == prog->check_offset_max &&
 455               !(prog->extflags & RXf_CANY_SEEN)) {
 456             /* Substring at constant offset from beg-of-str... */
 457             I32 slen;
 458
 459             s = HOP3c(strpos, prog->check_offset_min, strend);
 460
 461             if (SvTAIL(check)) {
 462                 slen = SvCUR(check);    /* >= 1 */
 463
 464                 if ( strend - s > slen || strend - s < slen - 1
 465                      || (strend - s == slen && strend[-1] != '\n')) {
 466                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 467                     goto fail_finish;
 468                 }
 469                 /* Now should match s[0..slen-2] */
 470                 slen--;
 471                 if (slen && (*SvPVX_const(check) != *s
 472                              || (slen > 1
 473                                  && memNE(SvPVX_const(check), s, slen)))) {
 474                   report_neq:
 475                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 476                     goto fail_finish;
 477                 }
 478             }
 479             else if (*SvPVX_const(check) != *s
 480                      || ((slen = SvCUR(check)) > 1
 481                          && memNE(SvPVX_const(check), s, slen)))
 482                 goto report_neq;
 483             check_at = s;
 484             goto success_at_start;
 485           }
 486         }
 487         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 488         s = strpos;
 489         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 490         end_shift = prog->check_end_shift;
 491
 492         if (!ml_anch) {
 493             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 494                                          - (SvTAIL(check) != 0);
 495             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 496
 497             if (end_shift < eshift)
 498                 end_shift = eshift;
 499         }
 500     }
 501     else {                              /* Can match at random position */
 502         ml_anch = 0;
 503         s = strpos;
 504         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 505         end_shift = prog->check_end_shift;
 506
 507         /* end shift should be non negative here */
 508     }
 509
 510 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 511     if (end_shift < 0)
 512         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 513                    (IV)end_shift, RX_PRECOMP(prog));
 514 #endif
 515
 516   restart:
 517     /* Find a possible match in the region s..strend by looking for
 518        the "check" substring in the region corrected by start/end_shift. */
 519
 520     {
 521         I32 srch_start_shift = start_shift;
 522         I32 srch_end_shift = end_shift;
 523         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 524             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 525             srch_start_shift = strbeg - s;
 526         }
 527     DEBUG_OPTIMISE_MORE_r({
 528         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 529             (IV)prog->check_offset_min,
 530             (IV)srch_start_shift,
 531             (IV)srch_end_shift,
 532             (IV)prog->check_end_shift);
 533     });
 534
 535     if (flags & REXEC_SCREAM) {
 536         I32 p = -1;                     /* Internal iterator of scream. */
 537         I32 * const pp = data ? data->scream_pos : &p;
 538
 539         if (PL_screamfirst[BmRARE(check)] >= 0
 540             || ( BmRARE(check) == '\n'
 541                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 542                  && SvTAIL(check) ))
 543             s = screaminstr(sv, check,
 544                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 545         else
 546             goto fail_finish;
 547         /* we may be pointing at the wrong string */
 548         if (s && RXp_MATCH_COPIED(prog))
 549             s = strbeg + (s - SvPVX_const(sv));
 550         if (data)
 551             *data->scream_olds = s;
 552     }
 553     else {
 554         U8* start_point;
 555         U8* end_point;
 556         if (prog->extflags & RXf_CANY_SEEN) {
 557             start_point= (U8*)(s + srch_start_shift);
 558             end_point= (U8*)(strend - srch_end_shift);
 559         } else {
 560             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 561             end_point= HOP3(strend, -srch_end_shift, strbeg);
 562         }
 563         DEBUG_OPTIMISE_MORE_r({
 564             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 565                 (int)(end_point - start_point),
 566                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 567                 start_point);
 568         });
 569
 570         s = fbm_instr( start_point, end_point,
 571                       check, multiline ? FBMrf_MULTILINE : 0);
 572     }
 573     }
 574     /* Update the count-of-usability, remove useless subpatterns,
 575         unshift s.  */
 576
 577     DEBUG_EXECUTE_r({
 578         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 579             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 580         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 581                           (s ? "Found" : "Did not find"),
 582             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 583                 ? "anchored" : "floating"),
 584             quoted,
 585             RE_SV_TAIL(check),
 586             (s ? " at offset " : "...\n") );
 587     });
 588
 589     if (!s)
 590         goto fail_finish;
 591     /* Finish the diagnostic message */
 592     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 593
 594     /* XXX dmq: first branch is for positive lookbehind...
 595        Our check string is offset from the beginning of the pattern.
 596        So we need to do any stclass tests offset forward from that
 597        point. I think. :-(
 598      */
 599
 600
 601
 602     check_at=s;
 603
 604
 605     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 606        Start with the other substr.
 607        XXXX no SCREAM optimization yet - and a very coarse implementation
 608        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 609                 *always* match.  Probably should be marked during compile...
 610        Probably it is right to do no SCREAM here...
 611      */
 612
 613     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 614                 : (prog->float_substr && prog->anchored_substr))
 615     {
 616         /* Take into account the "other" substring. */
 617         /* XXXX May be hopelessly wrong for UTF... */
 618         if (!other_last)
 619             other_last = strpos;
 620         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 621           do_other_anchored:
 622             {
 623                 char * const last = HOP3c(s, -start_shift, strbeg);
 624                 char *last1, *last2;
 625                 char * const saved_s = s;
 626                 SV* must;
 627
 628                 t = s - prog->check_offset_max;
 629                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 630                     && (!do_utf8
 631                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 632                             && t > strpos)))
 633                     NOOP;
 634                 else
 635                     t = strpos;
 636                 t = HOP3c(t, prog->anchored_offset, strend);
 637                 if (t < other_last)     /* These positions already checked */
 638                     t = other_last;
 639                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 640                 if (last < last1)
 641                     last1 = last;
 642                 /* XXXX It is not documented what units *_offsets are in.
 643                    We assume bytes, but this is clearly wrong.
 644                    Meaning this code needs to be carefully reviewed for errors.
 645                    dmq.
 646                   */
 647
 648                 /* On end-of-str: see comment below. */
 649                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 650                 if (must == &PL_sv_undef) {
 651                     s = (char*)NULL;
 652                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 653                 }
 654                 else
 655                     s = fbm_instr(
 656                         (unsigned char*)t,
 657                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 658                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 659                         must,
 660                         multiline ? FBMrf_MULTILINE : 0
 661                     );
 662                 DEBUG_EXECUTE_r({
 663                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 664                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 665                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 666                         (s ? "Found" : "Contradicts"),
 667                         quoted, RE_SV_TAIL(must));
 668                 });
 669
 670
 671                 if (!s) {
 672                     if (last1 >= last2) {
 673                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 674                                                 ", giving up...\n"));
 675                         goto fail_finish;
 676                     }
 677                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 678                         ", trying floating at offset %ld...\n",
 679                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 680                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 681                     s = HOP3c(last, 1, strend);
 682                     goto restart;
 683                 }
 684                 else {
 685                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 686                           (long)(s - i_strpos)));
 687                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 688                     other_last = HOP3c(s, 1, strend);
 689                     s = saved_s;
 690                     if (t == strpos)
 691                         goto try_at_start;
 692                     goto try_at_offset;
 693                 }
 694             }
 695         }
 696         else {          /* Take into account the floating substring. */
 697             char *last, *last1;
 698             char * const saved_s = s;
 699             SV* must;
 700
 701             t = HOP3c(s, -start_shift, strbeg);
 702             last1 = last =
 703                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 704             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 705                 last = HOP3c(t, prog->float_max_offset, strend);
 706             s = HOP3c(t, prog->float_min_offset, strend);
 707             if (s < other_last)
 708                 s = other_last;
 709  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 710             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 711             /* fbm_instr() takes into account exact value of end-of-str
 712                if the check is SvTAIL(ed).  Since false positives are OK,
 713                and end-of-str is not later than strend we are OK. */
 714             if (must == &PL_sv_undef) {
 715                 s = (char*)NULL;
 716                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 717             }
 718             else
 719                 s = fbm_instr((unsigned char*)s,
 720                               (unsigned char*)last + SvCUR(must)
 721                                   - (SvTAIL(must)!=0),
 722                               must, multiline ? FBMrf_MULTILINE : 0);
 723             DEBUG_EXECUTE_r({
 724                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 725                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 726                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 727                     (s ? "Found" : "Contradicts"),
 728                     quoted, RE_SV_TAIL(must));
 729             });
 730             if (!s) {
 731                 if (last1 == last) {
 732                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 733                                             ", giving up...\n"));
 734                     goto fail_finish;
 735                 }
 736                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 737                     ", trying anchored starting at offset %ld...\n",
 738                     (long)(saved_s + 1 - i_strpos)));
 739                 other_last = last;
 740                 s = HOP3c(t, 1, strend);
 741                 goto restart;
 742             }
 743             else {
 744                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 745                       (long)(s - i_strpos)));
 746                 other_last = s; /* Fix this later. --Hugo */
 747                 s = saved_s;
 748                 if (t == strpos)
 749                     goto try_at_start;
 750                 goto try_at_offset;
 751             }
 752         }
 753     }
 754
 755
 756     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 757
 758     DEBUG_OPTIMISE_MORE_r(
 759         PerlIO_printf(Perl_debug_log,
 760             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 761             (IV)prog->check_offset_min,
 762             (IV)prog->check_offset_max,
 763             (IV)(s-strpos),
 764             (IV)(t-strpos),
 765             (IV)(t-s),
 766             (IV)(strend-strpos)
 767         )
 768     );
 769
 770     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 771         && (!do_utf8
 772             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 773                  && t > strpos)))
 774     {
 775         /* Fixed substring is found far enough so that the match
 776            cannot start at strpos. */
 777       try_at_offset:
 778         if (ml_anch && t[-1] != '\n') {
 779             /* Eventually fbm_*() should handle this, but often
 780                anchored_offset is not 0, so this check will not be wasted. */
 781             /* XXXX In the code below we prefer to look for "^" even in
 782                presence of anchored substrings.  And we search even
 783                beyond the found float position.  These pessimizations
 784                are historical artefacts only.  */
 785           find_anchor:
 786             while (t < strend - prog->minlen) {
 787                 if (*t == '\n') {
 788                     if (t < check_at - prog->check_offset_min) {
 789                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 790                             /* Since we moved from the found position,
 791                                we definitely contradict the found anchored
 792                                substr.  Due to the above check we do not
 793                                contradict "check" substr.
 794                                Thus we can arrive here only if check substr
 795                                is float.  Redo checking for "other"=="fixed".
 796                              */
 797                             strpos = t + 1;
 798                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 799                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 800                             goto do_other_anchored;
 801                         }
 802                         /* We don't contradict the found floating substring. */
 803                         /* XXXX Why not check for STCLASS? */
 804                         s = t + 1;
 805                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 806                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 807                         goto set_useful;
 808                     }
 809                     /* Position contradicts check-string */
 810                     /* XXXX probably better to look for check-string
 811                        than for "\n", so one should lower the limit for t? */
 812                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 813                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 814                     other_last = strpos = s = t + 1;
 815                     goto restart;
 816                 }
 817                 t++;
 818             }
 819             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 820                         PL_colors[0], PL_colors[1]));
 821             goto fail_finish;
 822         }
 823         else {
 824             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 825                         PL_colors[0], PL_colors[1]));
 826         }
 827         s = t;
 828       set_useful:
 829         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 830     }
 831     else {
 832         /* The found string does not prohibit matching at strpos,
 833            - no optimization of calling REx engine can be performed,
 834            unless it was an MBOL and we are not after MBOL,
 835            or a future STCLASS check will fail this. */
 836       try_at_start:
 837         /* Even in this situation we may use MBOL flag if strpos is offset
 838            wrt the start of the string. */
 839         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 840             && (strpos != strbeg) && strpos[-1] != '\n'
 841             /* May be due to an implicit anchor of m{.*foo}  */
 842             && !(prog->intflags & PREGf_IMPLICIT))
 843         {
 844             t = strpos;
 845             goto find_anchor;
 846         }
 847         DEBUG_EXECUTE_r( if (ml_anch)
 848             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 849                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 850         );
 851       success_at_start:
 852         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 853             && (do_utf8 ? (
 854                 prog->check_utf8                /* Could be deleted already */
 855                 && --BmUSEFUL(prog->check_utf8) < 0
 856                 && (prog->check_utf8 == prog->float_utf8)
 857             ) : (
 858                 prog->check_substr              /* Could be deleted already */
 859                 && --BmUSEFUL(prog->check_substr) < 0
 860                 && (prog->check_substr == prog->float_substr)
 861             )))
 862         {
 863             /* If flags & SOMETHING - do not do it many times on the same match */
 864             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 865             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 866             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 867                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 868             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 869             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 870             check = NULL;                       /* abort */
 871             s = strpos;
 872             /* XXXX This is a remnant of the old implementation.  It
 873                     looks wasteful, since now INTUIT can use many
 874                     other heuristics. */
 875             prog->extflags &= ~RXf_USE_INTUIT;
 876         }
 877         else
 878             s = strpos;
 879     }
 880
 881     /* Last resort... */
 882     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 883     /* trie stclasses are too expensive to use here, we are better off to
 884        leave it to regmatch itself */
 885     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 886         /* minlen == 0 is possible if regstclass is \b or \B,
 887            and the fixed substr is ''$.
 888            Since minlen is already taken into account, s+1 is before strend;
 889            accidentally, minlen >= 1 guaranties no false positives at s + 1
 890            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 891            regstclass does not come from lookahead...  */
 892         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 893            This leaves EXACTF only, which is dealt with in find_byclass().  */
 894         const U8* const str = (U8*)STRING(progi->regstclass);
 895         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 896                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 897                     : 1);
 898         char * endpos;
 899         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 900             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 901         else if (prog->float_substr || prog->float_utf8)
 902             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 903         else
 904             endpos= strend;
 905
 906         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 907                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 908
 909         t = s;
 910         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 911         if (!s) {
 912 #ifdef DEBUGGING
 913             const char *what = NULL;
 914 #endif
 915             if (endpos == strend) {
 916                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 917                                 "Could not match STCLASS...\n") );
 918                 goto fail;
 919             }
 920             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 921                                    "This position contradicts STCLASS...\n") );
 922             if ((prog->extflags & RXf_ANCH) && !ml_anch)
 923                 goto fail;
 924             /* Contradict one of substrings */
 925             if (prog->anchored_substr || prog->anchored_utf8) {
 926                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 927                     DEBUG_EXECUTE_r( what = "anchored" );
 928                   hop_and_restart:
 929                     s = HOP3c(t, 1, strend);
 930                     if (s + start_shift + end_shift > strend) {
 931                         /* XXXX Should be taken into account earlier? */
 932                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 933                                                "Could not match STCLASS...\n") );
 934                         goto fail;
 935                     }
 936                     if (!check)
 937                         goto giveup;
 938                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 939                                 "Looking for %s substr starting at offset %ld...\n",
 940                                  what, (long)(s + start_shift - i_strpos)) );
 941                     goto restart;
 942                 }
 943                 /* Have both, check_string is floating */
 944                 if (t + start_shift >= check_at) /* Contradicts floating=check */
 945                     goto retry_floating_check;
 946                 /* Recheck anchored substring, but not floating... */
 947                 s = check_at;
 948                 if (!check)
 949                     goto giveup;
 950                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 951                           "Looking for anchored substr starting at offset %ld...\n",
 952                           (long)(other_last - i_strpos)) );
 953                 goto do_other_anchored;
 954             }
 955             /* Another way we could have checked stclass at the
 956                current position only: */
 957             if (ml_anch) {
 958                 s = t = t + 1;
 959                 if (!check)
 960                     goto giveup;
 961                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 962                           "Looking for /%s^%s/m starting at offset %ld...\n",
 963                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 964                 goto try_at_offset;
 965             }
 966             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
 967                 goto fail;
 968             /* Check is floating subtring. */
 969           retry_floating_check:
 970             t = check_at - start_shift;
 971             DEBUG_EXECUTE_r( what = "floating" );
 972             goto hop_and_restart;
 973         }
 974         if (t != s) {
 975             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 976                         "By STCLASS: moving %ld --> %ld\n",
 977                                   (long)(t - i_strpos), (long)(s - i_strpos))
 978                    );
 979         }
 980         else {
 981             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 982                                   "Does not contradict STCLASS...\n");
 983                    );
 984         }
 985     }
 986   giveup:
 987     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 988                           PL_colors[4], (check ? "Guessed" : "Giving up"),
 989                           PL_colors[5], (long)(s - i_strpos)) );
 990     return s;
 991
 992   fail_finish:                          /* Substring not found */
 993     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
 994         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 995   fail:
 996     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 997                           PL_colors[4], PL_colors[5]));
 998     return NULL;
 999 }
1000
1001 #define DECL_TRIE_TYPE(scan) \
1002     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1003                     trie_type = (scan->flags != EXACT) \
1004                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1005                               : (do_utf8 ? trie_utf8 : trie_plain)
1006
1007 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1008 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1009     UV uvc_unfolded = 0;                                                    \
1010     switch (trie_type) {                                                    \
1011     case trie_utf8_fold:                                                    \
1012         if ( foldlen>0 ) {                                                  \
1013             uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1014             foldlen -= len;                                                 \
1015             uscan += len;                                                   \
1016             len=0;                                                          \
1017         } else {                                                            \
1018             uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1019             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1020             foldlen -= UNISKIP( uvc );                                      \
1021             uscan = foldbuf + UNISKIP( uvc );                               \
1022         }                                                                   \
1023         break;                                                              \
1024     case trie_latin_utf8_fold:                                              \
1025         if ( foldlen>0 ) {                                                  \
1026             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1027             foldlen -= len;                                                 \
1028             uscan += len;                                                   \
1029             len=0;                                                          \
1030         } else {                                                            \
1031             len = 1;                                                        \
1032             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1033             foldlen -= UNISKIP( uvc );                                      \
1034             uscan = foldbuf + UNISKIP( uvc );                               \
1035         }                                                                   \
1036         break;                                                              \
1037     case trie_utf8:                                                         \
1038         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1039         break;                                                              \
1040     case trie_plain:                                                        \
1041         uvc = (UV)*uc;                                                      \
1042         len = 1;                                                            \
1043     }                                                                       \
1044                                                                             \
1045     if (uvc < 256) {                                                        \
1046         charid = trie->charmap[ uvc ];                                      \
1047     }                                                                       \
1048     else {                                                                  \
1049         charid = 0;                                                         \
1050         if (widecharmap) {                                                  \
1051             SV** const svpp = hv_fetch(widecharmap,                         \
1052                         (char*)&uvc, sizeof(UV), 0);                        \
1053             if (svpp)                                                       \
1054                 charid = (U16)SvIV(*svpp);                                  \
1055         }                                                                   \
1056     }                                                                       \
1057     if (!charid && trie_type == trie_utf8_fold && !UTF) {                   \
1058         charid = trie->charmap[uvc_unfolded];                               \
1059     }                                                                       \
1060 } STMT_END
1061
1062 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1063 {                                                      \
1064     char *my_strend= (char *)strend;                   \
1065     if ( (CoNd)                                        \
1066          && (ln == len ||                              \
1067              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1068                         m, NULL, ln, (bool)UTF))       \
1069          && (!reginfo || regtry(reginfo, &s)) )        \
1070         goto got_it;                                   \
1071     else {                                             \
1072          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1073          uvchr_to_utf8(tmpbuf, c);                     \
1074          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1075          if ( f != c                                   \
1076               && (f == c1 || f == c2)                  \
1077               && (ln == len ||                         \
1078                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1079                               m, NULL, ln, (bool)UTF)) \
1080               && (!reginfo || regtry(reginfo, &s)) )   \
1081               goto got_it;                             \
1082     }                                                  \
1083 }                                                      \
1084 s += len
1085
1086 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1087 STMT_START {                                              \
1088     while (s <= e) {                                      \
1089         if ( (CoNd)                                       \
1090              && (ln == 1 || !(OP(c) == EXACTF             \
1091                               ? ibcmp(s, m, ln)           \
1092                               : ibcmp_locale(s, m, ln)))  \
1093              && (!reginfo || regtry(reginfo, &s)) )        \
1094             goto got_it;                                  \
1095         s++;                                              \
1096     }                                                     \
1097 } STMT_END
1098
1099 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1100 STMT_START {                                          \
1101     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1102         CoDe                                          \
1103         s += uskip;                                   \
1104     }                                                 \
1105 } STMT_END
1106
1107 #define REXEC_FBC_SCAN(CoDe)                          \
1108 STMT_START {                                          \
1109     while (s < strend) {                              \
1110         CoDe                                          \
1111         s++;                                          \
1112     }                                                 \
1113 } STMT_END
1114
1115 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1116 REXEC_FBC_UTF8_SCAN(                                  \
1117     if (CoNd) {                                       \
1118         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1119             goto got_it;                              \
1120         else                                          \
1121             tmp = doevery;                            \
1122     }                                                 \
1123     else                                              \
1124         tmp = 1;                                      \
1125 )
1126
1127 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1128 REXEC_FBC_SCAN(                                       \
1129     if (CoNd) {                                       \
1130         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1131             goto got_it;                              \
1132         else                                          \
1133             tmp = doevery;                            \
1134     }                                                 \
1135     else                                              \
1136         tmp = 1;                                      \
1137 )
1138
1139 #define REXEC_FBC_TRYIT               \
1140 if ((!reginfo || regtry(reginfo, &s))) \
1141     goto got_it
1142
1143 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1144     if (do_utf8) {                                             \
1145         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1146     }                                                          \
1147     else {                                                     \
1148         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1149     }                                                          \
1150     break
1151
1152 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1153     if (do_utf8) {                                             \
1154         UtFpReLoAd;                                            \
1155         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1156     }                                                          \
1157     else {                                                     \
1158         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1159     }                                                          \
1160     break
1161
1162 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1163     PL_reg_flags |= RF_tainted;                                \
1164     if (do_utf8) {                                             \
1165         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1166     }                                                          \
1167     else {                                                     \
1168         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1169     }                                                          \
1170     break
1171
1172 #define DUMP_EXEC_POS(li,s,doutf8) \
1173     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1174
1175 /* We know what class REx starts with.  Try to find this position... */
1176 /* if reginfo is NULL, its a dryrun */
1177 /* annoyingly all the vars in this routine have different names from their counterparts
1178    in regmatch. /grrr */
1179
1180 STATIC char *
1181 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1182     const char *strend, regmatch_info *reginfo)
1183 {
1184         dVAR;
1185         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1186         char *m;
1187         STRLEN ln;
1188         STRLEN lnc;
1189         register STRLEN uskip;
1190         unsigned int c1;
1191         unsigned int c2;
1192         char *e;
1193         register I32 tmp = 1;   /* Scratch variable? */
1194         register const bool do_utf8 = PL_reg_match_utf8;
1195         RXi_GET_DECL(prog,progi);
1196
1197         PERL_ARGS_ASSERT_FIND_BYCLASS;
1198
1199         /* We know what class it must start with. */
1200         switch (OP(c)) {
1201         case ANYOF:
1202             if (do_utf8) {
1203                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1204                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1205                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1206                           REGINCLASS(prog, c, (U8*)s));
1207             }
1208             else {
1209                  while (s < strend) {
1210                       STRLEN skip = 1;
1211
1212                       if (REGINCLASS(prog, c, (U8*)s) ||
1213                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1214                            /* The assignment of 2 is intentional:
1215                             * for the folded sharp s, the skip is 2. */
1216                            (skip = SHARP_S_SKIP))) {
1217                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1218                                 goto got_it;
1219                            else
1220                                 tmp = doevery;
1221                       }
1222                       else
1223                            tmp = 1;
1224                       s += skip;
1225                  }
1226             }
1227             break;
1228         case CANY:
1229             REXEC_FBC_SCAN(
1230                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1231                     goto got_it;
1232                 else
1233                     tmp = doevery;
1234             );
1235             break;
1236         case EXACTF:
1237             m   = STRING(c);
1238             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1239             lnc = (I32) ln;     /* length to match in characters */
1240             if (UTF) {
1241                 STRLEN ulen1, ulen2;
1242                 U8 *sm = (U8 *) m;
1243                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1244                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1245                 /* used by commented-out code below */
1246                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1247
1248                 /* XXX: Since the node will be case folded at compile
1249                    time this logic is a little odd, although im not
1250                    sure that its actually wrong. --dmq */
1251
1252                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1253                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1254
1255                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1256                    codepoint of the first character in the converted
1257                    form, yet originally we did the extra step.
1258                    No tests fail by commenting this code out however
1259                    so Ive left it out. -- dmq.
1260
1261                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1262                                     0, uniflags);
1263                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1264                                     0, uniflags);
1265                 */
1266
1267                 lnc = 0;
1268                 while (sm < ((U8 *) m + ln)) {
1269                     lnc++;
1270                     sm += UTF8SKIP(sm);
1271                 }
1272             }
1273             else {
1274                 c1 = *(U8*)m;
1275                 c2 = PL_fold[c1];
1276             }
1277             goto do_exactf;
1278         case EXACTFL:
1279             m   = STRING(c);
1280             ln  = STR_LEN(c);
1281             lnc = (I32) ln;
1282             c1 = *(U8*)m;
1283             c2 = PL_fold_locale[c1];
1284           do_exactf:
1285             e = HOP3c(strend, -((I32)lnc), s);
1286
1287             if (!reginfo && e < s)
1288                 e = s;                  /* Due to minlen logic of intuit() */
1289
1290             /* The idea in the EXACTF* cases is to first find the
1291              * first character of the EXACTF* node and then, if
1292              * necessary, case-insensitively compare the full
1293              * text of the node.  The c1 and c2 are the first
1294              * characters (though in Unicode it gets a bit
1295              * more complicated because there are more cases
1296              * than just upper and lower: one needs to use
1297              * the so-called folding case for case-insensitive
1298              * matching (called "loose matching" in Unicode).
1299              * ibcmp_utf8() will do just that. */
1300
1301             if (do_utf8 || UTF) {
1302                 UV c, f;
1303                 U8 tmpbuf [UTF8_MAXBYTES+1];
1304                 STRLEN len = 1;
1305                 STRLEN foldlen;
1306                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1307                 if (c1 == c2) {
1308                     /* Upper and lower of 1st char are equal -
1309                      * probably not a "letter". */
1310                     while (s <= e) {
1311                         if (do_utf8) {
1312                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1313                                            uniflags);
1314                         } else {
1315                             c = *((U8*)s);
1316                         }
1317                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1318                     }
1319                 }
1320                 else {
1321                     while (s <= e) {
1322                         if (do_utf8) {
1323                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1324                                            uniflags);
1325                         } else {
1326                             c = *((U8*)s);
1327                         }
1328
1329                         /* Handle some of the three Greek sigmas cases.
1330                          * Note that not all the possible combinations
1331                          * are handled here: some of them are handled
1332                          * by the standard folding rules, and some of
1333                          * them (the character class or ANYOF cases)
1334                          * are handled during compiletime in
1335                          * regexec.c:S_regclass(). */
1336                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1337                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1338                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1339
1340                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1341                     }
1342                 }
1343             }
1344             else {
1345                 /* Neither pattern nor string are UTF8 */
1346                 if (c1 == c2)
1347                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1348                 else
1349                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1350             }
1351             break;
1352         case BOUNDL:
1353             PL_reg_flags |= RF_tainted;
1354             /* FALL THROUGH */
1355         case BOUND:
1356             if (do_utf8) {
1357                 if (s == PL_bostr)
1358                     tmp = '\n';
1359                 else {
1360                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1361                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1362                 }
1363                 tmp = ((OP(c) == BOUND ?
1364                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1365                 LOAD_UTF8_CHARCLASS_ALNUM();
1366                 REXEC_FBC_UTF8_SCAN(
1367                     if (tmp == !(OP(c) == BOUND ?
1368                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1369                                  isALNUM_LC_utf8((U8*)s)))
1370                     {
1371                         tmp = !tmp;
1372                         REXEC_FBC_TRYIT;
1373                 }
1374                 );
1375             }
1376             else {
1377                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1378                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1379                 REXEC_FBC_SCAN(
1380                     if (tmp ==
1381                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1382                         tmp = !tmp;
1383                         REXEC_FBC_TRYIT;
1384                 }
1385                 );
1386             }
1387             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1388                 goto got_it;
1389             break;
1390         case NBOUNDL:
1391             PL_reg_flags |= RF_tainted;
1392             /* FALL THROUGH */
1393         case NBOUND:
1394             if (do_utf8) {
1395                 if (s == PL_bostr)
1396                     tmp = '\n';
1397                 else {
1398                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1399                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1400                 }
1401                 tmp = ((OP(c) == NBOUND ?
1402                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1403                 LOAD_UTF8_CHARCLASS_ALNUM();
1404                 REXEC_FBC_UTF8_SCAN(
1405                     if (tmp == !(OP(c) == NBOUND ?
1406                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1407                                  isALNUM_LC_utf8((U8*)s)))
1408                         tmp = !tmp;
1409                     else REXEC_FBC_TRYIT;
1410                 );
1411             }
1412             else {
1413                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1414                 tmp = ((OP(c) == NBOUND ?
1415                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1416                 REXEC_FBC_SCAN(
1417                     if (tmp ==
1418                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1419                         tmp = !tmp;
1420                     else REXEC_FBC_TRYIT;
1421                 );
1422             }
1423             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1424                 goto got_it;
1425             break;
1426         case ALNUM:
1427             REXEC_FBC_CSCAN_PRELOAD(
1428                 LOAD_UTF8_CHARCLASS_ALNUM(),
1429                 swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1430                 isALNUM(*s)
1431             );
1432         case ALNUML:
1433             REXEC_FBC_CSCAN_TAINT(
1434                 isALNUM_LC_utf8((U8*)s),
1435                 isALNUM_LC(*s)
1436             );
1437         case NALNUM:
1438             REXEC_FBC_CSCAN_PRELOAD(
1439                 LOAD_UTF8_CHARCLASS_ALNUM(),
1440                 !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1441                 !isALNUM(*s)
1442             );
1443         case NALNUML:
1444             REXEC_FBC_CSCAN_TAINT(
1445                 !isALNUM_LC_utf8((U8*)s),
1446                 !isALNUM_LC(*s)
1447             );
1448         case SPACE:
1449             REXEC_FBC_CSCAN_PRELOAD(
1450                 LOAD_UTF8_CHARCLASS_SPACE(),
1451                 *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1452                 isSPACE(*s)
1453             );
1454         case SPACEL:
1455             REXEC_FBC_CSCAN_TAINT(
1456                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1457                 isSPACE_LC(*s)
1458             );
1459         case NSPACE:
1460             REXEC_FBC_CSCAN_PRELOAD(
1461                 LOAD_UTF8_CHARCLASS_SPACE(),
1462                 !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1463                 !isSPACE(*s)
1464             );
1465         case NSPACEL:
1466             REXEC_FBC_CSCAN_TAINT(
1467                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1468                 !isSPACE_LC(*s)
1469             );
1470         case DIGIT:
1471             REXEC_FBC_CSCAN_PRELOAD(
1472                 LOAD_UTF8_CHARCLASS_DIGIT(),
1473                 swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1474                 isDIGIT(*s)
1475             );
1476         case DIGITL:
1477             REXEC_FBC_CSCAN_TAINT(
1478                 isDIGIT_LC_utf8((U8*)s),
1479                 isDIGIT_LC(*s)
1480             );
1481         case NDIGIT:
1482             REXEC_FBC_CSCAN_PRELOAD(
1483                 LOAD_UTF8_CHARCLASS_DIGIT(),
1484                 !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1485                 !isDIGIT(*s)
1486             );
1487         case NDIGITL:
1488             REXEC_FBC_CSCAN_TAINT(
1489                 !isDIGIT_LC_utf8((U8*)s),
1490                 !isDIGIT_LC(*s)
1491             );
1492         case LNBREAK:
1493             REXEC_FBC_CSCAN(
1494                 is_LNBREAK_utf8(s),
1495                 is_LNBREAK_latin1(s)
1496             );
1497         case VERTWS:
1498             REXEC_FBC_CSCAN(
1499                 is_VERTWS_utf8(s),
1500                 is_VERTWS_latin1(s)
1501             );
1502         case NVERTWS:
1503             REXEC_FBC_CSCAN(
1504                 !is_VERTWS_utf8(s),
1505                 !is_VERTWS_latin1(s)
1506             );
1507         case HORIZWS:
1508             REXEC_FBC_CSCAN(
1509                 is_HORIZWS_utf8(s),
1510                 is_HORIZWS_latin1(s)
1511             );
1512         case NHORIZWS:
1513             REXEC_FBC_CSCAN(
1514                 !is_HORIZWS_utf8(s),
1515                 !is_HORIZWS_latin1(s)
1516             );
1517         case AHOCORASICKC:
1518         case AHOCORASICK:
1519             {
1520                 DECL_TRIE_TYPE(c);
1521                 /* what trie are we using right now */
1522                 reg_ac_data *aho
1523                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1524                 reg_trie_data *trie
1525                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1526                 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1527
1528                 const char *last_start = strend - trie->minlen;
1529 #ifdef DEBUGGING
1530                 const char *real_start = s;
1531 #endif
1532                 STRLEN maxlen = trie->maxlen;
1533                 SV *sv_points;
1534                 U8 **points; /* map of where we were in the input string
1535                                 when reading a given char. For ASCII this
1536                                 is unnecessary overhead as the relationship
1537                                 is always 1:1, but for Unicode, especially
1538                                 case folded Unicode this is not true. */
1539                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1540                 U8 *bitmap=NULL;
1541
1542
1543                 GET_RE_DEBUG_FLAGS_DECL;
1544
1545                 /* We can't just allocate points here. We need to wrap it in
1546                  * an SV so it gets freed properly if there is a croak while
1547                  * running the match */
1548                 ENTER;
1549                 SAVETMPS;
1550                 sv_points=newSV(maxlen * sizeof(U8 *));
1551                 SvCUR_set(sv_points,
1552                     maxlen * sizeof(U8 *));
1553                 SvPOK_on(sv_points);
1554                 sv_2mortal(sv_points);
1555                 points=(U8**)SvPV_nolen(sv_points );
1556                 if ( trie_type != trie_utf8_fold
1557                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1558                 {
1559                     if (trie->bitmap)
1560                         bitmap=(U8*)trie->bitmap;
1561                     else
1562                         bitmap=(U8*)ANYOF_BITMAP(c);
1563                 }
1564                 /* this is the Aho-Corasick algorithm modified a touch
1565                    to include special handling for long "unknown char"
1566                    sequences. The basic idea being that we use AC as long
1567                    as we are dealing with a possible matching char, when
1568                    we encounter an unknown char (and we have not encountered
1569                    an accepting state) we scan forward until we find a legal
1570                    starting char.
1571                    AC matching is basically that of trie matching, except
1572                    that when we encounter a failing transition, we fall back
1573                    to the current states "fail state", and try the current char
1574                    again, a process we repeat until we reach the root state,
1575                    state 1, or a legal transition. If we fail on the root state
1576                    then we can either terminate if we have reached an accepting
1577                    state previously, or restart the entire process from the beginning
1578                    if we have not.
1579
1580                  */
1581                 while (s <= last_start) {
1582                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1583                     U8 *uc = (U8*)s;
1584                     U16 charid = 0;
1585                     U32 base = 1;
1586                     U32 state = 1;
1587                     UV uvc = 0;
1588                     STRLEN len = 0;
1589                     STRLEN foldlen = 0;
1590                     U8 *uscan = (U8*)NULL;
1591                     U8 *leftmost = NULL;
1592 #ifdef DEBUGGING
1593                     U32 accepted_word= 0;
1594 #endif
1595                     U32 pointpos = 0;
1596
1597                     while ( state && uc <= (U8*)strend ) {
1598                         int failed=0;
1599                         U32 word = aho->states[ state ].wordnum;
1600
1601                         if( state==1 ) {
1602                             if ( bitmap ) {
1603                                 DEBUG_TRIE_EXECUTE_r(
1604                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1605                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1606                                             (char *)uc, do_utf8 );
1607                                         PerlIO_printf( Perl_debug_log,
1608                                             " Scanning for legal start char...\n");
1609                                     }
1610                                 );
1611                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1612                                     uc++;
1613                                 }
1614                                 s= (char *)uc;
1615                             }
1616                             if (uc >(U8*)last_start) break;
1617                         }
1618
1619                         if ( word ) {
1620                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1621                             if (!leftmost || lpos < leftmost) {
1622                                 DEBUG_r(accepted_word=word);
1623                                 leftmost= lpos;
1624                             }
1625                             if (base==0) break;
1626
1627                         }
1628                         points[pointpos++ % maxlen]= uc;
1629                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1630                                              uscan, len, uvc, charid, foldlen,
1631                                              foldbuf, uniflags);
1632                         DEBUG_TRIE_EXECUTE_r({
1633                             dump_exec_pos( (char *)uc, c, strend, real_start,
1634                                 s,   do_utf8 );
1635                             PerlIO_printf(Perl_debug_log,
1636                                 " Charid:%3u CP:%4"UVxf" ",
1637                                  charid, uvc);
1638                         });
1639
1640                         do {
1641 #ifdef DEBUGGING
1642                             word = aho->states[ state ].wordnum;
1643 #endif
1644                             base = aho->states[ state ].trans.base;
1645
1646                             DEBUG_TRIE_EXECUTE_r({
1647                                 if (failed)
1648                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1649                                         s,   do_utf8 );
1650                                 PerlIO_printf( Perl_debug_log,
1651                                     "%sState: %4"UVxf", word=%"UVxf,
1652                                     failed ? " Fail transition to " : "",
1653                                     (UV)state, (UV)word);
1654                             });
1655                             if ( base ) {
1656                                 U32 tmp;
1657                                 if (charid &&
1658                                      (base + charid > trie->uniquecharcount )
1659                                      && (base + charid - 1 - trie->uniquecharcount
1660                                             < trie->lasttrans)
1661                                      && trie->trans[base + charid - 1 -
1662                                             trie->uniquecharcount].check == state
1663                                      && (tmp=trie->trans[base + charid - 1 -
1664                                         trie->uniquecharcount ].next))
1665                                 {
1666                                     DEBUG_TRIE_EXECUTE_r(
1667                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1668                                     state = tmp;
1669                                     break;
1670                                 }
1671                                 else {
1672                                     DEBUG_TRIE_EXECUTE_r(
1673                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1674                                     failed = 1;
1675                                     state = aho->fail[state];
1676                                 }
1677                             }
1678                             else {
1679                                 /* we must be accepting here */
1680                                 DEBUG_TRIE_EXECUTE_r(
1681                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1682                                 failed = 1;
1683                                 break;
1684                             }
1685                         } while(state);
1686                         uc += len;
1687                         if (failed) {
1688                             if (leftmost)
1689                                 break;
1690                             if (!state) state = 1;
1691                         }
1692                     }
1693                     if ( aho->states[ state ].wordnum ) {
1694                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1695                         if (!leftmost || lpos < leftmost) {
1696                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1697                             leftmost = lpos;
1698                         }
1699                     }
1700                     if (leftmost) {
1701                         s = (char*)leftmost;
1702                         DEBUG_TRIE_EXECUTE_r({
1703                             PerlIO_printf(
1704                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1705                                 (UV)accepted_word, (IV)(s - real_start)
1706                             );
1707                         });
1708                         if (!reginfo || regtry(reginfo, &s)) {
1709                             FREETMPS;
1710                             LEAVE;
1711                             goto got_it;
1712                         }
1713                         s = HOPc(s,1);
1714                         DEBUG_TRIE_EXECUTE_r({
1715                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1716                         });
1717                     } else {
1718                         DEBUG_TRIE_EXECUTE_r(
1719                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1720                         break;
1721                     }
1722                 }
1723                 FREETMPS;
1724                 LEAVE;
1725             }
1726             break;
1727         default:
1728             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1729             break;
1730         }
1731         return 0;
1732       got_it:
1733         return s;
1734 }
1735
1736 static void
1737 S_swap_match_buff (pTHX_ regexp *prog)
1738 {
1739     regexp_paren_pair *t;
1740
1741     PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1742
1743     if (!prog->swap) {
1744     /* We have to be careful. If the previous successful match
1745        was from this regex we don't want a subsequent paritally
1746        successful match to clobber the old results.
1747        So when we detect this possibility we add a swap buffer
1748        to the re, and switch the buffer each match. If we fail
1749        we switch it back, otherwise we leave it swapped.
1750     */
1751         Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1752     }
1753     t = prog->swap;
1754     prog->swap = prog->offs;
1755     prog->offs = t;
1756 }
1757
1758
1759 /*
1760  - regexec_flags - match a regexp against a string
1761  */
1762 I32
1763 Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1764               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1765 /* strend: pointer to null at end of string */
1766 /* strbeg: real beginning of string */
1767 /* minend: end of match must be >=minend after stringarg. */
1768 /* data: May be used for some additional optimizations.
1769          Currently its only used, with a U32 cast, for transmitting
1770          the ganch offset when doing a /g match. This will change */
1771 /* nosave: For optimizations. */
1772 {
1773     dVAR;
1774     /*register*/ char *s;
1775     register regnode *c;
1776     /*register*/ char *startpos = stringarg;
1777     I32 minlen;         /* must match at least this many chars */
1778     I32 dontbother = 0; /* how many characters not to try at end */
1779     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1780     I32 scream_pos = -1;                /* Internal iterator of scream. */
1781     char *scream_olds = NULL;
1782     const bool do_utf8 = (bool)DO_UTF8(sv);
1783     I32 multiline;
1784     RXi_GET_DECL(prog,progi);
1785     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1786     bool swap_on_fail = 0;
1787     GET_RE_DEBUG_FLAGS_DECL;
1788
1789     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1790     PERL_UNUSED_ARG(data);
1791
1792     /* Be paranoid... */
1793     if (prog == NULL || startpos == NULL) {
1794         Perl_croak(aTHX_ "NULL regexp parameter");
1795         return 0;
1796     }
1797
1798     multiline = prog->extflags & RXf_PMf_MULTILINE;
1799     reginfo.prog = prog;
1800
1801     RX_MATCH_UTF8_set(prog, do_utf8);
1802     DEBUG_EXECUTE_r(
1803         debug_start_match(prog, do_utf8, startpos, strend,
1804         "Matching");
1805     );
1806
1807     minlen = prog->minlen;
1808
1809     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1810         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1811                               "String too short [regexec_flags]...\n"));
1812         goto phooey;
1813     }
1814
1815
1816     /* Check validity of program. */
1817     if (UCHARAT(progi->program) != REG_MAGIC) {
1818         Perl_croak(aTHX_ "corrupted regexp program");
1819     }
1820
1821     PL_reg_flags = 0;
1822     PL_reg_eval_set = 0;
1823     PL_reg_maxiter = 0;
1824
1825     if (RX_UTF8(prog))
1826         PL_reg_flags |= RF_utf8;
1827
1828     /* Mark beginning of line for ^ and lookbehind. */
1829     reginfo.bol = startpos; /* XXX not used ??? */
1830     PL_bostr  = strbeg;
1831     reginfo.sv = sv;
1832
1833     /* Mark end of line for $ (and such) */
1834     PL_regeol = strend;
1835
1836     /* see how far we have to get to not match where we matched before */
1837     reginfo.till = startpos+minend;
1838
1839     /* If there is a "must appear" string, look for it. */
1840     s = startpos;
1841
1842     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1843         MAGIC *mg;
1844
1845         if (flags & REXEC_IGNOREPOS)    /* Means: check only at start */
1846             reginfo.ganch = startpos + prog->gofs;
1847         else if (sv && SvTYPE(sv) >= SVt_PVMG
1848                   && SvMAGIC(sv)
1849                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1850                   && mg->mg_len >= 0) {
1851             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1852             if (prog->extflags & RXf_ANCH_GPOS) {
1853                 if (s > reginfo.ganch)
1854                     goto phooey;
1855                 s = reginfo.ganch - prog->gofs;
1856             }
1857         }
1858         else if (data) {
1859             reginfo.ganch = strbeg + PTR2UV(data);
1860         } else                          /* pos() not defined */
1861             reginfo.ganch = strbeg;
1862     }
1863     if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1864         swap_on_fail = 1;
1865         swap_match_buff(prog); /* do we need a save destructor here for
1866                                   eval dies? */
1867     }
1868     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1869         re_scream_pos_data d;
1870
1871         d.scream_olds = &scream_olds;
1872         d.scream_pos = &scream_pos;
1873         s = re_intuit_start(prog, sv, s, strend, flags, &d);
1874         if (!s) {
1875             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1876             goto phooey;        /* not present */
1877         }
1878     }
1879
1880
1881
1882     /* Simplest case:  anchored match need be tried only once. */
1883     /*  [unless only anchor is BOL and multiline is set] */
1884     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1885         if (s == startpos && regtry(&reginfo, &startpos))
1886             goto got_it;
1887         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1888                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1889         {
1890             char *end;
1891
1892             if (minlen)
1893                 dontbother = minlen - 1;
1894             end = HOP3c(strend, -dontbother, strbeg) - 1;
1895             /* for multiline we only have to try after newlines */
1896             if (prog->check_substr || prog->check_utf8) {
1897                 if (s == startpos)
1898                     goto after_try;
1899                 while (1) {
1900                     if (regtry(&reginfo, &s))
1901                         goto got_it;
1902                   after_try:
1903                     if (s > end)
1904                         goto phooey;
1905                     if (prog->extflags & RXf_USE_INTUIT) {
1906                         s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1907                         if (!s)
1908                             goto phooey;
1909                     }
1910                     else
1911                         s++;
1912                 }
1913             } else {
1914                 if (s > startpos)
1915                     s--;
1916                 while (s < end) {
1917                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
1918                         if (regtry(&reginfo, &s))
1919                             goto got_it;
1920                     }
1921                 }
1922             }
1923         }
1924         goto phooey;
1925     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1926     {
1927         /* the warning about reginfo.ganch being used without intialization
1928            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1929            and we only enter this block when the same bit is set. */
1930         char *tmp_s = reginfo.ganch - prog->gofs;
1931         if (regtry(&reginfo, &tmp_s))
1932             goto got_it;
1933         goto phooey;
1934     }
1935
1936     /* Messy cases:  unanchored match. */
1937     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1938         /* we have /x+whatever/ */
1939         /* it must be a one character string (XXXX Except UTF?) */
1940         char ch;
1941 #ifdef DEBUGGING
1942         int did_match = 0;
1943 #endif
1944         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1945             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1946         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1947
1948         if (do_utf8) {
1949             REXEC_FBC_SCAN(
1950                 if (*s == ch) {
1951                     DEBUG_EXECUTE_r( did_match = 1 );
1952                     if (regtry(&reginfo, &s)) goto got_it;
1953                     s += UTF8SKIP(s);
1954                     while (s < strend && *s == ch)
1955                         s += UTF8SKIP(s);
1956                 }
1957             );
1958         }
1959         else {
1960             REXEC_FBC_SCAN(
1961                 if (*s == ch) {
1962                     DEBUG_EXECUTE_r( did_match = 1 );
1963                     if (regtry(&reginfo, &s)) goto got_it;
1964                     s++;
1965                     while (s < strend && *s == ch)
1966                         s++;
1967                 }
1968             );
1969         }
1970         DEBUG_EXECUTE_r(if (!did_match)
1971                 PerlIO_printf(Perl_debug_log,
1972                                   "Did not find anchored character...\n")
1973                );
1974     }
1975     else if (prog->anchored_substr != NULL
1976               || prog->anchored_utf8 != NULL
1977               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1978                   && prog->float_max_offset < strend - s)) {
1979         SV *must;
1980         I32 back_max;
1981         I32 back_min;
1982         char *last;
1983         char *last1;            /* Last position checked before */
1984 #ifdef DEBUGGING
1985         int did_match = 0;
1986 #endif
1987         if (prog->anchored_substr || prog->anchored_utf8) {
1988             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1989                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1990             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1991             back_max = back_min = prog->anchored_offset;
1992         } else {
1993             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1994                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1995             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1996             back_max = prog->float_max_offset;
1997             back_min = prog->float_min_offset;
1998         }
1999
2000
2001         if (must == &PL_sv_undef)
2002             /* could not downgrade utf8 check substring, so must fail */
2003             goto phooey;
2004
2005         if (back_min<0) {
2006             last = strend;
2007         } else {
2008             last = HOP3c(strend,        /* Cannot start after this */
2009                   -(I32)(CHR_SVLEN(must)
2010                          - (SvTAIL(must) != 0) + back_min), strbeg);
2011         }
2012         if (s > PL_bostr)
2013             last1 = HOPc(s, -1);
2014         else
2015             last1 = s - 1;      /* bogus */
2016
2017         /* XXXX check_substr already used to find "s", can optimize if
2018            check_substr==must. */
2019         scream_pos = -1;
2020         dontbother = end_shift;
2021         strend = HOPc(strend, -dontbother);
2022         while ( (s <= last) &&
2023                 ((flags & REXEC_SCREAM)
2024                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2025                                     end_shift, &scream_pos, 0))
2026                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2027                                   (unsigned char*)strend, must,
2028                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2029             /* we may be pointing at the wrong string */
2030             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2031                 s = strbeg + (s - SvPVX_const(sv));
2032             DEBUG_EXECUTE_r( did_match = 1 );
2033             if (HOPc(s, -back_max) > last1) {
2034                 last1 = HOPc(s, -back_min);
2035                 s = HOPc(s, -back_max);
2036             }
2037             else {
2038                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2039
2040                 last1 = HOPc(s, -back_min);
2041                 s = t;
2042             }
2043             if (do_utf8) {
2044                 while (s <= last1) {
2045                     if (regtry(&reginfo, &s))
2046                         goto got_it;
2047                     s += UTF8SKIP(s);
2048                 }
2049             }
2050             else {
2051                 while (s <= last1) {
2052                     if (regtry(&reginfo, &s))
2053                         goto got_it;
2054                     s++;
2055                 }
2056             }
2057         }
2058         DEBUG_EXECUTE_r(if (!did_match) {
2059             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2060                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2061             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2062                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2063                                ? "anchored" : "floating"),
2064                 quoted, RE_SV_TAIL(must));
2065         });
2066         goto phooey;
2067     }
2068     else if ( (c = progi->regstclass) ) {
2069         if (minlen) {
2070             const OPCODE op = OP(progi->regstclass);
2071             /* don't bother with what can't match */
2072             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2073                 strend = HOPc(strend, -(minlen - 1));
2074         }
2075         DEBUG_EXECUTE_r({
2076             SV * const prop = sv_newmortal();
2077             regprop(prog, prop, c);
2078             {
2079                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2080                     s,strend-s,60);
2081                 PerlIO_printf(Perl_debug_log,
2082                     "Matching stclass %.*s against %s (%d chars)\n",
2083                     (int)SvCUR(prop), SvPVX_const(prop),
2084                      quoted, (int)(strend - s));
2085             }
2086         });
2087         if (find_byclass(prog, c, s, strend, &reginfo))
2088             goto got_it;
2089         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2090     }
2091     else {
2092         dontbother = 0;
2093         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2094             /* Trim the end. */
2095             char *last;
2096             SV* float_real;
2097
2098             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2099                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2100             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2101
2102             if (flags & REXEC_SCREAM) {
2103                 last = screaminstr(sv, float_real, s - strbeg,
2104                                    end_shift, &scream_pos, 1); /* last one */
2105                 if (!last)
2106                     last = scream_olds; /* Only one occurrence. */
2107                 /* we may be pointing at the wrong string */
2108                 else if (RXp_MATCH_COPIED(prog))
2109                     s = strbeg + (s - SvPVX_const(sv));
2110             }
2111             else {
2112                 STRLEN len;
2113                 const char * const little = SvPV_const(float_real, len);
2114
2115                 if (SvTAIL(float_real)) {
2116                     if (memEQ(strend - len + 1, little, len - 1))
2117                         last = strend - len + 1;
2118                     else if (!multiline)
2119                         last = memEQ(strend - len, little, len)
2120                             ? strend - len : NULL;
2121                     else
2122                         goto find_last;
2123                 } else {
2124                   find_last:
2125                     if (len)
2126                         last = rninstr(s, strend, little, little + len);
2127                     else
2128                         last = strend;  /* matching "$" */
2129                 }
2130             }
2131             if (last == NULL) {
2132                 DEBUG_EXECUTE_r(
2133                     PerlIO_printf(Perl_debug_log,
2134                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2135                         PL_colors[4], PL_colors[5]));
2136                 goto phooey; /* Should not happen! */
2137             }
2138             dontbother = strend - last + prog->float_min_offset;
2139         }
2140         if (minlen && (dontbother < minlen))
2141             dontbother = minlen - 1;
2142         strend -= dontbother;              /* this one's always in bytes! */
2143         /* We don't know much -- general case. */
2144         if (do_utf8) {
2145             for (;;) {
2146                 if (regtry(&reginfo, &s))
2147                     goto got_it;
2148                 if (s >= strend)
2149                     break;
2150                 s += UTF8SKIP(s);
2151             };
2152         }
2153         else {
2154             do {
2155                 if (regtry(&reginfo, &s))
2156                     goto got_it;
2157             } while (s++ < strend);
2158         }
2159     }
2160
2161     /* Failure. */
2162     goto phooey;
2163
2164 got_it:
2165     RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2166
2167     if (PL_reg_eval_set)
2168         restore_pos(aTHX_ prog);
2169     if (RXp_PAREN_NAMES(prog))
2170         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2171
2172     /* make sure $`, $&, $', and $digit will work later */
2173     if ( !(flags & REXEC_NOT_FIRST) ) {
2174         RX_MATCH_COPY_FREE(prog);
2175         if (flags & REXEC_COPY_STR) {
2176             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2177 #ifdef PERL_OLD_COPY_ON_WRITE
2178             if ((SvIsCOW(sv)
2179                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2180                 if (DEBUG_C_TEST) {
2181                     PerlIO_printf(Perl_debug_log,
2182                                   "Copy on write: regexp capture, type %d\n",
2183                                   (int) SvTYPE(sv));
2184                 }
2185                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2186                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2187                 assert (SvPOKp(prog->saved_copy));
2188             } else
2189 #endif
2190             {
2191                 RX_MATCH_COPIED_on(prog);
2192                 s = savepvn(strbeg, i);
2193                 prog->subbeg = s;
2194             }
2195             prog->sublen = i;
2196         }
2197         else {
2198             prog->subbeg = strbeg;
2199             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2200         }
2201     }
2202
2203     return 1;
2204
2205 phooey:
2206     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2207                           PL_colors[4], PL_colors[5]));
2208     if (PL_reg_eval_set)
2209         restore_pos(aTHX_ prog);
2210     if (swap_on_fail)
2211         /* we failed :-( roll it back */
2212         swap_match_buff(prog);
2213
2214     return 0;
2215 }
2216
2217
2218 /*
2219  - regtry - try match at specific point
2220  */
2221 STATIC I32                      /* 0 failure, 1 success */
2222 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2223 {
2224     dVAR;
2225     CHECKPOINT lastcp;
2226     regexp *prog = reginfo->prog;
2227     RXi_GET_DECL(prog,progi);
2228     GET_RE_DEBUG_FLAGS_DECL;
2229
2230     PERL_ARGS_ASSERT_REGTRY;
2231
2232     reginfo->cutpoint=NULL;
2233
2234     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2235         MAGIC *mg;
2236
2237         PL_reg_eval_set = RS_init;
2238         DEBUG_EXECUTE_r(DEBUG_s(
2239             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2240                           (IV)(PL_stack_sp - PL_stack_base));
2241             ));
2242         SAVESTACK_CXPOS();
2243         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2244         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2245         SAVETMPS;
2246         /* Apparently this is not needed, judging by wantarray. */
2247         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2248            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2249
2250         if (reginfo->sv) {
2251             /* Make $_ available to executed code. */
2252             if (reginfo->sv != DEFSV) {
2253                 SAVE_DEFSV;
2254                 DEFSV_set(reginfo->sv);
2255             }
2256
2257             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2258                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2259                 /* prepare for quick setting of pos */
2260 #ifdef PERL_OLD_COPY_ON_WRITE
2261                 if (SvIsCOW(reginfo->sv))
2262                     sv_force_normal_flags(reginfo->sv, 0);
2263 #endif
2264                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2265                                  &PL_vtbl_mglob, NULL, 0);
2266                 mg->mg_len = -1;
2267             }
2268             PL_reg_magic    = mg;
2269             PL_reg_oldpos   = mg->mg_len;
2270             SAVEDESTRUCTOR_X(restore_pos, prog);
2271         }
2272         if (!PL_reg_curpm) {
2273             Newxz(PL_reg_curpm, 1, PMOP);
2274 #ifdef USE_ITHREADS
2275             {
2276                 SV* const repointer = newSViv(0);
2277                 /* this regexp is also owned by the new PL_reg_curpm, which
2278                    will try to free it.  */
2279                 av_push(PL_regex_padav,repointer);
2280                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2281                 PL_regex_pad = AvARRAY(PL_regex_padav);
2282             }
2283 #endif
2284         }
2285 #ifdef USE_ITHREADS
2286         /* It seems that non-ithreads works both with and without this code.
2287            So for efficiency reasons it seems best not to have the code
2288            compiled when it is not needed.  */
2289         /* This is safe against NULLs: */
2290         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2291         /* PM_reg_curpm owns a reference to this regexp.  */
2292         ReREFCNT_inc(prog);
2293 #endif
2294         PM_SETRE(PL_reg_curpm, prog);
2295         PL_reg_oldcurpm = PL_curpm;
2296         PL_curpm = PL_reg_curpm;
2297         if (RXp_MATCH_COPIED(prog)) {
2298             /*  Here is a serious problem: we cannot rewrite subbeg,
2299                 since it may be needed if this match fails.  Thus
2300                 $` inside (?{}) could fail... */
2301             PL_reg_oldsaved = prog->subbeg;
2302             PL_reg_oldsavedlen = prog->sublen;
2303 #ifdef PERL_OLD_COPY_ON_WRITE
2304             PL_nrs = prog->saved_copy;
2305 #endif
2306             RXp_MATCH_COPIED_off(prog);
2307         }
2308         else
2309             PL_reg_oldsaved = NULL;
2310         prog->subbeg = PL_bostr;
2311         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2312     }
2313     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2314     prog->offs[0].start = *startpos - PL_bostr;
2315     PL_reginput = *startpos;
2316     PL_reglastparen = &prog->lastparen;
2317     PL_reglastcloseparen = &prog->lastcloseparen;
2318     prog->lastparen = 0;
2319     prog->lastcloseparen = 0;
2320     PL_regsize = 0;
2321     PL_regoffs = prog->offs;
2322     if (PL_reg_start_tmpl <= prog->nparens) {
2323         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2324         if(PL_reg_start_tmp)
2325             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2326         else
2327             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2328     }
2329
2330     /* XXXX What this code is doing here?!!!  There should be no need
2331        to do this again and again, PL_reglastparen should take care of
2332        this!  --ilya*/
2333
2334     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2335      * Actually, the code in regcppop() (which Ilya may be meaning by
2336      * PL_reglastparen), is not needed at all by the test suite
2337      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2338      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2339      * Meanwhile, this code *is* needed for the
2340      * above-mentioned test suite tests to succeed.  The common theme
2341      * on those tests seems to be returning null fields from matches.
2342      * --jhi updated by dapm */
2343 #if 1
2344     if (prog->nparens) {
2345         regexp_paren_pair *pp = PL_regoffs;
2346         register I32 i;
2347         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2348             ++pp;
2349             pp->start = -1;
2350             pp->end = -1;
2351         }
2352     }
2353 #endif
2354     REGCP_SET(lastcp);
2355     if (regmatch(reginfo, progi->program + 1)) {
2356         PL_regoffs[0].end = PL_reginput - PL_bostr;
2357         return 1;
2358     }
2359     if (reginfo->cutpoint)
2360         *startpos= reginfo->cutpoint;
2361     REGCP_UNWIND(lastcp);
2362     return 0;
2363 }
2364
2365
2366 #define sayYES goto yes
2367 #define sayNO goto no
2368 #define sayNO_SILENT goto no_silent
2369
2370 /* we dont use STMT_START/END here because it leads to
2371    "unreachable code" warnings, which are bogus, but distracting. */
2372 #define CACHEsayNO \
2373     if (ST.cache_mask) \
2374        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2375     sayNO
2376
2377 /* this is used to determine how far from the left messages like
2378    'failed...' are printed. It should be set such that messages
2379    are inline with the regop output that created them.
2380 */
2381 #define REPORT_CODE_OFF 32
2382
2383
2384 /* Make sure there is a test for this +1 options in re_tests */
2385 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2386
2387 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2388 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2389
2390 #define SLAB_FIRST(s) (&(s)->states[0])
2391 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2392
2393 /* grab a new slab and return the first slot in it */
2394
2395 STATIC regmatch_state *
2396 S_push_slab(pTHX)
2397 {
2398 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2399     dMY_CXT;
2400 #endif
2401     regmatch_slab *s = PL_regmatch_slab->next;
2402     if (!s) {
2403         Newx(s, 1, regmatch_slab);
2404         s->prev = PL_regmatch_slab;
2405         s->next = NULL;
2406         PL_regmatch_slab->next = s;
2407     }
2408     PL_regmatch_slab = s;
2409     return SLAB_FIRST(s);
2410 }
2411
2412
2413 /* push a new state then goto it */
2414
2415 #define PUSH_STATE_GOTO(state, node) \
2416     scan = node; \
2417     st->resume_state = state; \
2418     goto push_state;
2419
2420 /* push a new state with success backtracking, then goto it */
2421
2422 #define PUSH_YES_STATE_GOTO(state, node) \
2423     scan = node; \
2424     st->resume_state = state; \
2425     goto push_yes_state;
2426
2427
2428
2429 /*
2430
2431 regmatch() - main matching routine
2432
2433 This is basically one big switch statement in a loop. We execute an op,
2434 set 'next' to point the next op, and continue. If we come to a point which
2435 we may need to backtrack to on failure such as (A|B|C), we push a
2436 backtrack state onto the backtrack stack. On failure, we pop the top
2437 state, and re-enter the loop at the state indicated. If there are no more
2438 states to pop, we return failure.
2439
2440 Sometimes we also need to backtrack on success; for example /A+/, where
2441 after successfully matching one A, we need to go back and try to
2442 match another one; similarly for lookahead assertions: if the assertion
2443 completes successfully, we backtrack to the state just before the assertion
2444 and then carry on.  In these cases, the pushed state is marked as
2445 'backtrack on success too'. This marking is in fact done by a chain of
2446 pointers, each pointing to the previous 'yes' state. On success, we pop to
2447 the nearest yes state, discarding any intermediate failure-only states.
2448 Sometimes a yes state is pushed just to force some cleanup code to be
2449 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2450 it to free the inner regex.
2451
2452 Note that failure backtracking rewinds the cursor position, while
2453 success backtracking leaves it alone.
2454
2455 A pattern is complete when the END op is executed, while a subpattern
2456 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2457 ops trigger the "pop to last yes state if any, otherwise return true"
2458 behaviour.
2459
2460 A common convention in this function is to use A and B to refer to the two
2461 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2462 the subpattern to be matched possibly multiple times, while B is the entire
2463 rest of the pattern. Variable and state names reflect this convention.
2464
2465 The states in the main switch are the union of ops and failure/success of
2466 substates associated with with that op.  For example, IFMATCH is the op
2467 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2468 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2469 successfully matched A and IFMATCH_A_fail is a state saying that we have
2470 just failed to match A. Resume states always come in pairs. The backtrack
2471 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2472 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2473 on success or failure.
2474
2475 The struct that holds a backtracking state is actually a big union, with
2476 one variant for each major type of op. The variable st points to the
2477 top-most backtrack struct. To make the code clearer, within each
2478 block of code we #define ST to alias the relevant union.
2479
2480 Here's a concrete example of a (vastly oversimplified) IFMATCH
2481 implementation:
2482
2483     switch (state) {
2484     ....
2485
2486 #define ST st->u.ifmatch
2487
2488     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2489         ST.foo = ...; // some state we wish to save
2490         ...
2491         // push a yes backtrack state with a resume value of
2492         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2493         // first node of A:
2494         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2495         // NOTREACHED
2496
2497     case IFMATCH_A: // we have successfully executed A; now continue with B
2498         next = B;
2499         bar = ST.foo; // do something with the preserved value
2500         break;
2501
2502     case IFMATCH_A_fail: // A failed, so the assertion failed
2503         ...;   // do some housekeeping, then ...
2504         sayNO; // propagate the failure
2505
2506 #undef ST
2507
2508     ...
2509     }
2510
2511 For any old-timers reading this who are familiar with the old recursive
2512 approach, the code above is equivalent to:
2513
2514     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2515     {
2516         int foo = ...
2517         ...
2518         if (regmatch(A)) {
2519             next = B;
2520             bar = foo;
2521             break;
2522         }
2523         ...;   // do some housekeeping, then ...
2524         sayNO; // propagate the failure
2525     }
2526
2527 The topmost backtrack state, pointed to by st, is usually free. If you
2528 want to claim it, populate any ST.foo fields in it with values you wish to
2529 save, then do one of
2530
2531         PUSH_STATE_GOTO(resume_state, node);
2532         PUSH_YES_STATE_GOTO(resume_state, node);
2533
2534 which sets that backtrack state's resume value to 'resume_state', pushes a
2535 new free entry to the top of the backtrack stack, then goes to 'node'.
2536 On backtracking, the free slot is popped, and the saved state becomes the
2537 new free state. An ST.foo field in this new top state can be temporarily
2538 accessed to retrieve values, but once the main loop is re-entered, it
2539 becomes available for reuse.
2540
2541 Note that the depth of the backtrack stack constantly increases during the
2542 left-to-right execution of the pattern, rather than going up and down with
2543 the pattern nesting. For example the stack is at its maximum at Z at the
2544 end of the pattern, rather than at X in the following:
2545
2546     /(((X)+)+)+....(Y)+....Z/
2547
2548 The only exceptions to this are lookahead/behind assertions and the cut,
2549 (?>A), which pop all the backtrack states associated with A before
2550 continuing.
2551
2552 Bascktrack state structs are allocated in slabs of about 4K in size.
2553 PL_regmatch_state and st always point to the currently active state,
2554 and PL_regmatch_slab points to the slab currently containing
2555 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2556 allocated, and is never freed until interpreter destruction. When the slab
2557 is full, a new one is allocated and chained to the end. At exit from
2558 regmatch(), slabs allocated since entry are freed.
2559
2560 */
2561
2562
2563 #define DEBUG_STATE_pp(pp)                                  \
2564     DEBUG_STATE_r({                                         \
2565         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2566         PerlIO_printf(Perl_debug_log,                       \
2567             "    %*s"pp" %s%s%s%s%s\n",                     \
2568             depth*2, "",                                    \
2569             PL_reg_name[st->resume_state],                     \
2570             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2571             ((st==yes_state) ? "Y" : ""),                   \
2572             ((st==mark_state) ? "M" : ""),                  \
2573             ((st==yes_state||st==mark_state) ? "]" : "")    \
2574         );                                                  \
2575     });
2576
2577
2578 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2579
2580 #ifdef DEBUGGING
2581
2582 STATIC void
2583 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2584     const char *start, const char *end, const char *blurb)
2585 {
2586     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2587
2588     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2589
2590     if (!PL_colorset)
2591             reginitcolors();
2592     {
2593         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2594             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2595
2596         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2597             start, end - start, 60);
2598
2599         PerlIO_printf(Perl_debug_log,
2600             "%s%s REx%s %s against %s\n",
2601                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2602
2603         if (do_utf8||utf8_pat)
2604             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2605                 utf8_pat ? "pattern" : "",
2606                 utf8_pat && do_utf8 ? " and " : "",
2607                 do_utf8 ? "string" : ""
2608             );
2609     }
2610 }
2611
2612 STATIC void
2613 S_dump_exec_pos(pTHX_ const char *locinput,
2614                       const regnode *scan,
2615                       const char *loc_regeol,
2616                       const char *loc_bostr,
2617                       const char *loc_reg_starttry,
2618                       const bool do_utf8)
2619 {
2620     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2621     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2622     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2623     /* The part of the string before starttry has one color
2624        (pref0_len chars), between starttry and current
2625        position another one (pref_len - pref0_len chars),
2626        after the current position the third one.
2627        We assume that pref0_len <= pref_len, otherwise we
2628        decrease pref0_len.  */
2629     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2630         ? (5 + taill) - l : locinput - loc_bostr;
2631     int pref0_len;
2632
2633     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2634
2635     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2636         pref_len++;
2637     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2638     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2639         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2640               ? (5 + taill) - pref_len : loc_regeol - locinput);
2641     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2642         l--;
2643     if (pref0_len < 0)
2644         pref0_len = 0;
2645     if (pref0_len > pref_len)
2646         pref0_len = pref_len;
2647     {
2648         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2649
2650         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2651             (locinput - pref_len),pref0_len, 60, 4, 5);
2652
2653         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2654                     (locinput - pref_len + pref0_len),
2655                     pref_len - pref0_len, 60, 2, 3);
2656
2657         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2658                     locinput, loc_regeol - locinput, 10, 0, 1);
2659
2660         const STRLEN tlen=len0+len1+len2;
2661         PerlIO_printf(Perl_debug_log,
2662                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2663                     (IV)(locinput - loc_bostr),
2664                     len0, s0,
2665                     len1, s1,
2666                     (docolor ? "" : "> <"),
2667                     len2, s2,
2668                     (int)(tlen > 19 ? 0 :  19 - tlen),
2669                     "");
2670     }
2671 }
2672
2673 #endif
2674
2675 /* reg_check_named_buff_matched()
2676  * Checks to see if a named buffer has matched. The data array of
2677  * buffer numbers corresponding to the buffer is expected to reside
2678  * in the regexp->data->data array in the slot stored in the ARG() of
2679  * node involved. Note that this routine doesn't actually care about the
2680  * name, that information is not preserved from compilation to execution.
2681  * Returns the index of the leftmost defined buffer with the given name
2682  * or 0 if non of the buffers matched.
2683  */
2684 STATIC I32
2685 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2686 {
2687     I32 n;
2688     RXi_GET_DECL(rex,rexi);
2689     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2690     I32 *nums=(I32*)SvPVX(sv_dat);
2691
2692     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2693
2694     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2695         if ((I32)*PL_reglastparen >= nums[n] &&
2696             PL_regoffs[nums[n]].end != -1)
2697         {
2698             return nums[n];
2699         }
2700     }
2701     return 0;
2702 }
2703
2704
2705 /* free all slabs above current one  - called during LEAVE_SCOPE */
2706
2707 STATIC void
2708 S_clear_backtrack_stack(pTHX_ void *p)
2709 {
2710     regmatch_slab *s = PL_regmatch_slab->next;
2711     PERL_UNUSED_ARG(p);
2712
2713     if (!s)
2714         return;
2715     PL_regmatch_slab->next = NULL;
2716     while (s) {
2717         regmatch_slab * const osl = s;
2718         s = s->next;
2719         Safefree(osl);
2720     }
2721 }
2722
2723
2724 #define SETREX(Re1,Re2) \
2725     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2726     Re1 = (Re2)
2727
2728 STATIC I32                      /* 0 failure, 1 success */
2729 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2730 {
2731 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2732     dMY_CXT;
2733 #endif
2734     dVAR;
2735     register const bool do_utf8 = PL_reg_match_utf8;
2736     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2737     regexp *rex = reginfo->prog;
2738     RXi_GET_DECL(rex,rexi);
2739     I32 oldsave;
2740     /* the current state. This is a cached copy of PL_regmatch_state */
2741     register regmatch_state *st;
2742     /* cache heavy used fields of st in registers */
2743     register regnode *scan;
2744     register regnode *next;
2745     register U32 n = 0; /* general value; init to avoid compiler warning */
2746     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2747     register char *locinput = PL_reginput;
2748     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2749
2750     bool result = 0;        /* return value of S_regmatch */
2751     int depth = 0;          /* depth of backtrack stack */
2752     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2753     const U32 max_nochange_depth =
2754         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2755         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2756     regmatch_state *yes_state = NULL; /* state to pop to on success of
2757                                                             subpattern */
2758     /* mark_state piggy backs on the yes_state logic so that when we unwind
2759        the stack on success we can update the mark_state as we go */
2760     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2761     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2762     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2763     U32 state_num;
2764     bool no_final = 0;      /* prevent failure from backtracking? */
2765     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2766     char *startpoint = PL_reginput;
2767     SV *popmark = NULL;     /* are we looking for a mark? */
2768     SV *sv_commit = NULL;   /* last mark name seen in failure */
2769     SV *sv_yes_mark = NULL; /* last mark name we have seen
2770                                during a successfull match */
2771     U32 lastopen = 0;       /* last open we saw */
2772     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2773     SV* const oreplsv = GvSV(PL_replgv);
2774     /* these three flags are set by various ops to signal information to
2775      * the very next op. They have a useful lifetime of exactly one loop
2776      * iteration, and are not preserved or restored by state pushes/pops
2777      */
2778     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2779     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2780     int logical = 0;        /* the following EVAL is:
2781                                 0: (?{...})
2782                                 1: (?(?{...})X|Y)
2783                                 2: (??{...})
2784                                or the following IFMATCH/UNLESSM is:
2785                                 false: plain (?=foo)
2786                                 true:  used as a condition: (?(?=foo))
2787                             */
2788 #ifdef DEBUGGING
2789     GET_RE_DEBUG_FLAGS_DECL;
2790 #endif
2791
2792     PERL_ARGS_ASSERT_REGMATCH;
2793
2794     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2795             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2796     }));
2797     /* on first ever call to regmatch, allocate first slab */
2798     if (!PL_regmatch_slab) {
2799         Newx(PL_regmatch_slab, 1, regmatch_slab);
2800         PL_regmatch_slab->prev = NULL;
2801         PL_regmatch_slab->next = NULL;
2802         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2803     }
2804
2805     oldsave = PL_savestack_ix;
2806     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2807     SAVEVPTR(PL_regmatch_slab);
2808     SAVEVPTR(PL_regmatch_state);
2809
2810     /* grab next free state slot */
2811     st = ++PL_regmatch_state;
2812     if (st >  SLAB_LAST(PL_regmatch_slab))
2813         st = PL_regmatch_state = S_push_slab(aTHX);
2814
2815     /* Note that nextchr is a byte even in UTF */
2816     nextchr = UCHARAT(locinput);
2817     scan = prog;
2818     while (scan != NULL) {
2819
2820         DEBUG_EXECUTE_r( {
2821             SV * const prop = sv_newmortal();
2822             regnode *rnext=regnext(scan);
2823             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2824             regprop(rex, prop, scan);
2825
2826             PerlIO_printf(Perl_debug_log,
2827                     "%3"IVdf":%*s%s(%"IVdf")\n",
2828                     (IV)(scan - rexi->program), depth*2, "",
2829                     SvPVX_const(prop),
2830                     (PL_regkind[OP(scan)] == END || !rnext) ?
2831                         0 : (IV)(rnext - rexi->program));
2832         });
2833
2834         next = scan + NEXT_OFF(scan);
2835         if (next == scan)
2836             next = NULL;
2837         state_num = OP(scan);
2838
2839       reenter_switch:
2840
2841         assert(PL_reglastparen == &rex->lastparen);
2842         assert(PL_reglastcloseparen == &rex->lastcloseparen);
2843         assert(PL_regoffs == rex->offs);
2844
2845         switch (state_num) {
2846         case BOL:
2847             if (locinput == PL_bostr)
2848             {
2849                 /* reginfo->till = reginfo->bol; */
2850                 break;
2851             }
2852             sayNO;
2853         case MBOL:
2854             if (locinput == PL_bostr ||
2855                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2856             {
2857                 break;
2858             }
2859             sayNO;
2860         case SBOL:
2861             if (locinput == PL_bostr)
2862                 break;
2863             sayNO;
2864         case GPOS:
2865             if (locinput == reginfo->ganch)
2866                 break;
2867             sayNO;
2868
2869         case KEEPS:
2870             /* update the startpoint */
2871             st->u.keeper.val = PL_regoffs[0].start;
2872             PL_reginput = locinput;
2873             PL_regoffs[0].start = locinput - PL_bostr;
2874             PUSH_STATE_GOTO(KEEPS_next, next);
2875             /*NOT-REACHED*/
2876         case KEEPS_next_fail:
2877             /* rollback the start point change */
2878             PL_regoffs[0].start = st->u.keeper.val;
2879             sayNO_SILENT;
2880             /*NOT-REACHED*/
2881         case EOL:
2882                 goto seol;
2883         case MEOL:
2884             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2885                 sayNO;
2886             break;
2887         case SEOL:
2888           seol:
2889             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2890                 sayNO;
2891             if (PL_regeol - locinput > 1)
2892                 sayNO;
2893             break;
2894         case EOS:
2895             if (PL_regeol != locinput)
2896                 sayNO;
2897             break;
2898         case SANY:
2899             if (!nextchr && locinput >= PL_regeol)
2900                 sayNO;
2901             if (do_utf8) {
2902                 locinput += PL_utf8skip[nextchr];
2903                 if (locinput > PL_regeol)
2904                     sayNO;
2905                 nextchr = UCHARAT(locinput);
2906             }
2907             else
2908                 nextchr = UCHARAT(++locinput);
2909             break;
2910         case CANY:
2911             if (!nextchr && locinput >= PL_regeol)
2912                 sayNO;
2913             nextchr = UCHARAT(++locinput);
2914             break;
2915         case REG_ANY:
2916             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2917                 sayNO;
2918             if (do_utf8) {
2919                 locinput += PL_utf8skip[nextchr];
2920                 if (locinput > PL_regeol)
2921                     sayNO;
2922                 nextchr = UCHARAT(locinput);
2923             }
2924             else
2925                 nextchr = UCHARAT(++locinput);
2926             break;
2927
2928 #undef  ST
2929 #define ST st->u.trie
2930         case TRIEC:
2931             /* In this case the charclass data is available inline so
2932                we can fail fast without a lot of extra overhead.
2933              */
2934             if (scan->flags == EXACT || !do_utf8) {
2935                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2936                     DEBUG_EXECUTE_r(
2937                         PerlIO_printf(Perl_debug_log,
2938                                   "%*s  %sfailed to match trie start class...%s\n",
2939                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2940                     );
2941                     sayNO_SILENT;
2942                     /* NOTREACHED */
2943                 }
2944             }
2945             /* FALL THROUGH */
2946         case TRIE:
2947             {
2948                 /* what type of TRIE am I? (utf8 makes this contextual) */
2949                 DECL_TRIE_TYPE(scan);
2950
2951                 /* what trie are we using right now */
2952                 reg_trie_data * const trie
2953                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2954                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2955                 U32 state = trie->startstate;
2956
2957                 if (trie->bitmap && trie_type != trie_utf8_fold &&
2958                     !TRIE_BITMAP_TEST(trie,*locinput)
2959                 ) {
2960                     if (trie->states[ state ].wordnum) {
2961                          DEBUG_EXECUTE_r(
2962                             PerlIO_printf(Perl_debug_log,
2963                                           "%*s  %smatched empty string...%s\n",
2964                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2965                         );
2966                         break;
2967                     } else {
2968                         DEBUG_EXECUTE_r(
2969                             PerlIO_printf(Perl_debug_log,
2970                                           "%*s  %sfailed to match trie start class...%s\n",
2971                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2972                         );
2973                         sayNO_SILENT;
2974                    }
2975                 }
2976
2977             {
2978                 U8 *uc = ( U8* )locinput;
2979
2980                 STRLEN len = 0;
2981                 STRLEN foldlen = 0;
2982                 U8 *uscan = (U8*)NULL;
2983                 STRLEN bufflen=0;
2984                 SV *sv_accept_buff = NULL;
2985                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2986
2987                 ST.accepted = 0; /* how many accepting states we have seen */
2988                 ST.B = next;
2989                 ST.jump = trie->jump;
2990                 ST.me = scan;
2991                 /*
2992                    traverse the TRIE keeping track of all accepting states
2993                    we transition through until we get to a failing node.
2994                 */
2995
2996                 while ( state && uc <= (U8*)PL_regeol ) {
2997                     U32 base = trie->states[ state ].trans.base;
2998                     UV uvc = 0;
2999                     U16 charid;
3000                     /* We use charid to hold the wordnum as we don't use it
3001                        for charid until after we have done the wordnum logic.
3002                        We define an alias just so that the wordnum logic reads
3003                        more naturally. */
3004
3005 #define got_wordnum charid
3006                     got_wordnum = trie->states[ state ].wordnum;
3007
3008                     if ( got_wordnum ) {
3009                         if ( ! ST.accepted ) {
3010                             ENTER;
3011                             SAVETMPS; /* XXX is this necessary? dmq */
3012                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3013                             sv_accept_buff=newSV(bufflen *
3014                                             sizeof(reg_trie_accepted) - 1);
3015                             SvCUR_set(sv_accept_buff, 0);
3016                             SvPOK_on(sv_accept_buff);
3017                             sv_2mortal(sv_accept_buff);
3018                             SAVETMPS;
3019                             ST.accept_buff =
3020                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3021                         }
3022                         do {
3023                             if (ST.accepted >= bufflen) {
3024                                 bufflen *= 2;
3025                                 ST.accept_buff =(reg_trie_accepted*)
3026                                     SvGROW(sv_accept_buff,
3027                                         bufflen * sizeof(reg_trie_accepted));
3028                             }
3029                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3030                                 + sizeof(reg_trie_accepted));
3031
3032
3033                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3034                             ST.accept_buff[ST.accepted].endpos = uc;
3035                             ++ST.accepted;
3036                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3037                     }
3038 #undef got_wordnum
3039
3040                     DEBUG_TRIE_EXECUTE_r({
3041                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3042                                 PerlIO_printf( Perl_debug_log,
3043                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3044                                     2+depth * 2, "", PL_colors[4],
3045                                     (UV)state, (UV)ST.accepted );
3046                     });
3047
3048                     if ( base ) {
3049                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3050                                              uscan, len, uvc, charid, foldlen,
3051                                              foldbuf, uniflags);
3052
3053                         if (charid &&
3054                              (base + charid > trie->uniquecharcount )
3055                              && (base + charid - 1 - trie->uniquecharcount
3056                                     < trie->lasttrans)
3057                              && trie->trans[base + charid - 1 -
3058                                     trie->uniquecharcount].check == state)
3059                         {
3060                             state = trie->trans[base + charid - 1 -
3061                                 trie->uniquecharcount ].next;
3062                         }
3063                         else {
3064                             state = 0;
3065                         }
3066                         uc += len;
3067
3068                     }
3069                     else {
3070                         state = 0;
3071                     }
3072                     DEBUG_TRIE_EXECUTE_r(
3073                         PerlIO_printf( Perl_debug_log,
3074                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3075                             charid, uvc, (UV)state, PL_colors[5] );
3076                     );
3077                 }
3078                 if (!ST.accepted )
3079                    sayNO;
3080
3081                 DEBUG_EXECUTE_r(
3082                     PerlIO_printf( Perl_debug_log,
3083                         "%*s  %sgot %"IVdf" possible matches%s\n",
3084                         REPORT_CODE_OFF + depth * 2, "",
3085                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3086                 );
3087             }}
3088             goto trie_first_try; /* jump into the fail handler */
3089             /* NOTREACHED */
3090         case TRIE_next_fail: /* we failed - try next alterative */
3091             if ( ST.jump) {
3092                 REGCP_UNWIND(ST.cp);
3093                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3094                     PL_regoffs[n].end = -1;
3095                 *PL_reglastparen = n;
3096             }
3097           trie_first_try:
3098             if (do_cutgroup) {
3099                 do_cutgroup = 0;
3100                 no_final = 0;
3101             }
3102
3103             if ( ST.jump) {
3104                 ST.lastparen = *PL_reglastparen;
3105                 REGCP_SET(ST.cp);
3106             }
3107             if ( ST.accepted == 1 ) {
3108                 /* only one choice left - just continue */
3109                 DEBUG_EXECUTE_r({
3110                     AV *const trie_words
3111                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3112                     SV ** const tmp = av_fetch( trie_words,
3113                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3114                     SV *sv= tmp ? sv_newmortal() : NULL;
3115
3116                     PerlIO_printf( Perl_debug_log,
3117                         "%*s  %sonly one match left: #%d <%s>%s\n",
3118                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3119                         ST.accept_buff[ 0 ].wordnum,
3120                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3121                                 PL_colors[0], PL_colors[1],
3122                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3123                             )
3124                         : "not compiled under -Dr",
3125                         PL_colors[5] );
3126                 });
3127                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3128                 /* in this case we free tmps/leave before we call regmatch
3129                    as we wont be using accept_buff again. */
3130
3131                 locinput = PL_reginput;
3132                 nextchr = UCHARAT(locinput);
3133                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3134                     scan = ST.B;
3135                 else
3136                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3137                 if (!has_cutgroup) {
3138                     FREETMPS;
3139                     LEAVE;
3140                 } else {
3141                     ST.accepted--;
3142                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3143                 }
3144
3145                 continue; /* execute rest of RE */
3146             }
3147
3148             if ( !ST.accepted-- ) {
3149                 DEBUG_EXECUTE_r({
3150                     PerlIO_printf( Perl_debug_log,
3151                         "%*s  %sTRIE failed...%s\n",
3152                         REPORT_CODE_OFF+depth*2, "",
3153                         PL_colors[4],
3154                         PL_colors[5] );
3155                 });
3156                 FREETMPS;
3157                 LEAVE;
3158                 sayNO_SILENT;
3159                 /*NOTREACHED*/
3160             }
3161
3162             /*
3163                There are at least two accepting states left.  Presumably
3164                the number of accepting states is going to be low,
3165                typically two. So we simply scan through to find the one
3166                with lowest wordnum.  Once we find it, we swap the last
3167                state into its place and decrement the size. We then try to
3168                match the rest of the pattern at the point where the word
3169                ends. If we succeed, control just continues along the
3170                regex; if we fail we return here to try the next accepting
3171                state
3172              */
3173
3174             {
3175                 U32 best = 0;
3176                 U32 cur;
3177                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3178                     DEBUG_TRIE_EXECUTE_r(
3179                         PerlIO_printf( Perl_debug_log,
3180                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3181                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3182                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3183                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3184                     );
3185
3186                     if (ST.accept_buff[cur].wordnum <
3187                             ST.accept_buff[best].wordnum)
3188                         best = cur;
3189                 }
3190
3191                 DEBUG_EXECUTE_r({
3192                     AV *const trie_words
3193                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3194                     SV ** const tmp = av_fetch( trie_words,
3195                         ST.accept_buff[ best ].wordnum - 1, 0 );
3196                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3197                                     ST.B :
3198                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3199                     SV *sv= tmp ? sv_newmortal() : NULL;
3200
3201                     PerlIO_printf( Perl_debug_log,
3202                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3203                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3204                         ST.accept_buff[best].wordnum,
3205                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3206                                 PL_colors[0], PL_colors[1],
3207                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3208                             ) : "not compiled under -Dr",
3209                             REG_NODE_NUM(nextop),
3210                         PL_colors[5] );
3211                 });
3212
3213                 if ( best<ST.accepted ) {
3214                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3215                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3216                     ST.accept_buff[ ST.accepted ] = tmp;
3217                     best = ST.accepted;
3218                 }
3219                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3220                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3221                     scan = ST.B;
3222                 } else {
3223                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3224                 }
3225                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3226                 /* NOTREACHED */
3227             }
3228             /* NOTREACHED */
3229         case TRIE_next:
3230             /* we dont want to throw this away, see bug 57042*/
3231             if (oreplsv != GvSV(PL_replgv))
3232                 sv_setsv(oreplsv, GvSV(PL_replgv));
3233             FREETMPS;
3234             LEAVE;
3235             sayYES;
3236 #undef  ST
3237
3238         case EXACT: {
3239             char *s = STRING(scan);
3240             ln = STR_LEN(scan);
3241             if (do_utf8 != UTF) {
3242                 /* The target and the pattern have differing utf8ness. */
3243                 char *l = locinput;
3244                 const char * const e = s + ln;
3245
3246                 if (do_utf8) {
3247                     /* The target is utf8, the pattern is not utf8. */
3248                     while (s < e) {
3249                         STRLEN ulen;
3250                         if (l >= PL_regeol)
3251                              sayNO;
3252                         if (NATIVE_TO_UNI(*(U8*)s) !=
3253                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3254                                             uniflags))
3255                              sayNO;
3256                         l += ulen;
3257                         s ++;
3258                     }
3259                 }
3260                 else {
3261                     /* The target is not utf8, the pattern is utf8. */
3262                     while (s < e) {
3263                         STRLEN ulen;
3264                         if (l >= PL_regeol)
3265                             sayNO;
3266                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3267                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3268                                            uniflags))
3269                             sayNO;
3270                         s += ulen;
3271                         l ++;
3272                     }
3273                 }
3274                 locinput = l;
3275                 nextchr = UCHARAT(locinput);
3276                 break;
3277             }
3278             /* The target and the pattern have the same utf8ness. */
3279             /* Inline the first character, for speed. */
3280             if (UCHARAT(s) != nextchr)
3281                 sayNO;
3282             if (PL_regeol - locinput < ln)
3283                 sayNO;
3284             if (ln > 1 && memNE(s, locinput, ln))
3285                 sayNO;
3286             locinput += ln;
3287             nextchr = UCHARAT(locinput);
3288             break;
3289             }
3290         case EXACTFL:
3291             PL_reg_flags |= RF_tainted;
3292             /* FALL THROUGH */
3293         case EXACTF: {
3294             char * const s = STRING(scan);
3295             ln = STR_LEN(scan);
3296
3297             if (do_utf8 || UTF) {
3298               /* Either target or the pattern are utf8. */
3299                 const char * const l = locinput;
3300                 char *e = PL_regeol;
3301
3302                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3303                                l, &e, 0,  do_utf8)) {
3304                      /* One more case for the sharp s:
3305                       * pack("U0U*", 0xDF) =~ /ss/i,
3306                       * the 0xC3 0x9F are the UTF-8
3307                       * byte sequence for the U+00DF. */
3308
3309                      if (!(do_utf8 &&
3310                            toLOWER(s[0]) == 's' &&
3311                            ln >= 2 &&
3312                            toLOWER(s[1]) == 's' &&
3313                            (U8)l[0] == 0xC3 &&
3314                            e - l >= 2 &&
3315                            (U8)l[1] == 0x9F))
3316                           sayNO;
3317                 }
3318                 locinput = e;
3319                 nextchr = UCHARAT(locinput);
3320                 break;
3321             }
3322
3323             /* Neither the target and the pattern are utf8. */
3324
3325             /* Inline the first character, for speed. */
3326             if (UCHARAT(s) != nextchr &&
3327                 UCHARAT(s) != ((OP(scan) == EXACTF)
3328                                ? PL_fold : PL_fold_locale)[nextchr])
3329                 sayNO;
3330             if (PL_regeol - locinput < ln)
3331                 sayNO;
3332             if (ln > 1 && (OP(scan) == EXACTF
3333                            ? ibcmp(s, locinput, ln)
3334                            : ibcmp_locale(s, locinput, ln)))
3335                 sayNO;
3336             locinput += ln;
3337             nextchr = UCHARAT(locinput);
3338             break;
3339             }
3340         case ANYOF:
3341             if (do_utf8) {
3342                 STRLEN inclasslen = PL_regeol - locinput;
3343
3344                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3345                     goto anyof_fail;
3346                 if (locinput >= PL_regeol)
3347                     sayNO;
3348                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3349                 nextchr = UCHARAT(locinput);
3350                 break;
3351             }
3352             else {
3353                 if (nextchr < 0)
3354                     nextchr = UCHARAT(locinput);
3355                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3356                     goto anyof_fail;
3357                 if (!nextchr && locinput >= PL_regeol)
3358                     sayNO;
3359                 nextchr = UCHARAT(++locinput);
3360                 break;
3361             }
3362         anyof_fail:
3363             /* If we might have the case of the German sharp s
3364              * in a casefolding Unicode character class. */
3365
3366             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3367                  locinput += SHARP_S_SKIP;
3368                  nextchr = UCHARAT(locinput);
3369             }
3370             else
3371                  sayNO;
3372             break;
3373         case ALNUML:
3374             PL_reg_flags |= RF_tainted;
3375             /* FALL THROUGH */
3376         case ALNUM:
3377             if (!nextchr)
3378                 sayNO;
3379             if (do_utf8) {
3380                 LOAD_UTF8_CHARCLASS_ALNUM();
3381                 if (!(OP(scan) == ALNUM
3382                       ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3383                       : isALNUM_LC_utf8((U8*)locinput)))
3384                 {
3385                     sayNO;
3386                 }
3387                 locinput += PL_utf8skip[nextchr];
3388                 nextchr = UCHARAT(locinput);
3389                 break;
3390             }
3391             if (!(OP(scan) == ALNUM
3392                   ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3393                 sayNO;
3394             nextchr = UCHARAT(++locinput);
3395             break;
3396         case NALNUML:
3397             PL_reg_flags |= RF_tainted;
3398             /* FALL THROUGH */
3399         case NALNUM:
3400             if (!nextchr && locinput >= PL_regeol)
3401                 sayNO;
3402             if (do_utf8) {
3403                 LOAD_UTF8_CHARCLASS_ALNUM();
3404                 if (OP(scan) == NALNUM
3405                     ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3406                     : isALNUM_LC_utf8((U8*)locinput))
3407                 {
3408                     sayNO;
3409                 }
3410                 locinput += PL_utf8skip[nextchr];
3411                 nextchr = UCHARAT(locinput);
3412                 break;
3413             }
3414             if (OP(scan) == NALNUM
3415                 ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3416                 sayNO;
3417             nextchr = UCHARAT(++locinput);
3418             break;
3419         case BOUNDL:
3420         case NBOUNDL:
3421             PL_reg_flags |= RF_tainted;
3422             /* FALL THROUGH */
3423         case BOUND:
3424         case NBOUND:
3425             /* was last char in word? */
3426             if (do_utf8) {
3427                 if (locinput == PL_bostr)
3428                     ln = '\n';
3429                 else {
3430                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3431
3432                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3433                 }
3434                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3435                     ln = isALNUM_uni(ln);
3436                     LOAD_UTF8_CHARCLASS_ALNUM();
3437                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3438                 }
3439                 else {
3440                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3441                     n = isALNUM_LC_utf8((U8*)locinput);
3442                 }
3443             }
3444             else {
3445                 ln = (locinput != PL_bostr) ?
3446                     UCHARAT(locinput - 1) : '\n';
3447                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3448                     ln = isALNUM(ln);
3449                     n = isALNUM(nextchr);
3450                 }
3451                 else {
3452                     ln = isALNUM_LC(ln);
3453                     n = isALNUM_LC(nextchr);
3454                 }
3455             }
3456             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3457                                     OP(scan) == BOUNDL))
3458                     sayNO;
3459             break;
3460         case SPACEL:
3461             PL_reg_flags |= RF_tainted;
3462             /* FALL THROUGH */
3463         case SPACE:
3464             if (!nextchr)
3465                 sayNO;
3466             if (do_utf8) {
3467                 if (UTF8_IS_CONTINUED(nextchr)) {
3468                     LOAD_UTF8_CHARCLASS_SPACE();
3469                     if (!(OP(scan) == SPACE
3470                           ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3471                           : isSPACE_LC_utf8((U8*)locinput)))
3472                     {
3473                         sayNO;
3474                     }
3475                     locinput += PL_utf8skip[nextchr];
3476                     nextchr = UCHARAT(locinput);
3477                     break;
3478                 }
3479                 if (!(OP(scan) == SPACE
3480                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3481                     sayNO;
3482                 nextchr = UCHARAT(++locinput);
3483             }
3484             else {
3485                 if (!(OP(scan) == SPACE
3486                       ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3487                     sayNO;
3488                 nextchr = UCHARAT(++locinput);
3489             }
3490             break;
3491         case NSPACEL:
3492             PL_reg_flags |= RF_tainted;
3493             /* FALL THROUGH */
3494         case NSPACE:
3495             if (!nextchr && locinput >= PL_regeol)
3496                 sayNO;
3497             if (do_utf8) {
3498                 LOAD_UTF8_CHARCLASS_SPACE();
3499                 if (OP(scan) == NSPACE
3500                     ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3501                     : isSPACE_LC_utf8((U8*)locinput))
3502                 {
3503                     sayNO;
3504                 }
3505                 locinput += PL_utf8skip[nextchr];
3506                 nextchr = UCHARAT(locinput);
3507                 break;
3508             }
3509             if (OP(scan) == NSPACE
3510                 ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3511                 sayNO;
3512             nextchr = UCHARAT(++locinput);
3513             break;
3514         case DIGITL:
3515             PL_reg_flags |= RF_tainted;
3516             /* FALL THROUGH */
3517         case DIGIT:
3518             if (!nextchr)
3519                 sayNO;
3520             if (do_utf8) {
3521                 LOAD_UTF8_CHARCLASS_DIGIT();
3522                 if (!(OP(scan) == DIGIT
3523                       ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3524                       : isDIGIT_LC_utf8((U8*)locinput)))
3525                 {
3526                     sayNO;
3527                 }
3528                 locinput += PL_utf8skip[nextchr];
3529                 nextchr = UCHARAT(locinput);
3530                 break;
3531             }
3532             if (!(OP(scan) == DIGIT
3533                   ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3534                 sayNO;
3535             nextchr = UCHARAT(++locinput);
3536             break;
3537         case NDIGITL:
3538             PL_reg_flags |= RF_tainted;
3539             /* FALL THROUGH */
3540         case NDIGIT:
3541             if (!nextchr && locinput >= PL_regeol)
3542                 sayNO;
3543             if (do_utf8) {
3544                 LOAD_UTF8_CHARCLASS_DIGIT();
3545                 if (OP(scan) == NDIGIT
3546                     ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3547                     : isDIGIT_LC_utf8((U8*)locinput))
3548                 {
3549                     sayNO;
3550                 }
3551                 locinput += PL_utf8skip[nextchr];
3552                 nextchr = UCHARAT(locinput);
3553                 break;
3554             }
3555             if (OP(scan) == NDIGIT
3556                 ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3557                 sayNO;
3558             nextchr = UCHARAT(++locinput);
3559             break;
3560         case CLUMP:
3561             if (locinput >= PL_regeol)
3562                 sayNO;
3563             if  (do_utf8) {
3564                 LOAD_UTF8_CHARCLASS_MARK();
3565                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3566                     sayNO;
3567                 locinput += PL_utf8skip[nextchr];
3568                 while (locinput < PL_regeol &&
3569                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3570                     locinput += UTF8SKIP(locinput);
3571                 if (locinput > PL_regeol)
3572                     sayNO;
3573             }
3574             else
3575                locinput++;
3576             nextchr = UCHARAT(locinput);
3577             break;
3578
3579         case NREFFL:
3580         {
3581             char *s;
3582             char type;
3583             PL_reg_flags |= RF_tainted;
3584             /* FALL THROUGH */
3585         case NREF:
3586         case NREFF:
3587             type = OP(scan);
3588             n = reg_check_named_buff_matched(rex,scan);
3589
3590             if ( n ) {
3591                 type = REF + ( type - NREF );
3592                 goto do_ref;
3593             } else {
3594                 sayNO;
3595             }
3596             /* unreached */
3597         case REFFL:
3598             PL_reg_flags |= RF_tainted;
3599             /* FALL THROUGH */
3600         case REF:
3601         case REFF:
3602             n = ARG(scan);  /* which paren pair */
3603             type = OP(scan);
3604           do_ref:
3605             ln = PL_regoffs[n].start;
3606             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3607             if (*PL_reglastparen < n || ln == -1)
3608                 sayNO;                  /* Do not match unless seen CLOSEn. */
3609             if (ln == PL_regoffs[n].end)
3610                 break;
3611
3612             s = PL_bostr + ln;
3613             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3614                 char *l = locinput;
3615                 const char *e = PL_bostr + PL_regoffs[n].end;
3616                 /*
3617                  * Note that we can't do the "other character" lookup trick as
3618                  * in the 8-bit case (no pun intended) because in Unicode we
3619                  * have to map both upper and title case to lower case.
3620                  */
3621                 if (type == REFF) {
3622                     while (s < e) {
3623                         STRLEN ulen1, ulen2;
3624                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3625                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3626
3627                         if (l >= PL_regeol)
3628                             sayNO;
3629                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3630                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3631                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3632                             sayNO;
3633                         s += ulen1;
3634                         l += ulen2;
3635                     }
3636                 }
3637                 locinput = l;
3638                 nextchr = UCHARAT(locinput);
3639                 break;
3640             }
3641
3642             /* Inline the first character, for speed. */
3643             if (UCHARAT(s) != nextchr &&
3644                 (type == REF ||
3645                  (UCHARAT(s) != (type == REFF
3646                                   ? PL_fold : PL_fold_locale)[nextchr])))
3647                 sayNO;
3648             ln = PL_regoffs[n].end - ln;
3649             if (locinput + ln > PL_regeol)
3650                 sayNO;
3651             if (ln > 1 && (type == REF
3652                            ? memNE(s, locinput, ln)
3653                            : (type == REFF
3654                               ? ibcmp(s, locinput, ln)
3655                               : ibcmp_locale(s, locinput, ln))))
3656                 sayNO;
3657             locinput += ln;
3658             nextchr = UCHARAT(locinput);
3659             break;
3660         }
3661         case NOTHING:
3662         case TAIL:
3663             break;
3664         case BACK:
3665             break;
3666
3667 #undef  ST
3668 #define ST st->u.eval
3669         {
3670             SV *ret;
3671             regexp *re;
3672             regexp_internal *rei;
3673             regnode *startpoint;
3674
3675         case GOSTART:
3676         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3677             if (cur_eval && cur_eval->locinput==locinput) {
3678                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3679                     Perl_croak(aTHX_ "Infinite recursion in regex");
3680                 if ( ++nochange_depth > max_nochange_depth )
3681                     Perl_croak(aTHX_
3682                         "Pattern subroutine nesting without pos change"
3683                         " exceeded limit in regex");
3684             } else {
3685                 nochange_depth = 0;
3686             }
3687             re = rex;
3688             rei = rexi;
3689             (void)ReREFCNT_inc(rex);
3690             if (OP(scan)==GOSUB) {
3691                 startpoint = scan + ARG2L(scan);
3692                 ST.close_paren = ARG(scan);
3693             } else {
3694                 startpoint = rei->program+1;
3695                 ST.close_paren = 0;
3696             }
3697             goto eval_recurse_doit;
3698             /* NOTREACHED */
3699         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3700             if (cur_eval && cur_eval->locinput==locinput) {
3701                 if ( ++nochange_depth > max_nochange_depth )
3702                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3703             } else {
3704                 nochange_depth = 0;
3705             }
3706             {
3707                 /* execute the code in the {...} */
3708                 dSP;
3709                 SV ** const before = SP;
3710                 OP_4tree * const oop = PL_op;
3711                 COP * const ocurcop = PL_curcop;
3712                 PAD *old_comppad;
3713
3714                 n = ARG(scan);
3715                 PL_op = (OP_4tree*)rexi->data->data[n];
3716                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3717                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3718                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3719                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3720
3721                 if (sv_yes_mark) {
3722                     SV *sv_mrk = get_sv("REGMARK", 1);
3723                     sv_setsv(sv_mrk, sv_yes_mark);
3724                 }
3725
3726                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3727                 SPAGAIN;
3728                 if (SP == before)
3729                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3730                 else {
3731                     ret = POPs;
3732                     PUTBACK;
3733                 }
3734
3735                 PL_op = oop;
3736                 PAD_RESTORE_LOCAL(old_comppad);
3737                 PL_curcop = ocurcop;
3738                 if (!logical) {
3739                     /* /(?{...})/ */
3740                     sv_setsv(save_scalar(PL_replgv), ret);
3741                     break;
3742                 }
3743             }
3744             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3745                 logical = 0;
3746                 {
3747                     /* extract RE object from returned value; compiling if
3748                      * necessary */
3749
3750                     MAGIC *mg = NULL;
3751                     const SV *sv;
3752                     if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3753                         mg = mg_find(sv, PERL_MAGIC_qr);
3754                     else if (SvSMAGICAL(ret)) {
3755                         if (SvGMAGICAL(ret))
3756                             sv_unmagic(ret, PERL_MAGIC_qr);
3757                         else
3758                             mg = mg_find(ret, PERL_MAGIC_qr);
3759                     }
3760
3761                     if (mg) {
3762                         re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3763                     }
3764                     else {
3765                         U32 pm_flags = 0;
3766                         const I32 osize = PL_regsize;
3767
3768                         if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3769                         re = CALLREGCOMP(ret, pm_flags);
3770                         if (!(SvFLAGS(ret)
3771                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3772                                 | SVs_GMG)))
3773                             sv_magic(ret,MUTABLE_SV(ReREFCNT_inc(re)),
3774                                         PERL_MAGIC_qr,0,0);
3775                         PL_regsize = osize;
3776                     }
3777                 }
3778                 RXp_MATCH_COPIED_off(re);
3779                 re->subbeg = rex->subbeg;
3780                 re->sublen = rex->sublen;
3781                 rei = RXi_GET(re);
3782                 DEBUG_EXECUTE_r(
3783                     debug_start_match(re, do_utf8, locinput, PL_regeol,
3784                         "Matching embedded");
3785                 );
3786                 startpoint = rei->program + 1;
3787                 ST.close_paren = 0; /* only used for GOSUB */
3788                 /* borrowed from regtry */
3789                 if (PL_reg_start_tmpl <= re->nparens) {
3790                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3791                     if(PL_reg_start_tmp)
3792                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3793                     else
3794                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3795                 }
3796
3797         eval_recurse_doit: /* Share code with GOSUB below this line */
3798                 /* run the pattern returned from (??{...}) */
3799                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3800                 REGCP_SET(ST.lastcp);
3801
3802                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3803
3804                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3805                 PL_reglastparen = &re->lastparen;
3806                 PL_reglastcloseparen = &re->lastcloseparen;
3807                 re->lastparen = 0;
3808                 re->lastcloseparen = 0;
3809
3810                 PL_reginput = locinput;
3811                 PL_regsize = 0;
3812
3813                 /* XXXX This is too dramatic a measure... */
3814                 PL_reg_maxiter = 0;
3815
3816                 ST.toggle_reg_flags = PL_reg_flags;
3817                 if (RX_UTF8(re))
3818                     PL_reg_flags |= RF_utf8;
3819                 else
3820                     PL_reg_flags &= ~RF_utf8;
3821                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3822
3823                 ST.prev_rex = rex;
3824                 ST.prev_curlyx = cur_curlyx;
3825                 SETREX(rex,re);
3826                 rexi = rei;
3827                 cur_curlyx = NULL;
3828                 ST.B = next;
3829                 ST.prev_eval = cur_eval;
3830                 cur_eval = st;
3831                 /* now continue from first node in postoned RE */
3832                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3833                 /* NOTREACHED */
3834             }
3835             /* logical is 1,   /(?(?{...})X|Y)/ */
3836             sw = (bool)SvTRUE(ret);
3837             logical = 0;
3838             break;
3839         }
3840
3841         case EVAL_AB: /* cleanup after a successful (??{A})B */
3842             /* note: this is called twice; first after popping B, then A */
3843             PL_reg_flags ^= ST.toggle_reg_flags;
3844             ReREFCNT_dec(rex);
3845             SETREX(rex,ST.prev_rex);
3846             rexi = RXi_GET(rex);
3847             regcpblow(ST.cp);
3848             cur_eval = ST.prev_eval;
3849             cur_curlyx = ST.prev_curlyx;
3850
3851             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3852             PL_reglastparen = &rex->lastparen;
3853             PL_reglastcloseparen = &rex->lastcloseparen;
3854             /* also update PL_regoffs */
3855             PL_regoffs = rex->offs;
3856
3857             /* XXXX This is too dramatic a measure... */
3858             PL_reg_maxiter = 0;
3859             if ( nochange_depth )
3860                 nochange_depth--;
3861             sayYES;
3862
3863
3864         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3865             /* note: this is called twice; first after popping B, then A */
3866             PL_reg_flags ^= ST.toggle_reg_flags;
3867             ReREFCNT_dec(rex);
3868             SETREX(rex,ST.prev_rex);
3869             rexi = RXi_GET(rex);
3870             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3871             PL_reglastparen = &rex->lastparen;
3872             PL_reglastcloseparen = &rex->lastcloseparen;
3873
3874             PL_reginput = locinput;
3875             REGCP_UNWIND(ST.lastcp);
3876             regcppop(rex);
3877             cur_eval = ST.prev_eval;
3878             cur_curlyx = ST.prev_curlyx;
3879             /* XXXX This is too dramatic a measure... */
3880             PL_reg_maxiter = 0;
3881             if ( nochange_depth )
3882                 nochange_depth--;
3883             sayNO_SILENT;
3884 #undef ST
3885
3886         case OPEN:
3887             n = ARG(scan);  /* which paren pair */
3888             PL_reg_start_tmp[n] = locinput;
3889             if (n > PL_regsize)
3890                 PL_regsize = n;
3891             lastopen = n;
3892             break;
3893         case CLOSE:
3894             n = ARG(scan);  /* which paren pair */
3895             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3896             PL_regoffs[n].end = locinput - PL_bostr;
3897             /*if (n > PL_regsize)
3898                 PL_regsize = n;*/
3899             if (n > *PL_reglastparen)
3900                 *PL_reglastparen = n;
3901             *PL_reglastcloseparen = n;
3902             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3903                 goto fake_end;
3904             }
3905             break;
3906         case ACCEPT:
3907             if (ARG(scan)){
3908                 regnode *cursor;
3909                 for (cursor=scan;
3910                      cursor && OP(cursor)!=END;
3911                      cursor=regnext(cursor))
3912                 {
3913                     if ( OP(cursor)==CLOSE ){
3914                         n = ARG(cursor);
3915                         if ( n <= lastopen ) {
3916                             PL_regoffs[n].start
3917                                 = PL_reg_start_tmp[n] - PL_bostr;
3918                             PL_regoffs[n].end = locinput - PL_bostr;
3919                             /*if (n > PL_regsize)
3920                             PL_regsize = n;*/
3921                             if (n > *PL_reglastparen)
3922                                 *PL_reglastparen = n;
3923                             *PL_reglastcloseparen = n;
3924                             if ( n == ARG(scan) || (cur_eval &&
3925                                 cur_eval->u.eval.close_paren == n))
3926                                 break;
3927                         }
3928                     }
3929                 }
3930             }
3931             goto fake_end;
3932             /*NOTREACHED*/
3933         case GROUPP:
3934             n = ARG(scan);  /* which paren pair */
3935             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3936             break;
3937         case NGROUPP:
3938             /* reg_check_named_buff_matched returns 0 for no match */
3939             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3940             break;
3941         case INSUBP:
3942             n = ARG(scan);
3943             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3944             break;
3945         case DEFINEP:
3946             sw = 0;
3947             break;
3948         case IFTHEN:
3949             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3950             if (sw)
3951                 next = NEXTOPER(NEXTOPER(scan));
3952             else {
3953                 next = scan + ARG(scan);
3954                 if (OP(next) == IFTHEN) /* Fake one. */
3955                     next = NEXTOPER(NEXTOPER(next));
3956             }
3957             break;
3958         case LOGICAL:
3959             logical = scan->flags;
3960             break;
3961
3962 /*******************************************************************
3963
3964 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3965 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3966 STAR/PLUS/CURLY/CURLYN are used instead.)
3967
3968 A*B is compiled as <CURLYX><A><WHILEM><B>
3969
3970 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3971 state, which contains the current count, initialised to -1. It also sets
3972 cur_curlyx to point to this state, with any previous value saved in the
3973 state block.
3974
3975 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3976 since the pattern may possibly match zero times (i.e. it's a while {} loop
3977 rather than a do {} while loop).
3978
3979 Each entry to WHILEM represents a successful match of A. The count in the
3980 CURLYX block is incremented, another WHILEM state is pushed, and execution
3981 passes to A or B depending on greediness and the current count.
3982
3983 For example, if matching against the string a1a2a3b (where the aN are
3984 substrings that match /A/), then the match progresses as follows: (the
3985 pushed states are interspersed with the bits of strings matched so far):
3986
3987     <CURLYX cnt=-1>
3988     <CURLYX cnt=0><WHILEM>
3989     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3990     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3991     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3992     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3993
3994 (Contrast this with something like CURLYM, which maintains only a single
3995 backtrack state:
3996
3997     <CURLYM cnt=0> a1
3998     a1 <CURLYM cnt=1> a2
3999     a1 a2 <CURLYM cnt=2> a3
4000     a1 a2 a3 <CURLYM cnt=3> b
4001 )
4002
4003 Each WHILEM state block marks a point to backtrack to upon partial failure
4004 of A or B, and also contains some minor state data related to that
4005 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4006 overall state, such as the count, and pointers to the A and B ops.
4007
4008 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4009 must always point to the *current* CURLYX block, the rules are:
4010
4011 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4012 and set cur_curlyx to point the new block.
4013
4014 When popping the CURLYX block after a successful or unsuccessful match,
4015 restore the previous cur_curlyx.
4016
4017 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4018 to the outer one saved in the CURLYX block.
4019
4020 When popping the WHILEM block after a successful or unsuccessful B match,
4021 restore the previous cur_curlyx.
4022
4023 Here's an example for the pattern (AI* BI)*BO
4024 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4025
4026 cur_
4027 curlyx backtrack stack
4028 ------ ---------------
4029 NULL
4030 CO     <CO prev=NULL> <WO>
4031 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4032 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4033 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4034
4035 At this point the pattern succeeds, and we work back down the stack to
4036 clean up, restoring as we go:
4037
4038 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4039 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4040 CO     <CO prev=NULL> <WO>
4041 NULL
4042
4043 *******************************************************************/
4044
4045 #define ST st->u.curlyx
4046
4047         case CURLYX:    /* start of /A*B/  (for complex A) */
4048         {
4049             /* No need to save/restore up to this paren */
4050             I32 parenfloor = scan->flags;
4051
4052             assert(next); /* keep Coverity happy */
4053             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4054                 next += ARG(next);
4055
4056             /* XXXX Probably it is better to teach regpush to support
4057                parenfloor > PL_regsize... */
4058             if (parenfloor > (I32)*PL_reglastparen)
4059                 parenfloor = *PL_reglastparen; /* Pessimization... */
4060
4061             ST.prev_curlyx= cur_curlyx;
4062             cur_curlyx = st;
4063             ST.cp = PL_savestack_ix;
4064
4065             /* these fields contain the state of the current curly.
4066              * they are accessed by subsequent WHILEMs */
4067             ST.parenfloor = parenfloor;
4068             ST.min = ARG1(scan);
4069             ST.max = ARG2(scan);
4070             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4071             ST.B = next;
4072             ST.minmod = minmod;
4073             minmod = 0;
4074             ST.count = -1;      /* this will be updated by WHILEM */
4075             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4076
4077             PL_reginput = locinput;
4078             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4079             /* NOTREACHED */
4080         }
4081
4082         case CURLYX_end: /* just finished matching all of A*B */
4083             cur_curlyx = ST.prev_curlyx;
4084             sayYES;
4085             /* NOTREACHED */
4086
4087         case CURLYX_end_fail: /* just failed to match all of A*B */
4088             regcpblow(ST.cp);
4089             cur_curlyx = ST.prev_curlyx;
4090             sayNO;
4091             /* NOTREACHED */
4092
4093
4094 #undef ST
4095 #define ST st->u.whilem
4096
4097         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4098         {
4099             /* see the discussion above about CURLYX/WHILEM */
4100             I32 n;
4101             assert(cur_curlyx); /* keep Coverity happy */
4102             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4103             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4104             ST.cache_offset = 0;
4105             ST.cache_mask = 0;
4106
4107             PL_reginput = locinput;
4108
4109             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4110                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4111                   REPORT_CODE_OFF+depth*2, "", (long)n,
4112                   (long)cur_curlyx->u.curlyx.min,
4113                   (long)cur_curlyx->u.curlyx.max)
4114             );
4115
4116             /* First just match a string of min A's. */
4117
4118             if (n < cur_curlyx->u.curlyx.min) {
4119                 cur_curlyx->u.curlyx.lastloc = locinput;
4120                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4121                 /* NOTREACHED */
4122             }
4123
4124             /* If degenerate A matches "", assume A done. */
4125
4126             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4127                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4128                    "%*s  whilem: empty match detected, trying continuation...\n",
4129                    REPORT_CODE_OFF+depth*2, "")
4130                 );
4131                 goto do_whilem_B_max;
4132             }
4133
4134             /* super-linear cache processing */
4135
4136             if (scan->flags) {
4137
4138                 if (!PL_reg_maxiter) {
4139                     /* start the countdown: Postpone detection until we
4140                      * know the match is not *that* much linear. */
4141                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4142                     /* possible overflow for long strings and many CURLYX's */
4143                     if (PL_reg_maxiter < 0)
4144                         PL_reg_maxiter = I32_MAX;
4145                     PL_reg_leftiter = PL_reg_maxiter;
4146                 }
4147
4148                 if (PL_reg_leftiter-- == 0) {
4149                     /* initialise cache */
4150                     const I32 size = (PL_reg_maxiter + 7)/8;
4151                     if (PL_reg_poscache) {
4152                         if ((I32)PL_reg_poscache_size < size) {
4153                             Renew(PL_reg_poscache, size, char);
4154                             PL_reg_poscache_size = size;
4155                         }
4156                         Zero(PL_reg_poscache, size, char);
4157                     }
4158                     else {
4159                         PL_reg_poscache_size = size;
4160                         Newxz(PL_reg_poscache, size, char);
4161                     }
4162                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4163       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4164                               PL_colors[4], PL_colors[5])
4165                     );
4166                 }
4167
4168                 if (PL_reg_leftiter < 0) {
4169                     /* have we already failed at this position? */
4170                     I32 offset, mask;
4171                     offset  = (scan->flags & 0xf) - 1
4172                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4173                     mask    = 1 << (offset % 8);
4174                     offset /= 8;
4175                     if (PL_reg_poscache[offset] & mask) {
4176                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4177                             "%*s  whilem: (cache) already tried at this position...\n",
4178                             REPORT_CODE_OFF+depth*2, "")
4179                         );
4180                         sayNO; /* cache records failure */
4181                     }
4182                     ST.cache_offset = offset;
4183                     ST.cache_mask   = mask;
4184                 }
4185             }
4186
4187             /* Prefer B over A for minimal matching. */
4188
4189             if (cur_curlyx->u.curlyx.minmod) {
4190                 ST.save_curlyx = cur_curlyx;
4191                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4192                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4193                 REGCP_SET(ST.lastcp);
4194                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4195                 /* NOTREACHED */
4196             }
4197
4198             /* Prefer A over B for maximal matching. */
4199
4200             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4201                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4202                 cur_curlyx->u.curlyx.lastloc = locinput;
4203                 REGCP_SET(ST.lastcp);
4204                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4205                 /* NOTREACHED */
4206             }
4207             goto do_whilem_B_max;
4208         }
4209         /* NOTREACHED */
4210
4211         case WHILEM_B_min: /* just matched B in a minimal match */
4212         case WHILEM_B_max: /* just matched B in a maximal match */
4213             cur_curlyx = ST.save_curlyx;
4214             sayYES;
4215             /* NOTREACHED */
4216
4217         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4218             cur_curlyx = ST.save_curlyx;
4219             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4220             cur_curlyx->u.curlyx.count--;
4221             CACHEsayNO;
4222             /* NOTREACHED */
4223
4224         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4225             REGCP_UNWIND(ST.lastcp);
4226             regcppop(rex);
4227             /* FALL THROUGH */
4228         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4229             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4230             cur_curlyx->u.curlyx.count--;
4231             CACHEsayNO;
4232             /* NOTREACHED */
4233
4234         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4235             REGCP_UNWIND(ST.lastcp);
4236             regcppop(rex);      /* Restore some previous $<digit>s? */
4237             PL_reginput = locinput;
4238             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4239                 "%*s  whilem: failed, trying continuation...\n",
4240                 REPORT_CODE_OFF+depth*2, "")
4241             );
4242           do_whilem_B_max:
4243             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4244                 && ckWARN(WARN_REGEXP)
4245                 && !(PL_reg_flags & RF_warned))
4246             {
4247                 PL_reg_flags |= RF_warned;
4248                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4249                      "Complex regular subexpression recursion",
4250                      REG_INFTY - 1);
4251             }
4252
4253             /* now try B */
4254             ST.save_curlyx = cur_curlyx;
4255             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4256             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4257             /* NOTREACHED */
4258
4259         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4260             cur_curlyx = ST.save_curlyx;
4261             REGCP_UNWIND(ST.lastcp);
4262             regcppop(rex);
4263
4264             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4265                 /* Maximum greed exceeded */
4266                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4267                     && ckWARN(WARN_REGEXP)
4268                     && !(PL_reg_flags & RF_warned))
4269                 {
4270                     PL_reg_flags |= RF_warned;
4271                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4272                         "%s limit (%d) exceeded",
4273                         "Complex regular subexpression recursion",
4274                         REG_INFTY - 1);
4275                 }
4276                 cur_curlyx->u.curlyx.count--;
4277                 CACHEsayNO;
4278             }
4279
4280             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4281                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4282             );
4283             /* Try grabbing another A and see if it helps. */
4284             PL_reginput = locinput;
4285             cur_curlyx->u.curlyx.lastloc = locinput;
4286             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4287             REGCP_SET(ST.lastcp);
4288             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4289             /* NOTREACHED */
4290
4291 #undef  ST
4292 #define ST st->u.branch
4293
4294         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4295             next = scan + ARG(scan);
4296             if (next == scan)
4297                 next = NULL;
4298             scan = NEXTOPER(scan);
4299             /* FALL THROUGH */
4300
4301         case BRANCH:        /*  /(...|A|...)/ */
4302             scan = NEXTOPER(scan); /* scan now points to inner node */
4303             ST.lastparen = *PL_reglastparen;
4304             ST.next_branch = next;
4305             REGCP_SET(ST.cp);
4306             PL_reginput = locinput;
4307
4308             /* Now go into the branch */
4309             if (has_cutgroup) {
4310                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4311             } else {
4312                 PUSH_STATE_GOTO(BRANCH_next, scan);
4313             }
4314             /* NOTREACHED */
4315         case CUTGROUP:
4316             PL_reginput = locinput;
4317             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4318                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4319             PUSH_STATE_GOTO(CUTGROUP_next,next);
4320             /* NOTREACHED */
4321         case CUTGROUP_next_fail:
4322             do_cutgroup = 1;
4323             no_final = 1;
4324             if (st->u.mark.mark_name)
4325                 sv_commit = st->u.mark.mark_name;
4326             sayNO;
4327             /* NOTREACHED */
4328         case BRANCH_next:
4329             sayYES;
4330             /* NOTREACHED */
4331         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4332             if (do_cutgroup) {
4333                 do_cutgroup = 0;
4334                 no_final = 0;
4335             }
4336             REGCP_UNWIND(ST.cp);
4337             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4338                 PL_regoffs[n].end = -1;
4339             *PL_reglastparen = n;
4340             /*dmq: *PL_reglastcloseparen = n; */
4341             scan = ST.next_branch;
4342             /* no more branches? */
4343             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4344                 DEBUG_EXECUTE_r({
4345                     PerlIO_printf( Perl_debug_log,
4346                         "%*s  %sBRANCH failed...%s\n",
4347                         REPORT_CODE_OFF+depth*2, "",
4348                         PL_colors[4],
4349                         PL_colors[5] );
4350                 });
4351                 sayNO_SILENT;
4352             }
4353             continue; /* execute next BRANCH[J] op */
4354             /* NOTREACHED */
4355
4356         case MINMOD:
4357             minmod = 1;
4358             break;
4359
4360 #undef  ST
4361 #define ST st->u.curlym
4362
4363         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4364
4365             /* This is an optimisation of CURLYX that enables us to push
4366              * only a single backtracking state, no matter how many matches
4367              * there are in {m,n}. It relies on the pattern being constant
4368              * length, with no parens to influence future backrefs
4369              */
4370
4371             ST.me = scan;
4372             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4373
4374             /* if paren positive, emulate an OPEN/CLOSE around A */
4375             if (ST.me->flags) {
4376                 U32 paren = ST.me->flags;
4377                 if (paren > PL_regsize)
4378                     PL_regsize = paren;
4379                 if (paren > *PL_reglastparen)
4380                     *PL_reglastparen = paren;
4381                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4382             }
4383             ST.A = scan;
4384             ST.B = next;
4385             ST.alen = 0;
4386             ST.count = 0;
4387             ST.minmod = minmod;
4388             minmod = 0;
4389             ST.c1 = CHRTEST_UNINIT;
4390             REGCP_SET(ST.cp);
4391
4392             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4393                 goto curlym_do_B;
4394
4395           curlym_do_A: /* execute the A in /A{m,n}B/  */
4396             PL_reginput = locinput;
4397             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4398             /* NOTREACHED */
4399
4400         case CURLYM_A: /* we've just matched an A */
4401             locinput = st->locinput;
4402             nextchr = UCHARAT(locinput);
4403
4404             ST.count++;
4405             /* after first match, determine A's length: u.curlym.alen */
4406             if (ST.count == 1) {
4407                 if (PL_reg_match_utf8) {
4408                     char *s = locinput;
4409                     while (s < PL_reginput) {
4410                         ST.alen++;
4411                         s += UTF8SKIP(s);
4412                     }
4413                 }
4414                 else {
4415                     ST.alen = PL_reginput - locinput;
4416                 }
4417                 if (ST.alen == 0)
4418                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4419             }
4420             DEBUG_EXECUTE_r(
4421                 PerlIO_printf(Perl_debug_log,
4422                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4423                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4424                           (IV) ST.count, (IV)ST.alen)
4425             );
4426
4427             locinput = PL_reginput;
4428
4429             if (cur_eval && cur_eval->u.eval.close_paren &&
4430                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4431                 goto fake_end;
4432
4433             {
4434                 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4435                 if ( max == REG_INFTY || ST.count < max )
4436                     goto curlym_do_A; /* try to match another A */
4437             }
4438             goto curlym_do_B; /* try to match B */
4439
4440         case CURLYM_A_fail: /* just failed to match an A */
4441             REGCP_UNWIND(ST.cp);
4442
4443             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4444                 || (cur_eval && cur_eval->u.eval.close_paren &&
4445                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4446                 sayNO;
4447
4448           curlym_do_B: /* execute the B in /A{m,n}B/  */
4449             PL_reginput = locinput;
4450             if (ST.c1 == CHRTEST_UNINIT) {
4451                 /* calculate c1 and c2 for possible match of 1st char
4452                  * following curly */
4453                 ST.c1 = ST.c2 = CHRTEST_VOID;
4454                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4455                     regnode *text_node = ST.B;
4456                     if (! HAS_TEXT(text_node))
4457                         FIND_NEXT_IMPT(text_node);
4458                     /* this used to be
4459
4460                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4461
4462                         But the former is redundant in light of the latter.
4463
4464                         if this changes back then the macro for
4465                         IS_TEXT and friends need to change.
4466                      */
4467                     if (PL_regkind[OP(text_node)] == EXACT)
4468                     {
4469
4470                         ST.c1 = (U8)*STRING(text_node);
4471                         ST.c2 =
4472                             (IS_TEXTF(text_node))
4473                             ? PL_fold[ST.c1]
4474                             : (IS_TEXTFL(text_node))
4475                                 ? PL_fold_locale[ST.c1]
4476                                 : ST.c1;
4477                     }
4478                 }
4479             }
4480
4481             DEBUG_EXECUTE_r(
4482                 PerlIO_printf(Perl_debug_log,
4483                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4484                     (int)(REPORT_CODE_OFF+(depth*2)),
4485                     "", (IV)ST.count)
4486                 );
4487             if (ST.c1 != CHRTEST_VOID
4488                     && UCHARAT(PL_reginput) != ST.c1
4489                     && UCHARAT(PL_reginput) != ST.c2)
4490             {
4491                 /* simulate B failing */
4492                 DEBUG_OPTIMISE_r(
4493                     PerlIO_printf(Perl_debug_log,
4494                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4495                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4496                         (IV)ST.c1,(IV)ST.c2
4497                 ));
4498                 state_num = CURLYM_B_fail;
4499                 goto reenter_switch;
4500             }
4501
4502             if (ST.me->flags) {
4503                 /* mark current A as captured */
4504                 I32 paren = ST.me->flags;
4505                 if (ST.count) {
4506                     PL_regoffs[paren].start
4507                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4508                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4509                     /*dmq: *PL_reglastcloseparen = paren; */
4510                 }
4511                 else
4512                     PL_regoffs[paren].end = -1;
4513                 if (cur_eval && cur_eval->u.eval.close_paren &&
4514                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4515                 {
4516                     if (ST.count)
4517                         goto fake_end;
4518                     else
4519                         sayNO;
4520                 }
4521             }
4522
4523             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4524             /* NOTREACHED */
4525
4526         case CURLYM_B_fail: /* just failed to match a B */
4527             REGCP_UNWIND(ST.cp);
4528             if (ST.minmod) {
4529                 I32 max = ARG2(ST.me);
4530                 if (max != REG_INFTY && ST.count == max)
4531                     sayNO;
4532                 goto curlym_do_A; /* try to match a further A */
4533             }
4534             /* backtrack one A */
4535             if (ST.count == ARG1(ST.me) /* min */)
4536                 sayNO;
4537             ST.count--;
4538             locinput = HOPc(locinput, -ST.alen);
4539             goto curlym_do_B; /* try to match B */
4540
4541 #undef ST
4542 #define ST st->u.curly
4543
4544 #define CURLY_SETPAREN(paren, success) \
4545     if (paren) { \
4546         if (success) { \
4547             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4548             PL_regoffs[paren].end = locinput - PL_bostr; \
4549             *PL_reglastcloseparen = paren; \
4550         } \
4551         else \
4552             PL_regoffs[paren].end = -1; \
4553     }
4554
4555         case STAR:              /*  /A*B/ where A is width 1 */
4556             ST.paren = 0;
4557             ST.min = 0;
4558             ST.max = REG_INFTY;
4559             scan = NEXTOPER(scan);
4560             goto repeat;
4561         case PLUS:              /*  /A+B/ where A is width 1 */
4562             ST.paren = 0;
4563             ST.min = 1;
4564             ST.max = REG_INFTY;
4565             scan = NEXTOPER(scan);
4566             goto repeat;
4567         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4568             ST.paren = scan->flags;     /* Which paren to set */
4569             if (ST.paren > PL_regsize)
4570                 PL_regsize = ST.paren;
4571             if (ST.paren > *PL_reglastparen)
4572                 *PL_reglastparen = ST.paren;
4573             ST.min = ARG1(scan);  /* min to match */
4574             ST.max = ARG2(scan);  /* max to match */
4575             if (cur_eval && cur_eval->u.eval.close_paren &&
4576                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4577                 ST.min=1;
4578                 ST.max=1;
4579             }
4580             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4581             goto repeat;
4582         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4583             ST.paren = 0;
4584             ST.min = ARG1(scan);  /* min to match */
4585             ST.max = ARG2(scan);  /* max to match */
4586             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4587           repeat:
4588             /*
4589             * Lookahead to avoid useless match attempts
4590             * when we know what character comes next.
4591             *
4592             * Used to only do .*x and .*?x, but now it allows
4593             * for )'s, ('s and (?{ ... })'s to be in the way
4594             * of the quantifier and the EXACT-like node.  -- japhy
4595             */
4596
4597             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4598                 sayNO;
4599             if (HAS_TEXT(next) || JUMPABLE(next)) {
4600                 U8 *s;
4601                 regnode *text_node = next;
4602
4603                 if (! HAS_TEXT(text_node))
4604                     FIND_NEXT_IMPT(text_node);
4605
4606                 if (! HAS_TEXT(text_node))
4607                     ST.c1 = ST.c2 = CHRTEST_VOID;
4608                 else {
4609                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4610                         ST.c1 = ST.c2 = CHRTEST_VOID;
4611                         goto assume_ok_easy;
4612                     }
4613                     else
4614                         s = (U8*)STRING(text_node);
4615
4616                     /*  Currently we only get here when
4617
4618                         PL_rekind[OP(text_node)] == EXACT
4619
4620                         if this changes back then the macro for IS_TEXT and
4621                         friends need to change. */
4622                     if (!UTF) {
4623                         ST.c2 = ST.c1 = *s;
4624                         if (IS_TEXTF(text_node))
4625                             ST.c2 = PL_fold[ST.c1];
4626                         else if (IS_TEXTFL(text_node))
4627                             ST.c2 = PL_fold_locale[ST.c1];
4628                     }
4629                     else { /* UTF */
4630                         if (IS_TEXTF(text_node)) {
4631                              STRLEN ulen1, ulen2;
4632                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4633                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4634
4635                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4636                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4637 #ifdef EBCDIC
4638                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4639                                                     ckWARN(WARN_UTF8) ?
4640                                                     0 : UTF8_ALLOW_ANY);
4641                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4642                                                     ckWARN(WARN_UTF8) ?
4643                                                     0 : UTF8_ALLOW_ANY);
4644 #else
4645                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4646                                                     uniflags);
4647                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4648                                                     uniflags);
4649 #endif
4650                         }
4651                         else {
4652                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4653                                                      uniflags);
4654                         }
4655                     }
4656                 }
4657             }
4658             else
4659                 ST.c1 = ST.c2 = CHRTEST_VOID;
4660         assume_ok_easy:
4661
4662             ST.A = scan;
4663             ST.B = next;
4664             PL_reginput = locinput;
4665             if (minmod) {
4666                 minmod = 0;
4667                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4668                     sayNO;
4669                 ST.count = ST.min;
4670                 locinput = PL_reginput;
4671                 REGCP_SET(ST.cp);
4672                 if (ST.c1 == CHRTEST_VOID)
4673                     goto curly_try_B_min;
4674
4675                 ST.oldloc = locinput;
4676
4677                 /* set ST.maxpos to the furthest point along the
4678                  * string that could possibly match */
4679                 if  (ST.max == REG_INFTY) {
4680                     ST.maxpos = PL_regeol - 1;
4681                     if (do_utf8)
4682                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4683                             ST.maxpos--;
4684                 }
4685                 else if (do_utf8) {
4686                     int m = ST.max - ST.min;
4687                     for (ST.maxpos = locinput;
4688                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4689                         ST.maxpos += UTF8SKIP(ST.maxpos);
4690                 }
4691                 else {
4692                     ST.maxpos = locinput + ST.max - ST.min;
4693                     if (ST.maxpos >= PL_regeol)
4694                         ST.maxpos = PL_regeol - 1;
4695                 }
4696                 goto curly_try_B_min_known;
4697
4698             }
4699             else {
4700                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4701                 locinput = PL_reginput;
4702                 if (ST.count < ST.min)
4703                     sayNO;
4704                 if ((ST.count > ST.min)
4705                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4706                 {
4707                     /* A{m,n} must come at the end of the string, there's
4708                      * no point in backing off ... */
4709                     ST.min = ST.count;
4710                     /* ...except that $ and \Z can match before *and* after
4711                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4712                        We may back off by one in this case. */
4713                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4714                         ST.min--;
4715                 }
4716                 REGCP_SET(ST.cp);
4717                 goto curly_try_B_max;
4718             }
4719             /* NOTREACHED */
4720
4721
4722         case CURLY_B_min_known_fail:
4723             /* failed to find B in a non-greedy match where c1,c2 valid */
4724             if (ST.paren && ST.count)
4725                 PL_regoffs[ST.paren].end = -1;
4726
4727             PL_reginput = locinput;     /* Could be reset... */
4728             REGCP_UNWIND(ST.cp);
4729             /* Couldn't or didn't -- move forward. */
4730             ST.oldloc = locinput;
4731             if (do_utf8)
4732                 locinput += UTF8SKIP(locinput);
4733             else
4734                 locinput++;
4735             ST.count++;
4736           curly_try_B_min_known:
4737              /* find the next place where 'B' could work, then call B */
4738             {
4739                 int n;
4740                 if (do_utf8) {
4741                     n = (ST.oldloc == locinput) ? 0 : 1;
4742                     if (ST.c1 == ST.c2) {
4743                         STRLEN len;
4744                         /* set n to utf8_distance(oldloc, locinput) */
4745                         while (locinput <= ST.maxpos &&
4746                                utf8n_to_uvchr((U8*)locinput,
4747                                               UTF8_MAXBYTES, &len,
4748                                               uniflags) != (UV)ST.c1) {
4749                             locinput += len;
4750                             n++;
4751                         }
4752                     }
4753                     else {
4754                         /* set n to utf8_distance(oldloc, locinput) */
4755                         while (locinput <= ST.maxpos) {
4756                             STRLEN len;
4757                             const UV c = utf8n_to_uvchr((U8*)locinput,
4758                                                   UTF8_MAXBYTES, &len,
4759                                                   uniflags);
4760                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4761                                 break;
4762                             locinput += len;
4763                             n++;
4764                         }
4765                     }
4766                 }
4767                 else {
4768                     if (ST.c1 == ST.c2) {
4769                         while (locinput <= ST.maxpos &&
4770                                UCHARAT(locinput) != ST.c1)
4771                             locinput++;
4772                     }
4773                     else {
4774                         while (locinput <= ST.maxpos
4775                                && UCHARAT(locinput) != ST.c1
4776                                && UCHARAT(locinput) != ST.c2)
4777                             locinput++;
4778                     }
4779                     n = locinput - ST.oldloc;
4780                 }
4781                 if (locinput > ST.maxpos)
4782                     sayNO;
4783                 /* PL_reginput == oldloc now */
4784                 if (n) {
4785                     ST.count += n;
4786                     if (regrepeat(rex, ST.A, n, depth) < n)
4787                         sayNO;
4788                 }
4789                 PL_reginput = locinput;
4790                 CURLY_SETPAREN(ST.paren, ST.count);
4791                 if (cur_eval && cur_eval->u.eval.close_paren &&
4792                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4793                     goto fake_end;
4794                 }
4795                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4796             }
4797             /* NOTREACHED */
4798
4799
4800         case CURLY_B_min_fail:
4801             /* failed to find B in a non-greedy match where c1,c2 invalid */
4802             if (ST.paren && ST.count)
4803                 PL_regoffs[ST.paren].end = -1;
4804
4805             REGCP_UNWIND(ST.cp);
4806             /* failed -- move forward one */
4807             PL_reginput = locinput;
4808             if (regrepeat(rex, ST.A, 1, depth)) {
4809                 ST.count++;
4810                 locinput = PL_reginput;
4811                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4812                         ST.count > 0)) /* count overflow ? */
4813                 {
4814                   curly_try_B_min:
4815                     CURLY_SETPAREN(ST.paren, ST.count);
4816                     if (cur_eval && cur_eval->u.eval.close_paren &&
4817                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4818                         goto fake_end;
4819                     }
4820                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4821                 }
4822             }
4823             sayNO;
4824             /* NOTREACHED */
4825
4826
4827         curly_try_B_max:
4828             /* a successful greedy match: now try to match B */
4829             if (cur_eval && cur_eval->u.eval.close_paren &&
4830                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4831                 goto fake_end;
4832             }
4833             {
4834                 UV c = 0;
4835                 if (ST.c1 != CHRTEST_VOID)
4836                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4837                                            UTF8_MAXBYTES, 0, uniflags)
4838                                 : (UV) UCHARAT(PL_reginput);
4839                 /* If it could work, try it. */
4840                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4841                     CURLY_SETPAREN(ST.paren, ST.count);
4842                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4843                     /* NOTREACHED */
4844                 }
4845             }
4846             /* FALL THROUGH */
4847         case CURLY_B_max_fail:
4848             /* failed to find B in a greedy match */
4849             if (ST.paren && ST.count)
4850                 PL_regoffs[ST.paren].end = -1;
4851
4852             REGCP_UNWIND(ST.cp);
4853             /*  back up. */
4854             if (--ST.count < ST.min)
4855                 sayNO;
4856             PL_reginput = locinput = HOPc(locinput, -1);
4857             goto curly_try_B_max;
4858
4859 #undef ST
4860
4861         case END:
4862             fake_end:
4863             if (cur_eval) {
4864                 /* we've just finished A in /(??{A})B/; now continue with B */
4865                 I32 tmpix;
4866                 st->u.eval.toggle_reg_flags
4867                             = cur_eval->u.eval.toggle_reg_flags;
4868                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4869
4870                 st->u.eval.prev_rex = rex;              /* inner */
4871                 SETREX(rex,cur_eval->u.eval.prev_rex);
4872                 rexi = RXi_GET(rex);
4873                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4874                 ReREFCNT_inc(rex);
4875                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4876
4877                 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4878                 PL_reglastparen = &rex->lastparen;
4879                 PL_reglastcloseparen = &rex->lastcloseparen;
4880
4881                 REGCP_SET(st->u.eval.lastcp);
4882                 PL_reginput = locinput;
4883
4884                 /* Restore parens of the outer rex without popping the
4885                  * savestack */
4886                 tmpix = PL_savestack_ix;
4887                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4888                 regcppop(rex);
4889                 PL_savestack_ix = tmpix;
4890
4891                 st->u.eval.prev_eval = cur_eval;
4892                 cur_eval = cur_eval->u.eval.prev_eval;
4893                 DEBUG_EXECUTE_r(
4894                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4895                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4896                 if ( nochange_depth )
4897                     nochange_depth--;
4898
4899                 PUSH_YES_STATE_GOTO(EVAL_AB,
4900                         st->u.eval.prev_eval->u.eval.B); /* match B */
4901             }
4902
4903             if (locinput < reginfo->till) {
4904                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4905                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4906                                       PL_colors[4],
4907                                       (long)(locinput - PL_reg_starttry),
4908                                       (long)(reginfo->till - PL_reg_starttry),
4909                                       PL_colors[5]));
4910
4911                 sayNO_SILENT;           /* Cannot match: too short. */
4912             }
4913             PL_reginput = locinput;     /* put where regtry can find it */
4914             sayYES;                     /* Success! */
4915
4916         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4917             DEBUG_EXECUTE_r(
4918             PerlIO_printf(Perl_debug_log,
4919                 "%*s  %ssubpattern success...%s\n",
4920                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4921             PL_reginput = locinput;     /* put where regtry can find it */
4922             sayYES;                     /* Success! */
4923
4924 #undef  ST
4925 #define ST st->u.ifmatch
4926
4927         case SUSPEND:   /* (?>A) */
4928             ST.wanted = 1;
4929             PL_reginput = locinput;
4930             goto do_ifmatch;
4931
4932         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4933             ST.wanted = 0;
4934             goto ifmatch_trivial_fail_test;
4935
4936         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4937             ST.wanted = 1;
4938           ifmatch_trivial_fail_test:
4939             if (scan->flags) {
4940                 char * const s = HOPBACKc(locinput, scan->flags);
4941                 if (!s) {
4942                     /* trivial fail */
4943                     if (logical) {
4944                         logical = 0;
4945                         sw = 1 - (bool)ST.wanted;
4946                     }
4947                     else if (ST.wanted)
4948                         sayNO;
4949                     next = scan + ARG(scan);
4950                     if (next == scan)
4951                         next = NULL;
4952                     break;
4953                 }
4954                 PL_reginput = s;
4955             }
4956             else
4957                 PL_reginput = locinput;
4958
4959           do_ifmatch:
4960             ST.me = scan;
4961             ST.logical = logical;
4962             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4963
4964             /* execute body of (?...A) */
4965             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4966             /* NOTREACHED */
4967
4968         case IFMATCH_A_fail: /* body of (?...A) failed */
4969             ST.wanted = !ST.wanted;
4970             /* FALL THROUGH */
4971
4972         case IFMATCH_A: /* body of (?...A) succeeded */
4973             if (ST.logical) {
4974                 sw = (bool)ST.wanted;
4975             }
4976             else if (!ST.wanted)
4977                 sayNO;
4978
4979             if (OP(ST.me) == SUSPEND)
4980                 locinput = PL_reginput;
4981             else {
4982                 locinput = PL_reginput = st->locinput;
4983                 nextchr = UCHARAT(locinput);
4984             }
4985             scan = ST.me + ARG(ST.me);
4986             if (scan == ST.me)
4987                 scan = NULL;
4988             continue; /* execute B */
4989
4990 #undef ST
4991
4992         case LONGJMP:
4993             next = scan + ARG(scan);
4994             if (next == scan)
4995                 next = NULL;
4996             break;
4997         case COMMIT:
4998             reginfo->cutpoint = PL_regeol;
4999             /* FALLTHROUGH */
5000         case PRUNE:
5001             PL_reginput = locinput;
5002             if (!scan->flags)
5003                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5004             PUSH_STATE_GOTO(COMMIT_next,next);
5005             /* NOTREACHED */
5006         case COMMIT_next_fail:
5007             no_final = 1;
5008             /* FALLTHROUGH */
5009         case OPFAIL:
5010             sayNO;
5011             /* NOTREACHED */
5012
5013 #define ST st->u.mark
5014         case MARKPOINT:
5015             ST.prev_mark = mark_state;
5016             ST.mark_name = sv_commit = sv_yes_mark
5017                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5018             mark_state = st;
5019             ST.mark_loc = PL_reginput = locinput;
5020             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5021             /* NOTREACHED */
5022         case MARKPOINT_next:
5023             mark_state = ST.prev_mark;
5024             sayYES;
5025             /* NOTREACHED */
5026         case MARKPOINT_next_fail:
5027             if (popmark && sv_eq(ST.mark_name,popmark))
5028             {
5029                 if (ST.mark_loc > startpoint)
5030                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5031                 popmark = NULL; /* we found our mark */
5032                 sv_commit = ST.mark_name;
5033
5034                 DEBUG_EXECUTE_r({
5035                         PerlIO_printf(Perl_debug_log,
5036                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5037                             REPORT_CODE_OFF+depth*2, "",
5038                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5039                 });
5040             }
5041             mark_state = ST.prev_mark;
5042             sv_yes_mark = mark_state ?
5043                 mark_state->u.mark.mark_name : NULL;
5044             sayNO;
5045             /* NOTREACHED */
5046         case SKIP:
5047             PL_reginput = locinput;
5048             if (scan->flags) {
5049                 /* (*SKIP) : if we fail we cut here*/
5050                 ST.mark_name = NULL;
5051                 ST.mark_loc = locinput;
5052                 PUSH_STATE_GOTO(SKIP_next,next);
5053             } else {
5054                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5055                    otherwise do nothing.  Meaning we need to scan
5056                  */
5057                 regmatch_state *cur = mark_state;
5058                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5059
5060                 while (cur) {
5061                     if ( sv_eq( cur->u.mark.mark_name,
5062                                 find ) )
5063                     {
5064                         ST.mark_name = find;
5065                         PUSH_STATE_GOTO( SKIP_next, next );
5066                     }
5067                     cur = cur->u.mark.prev_mark;
5068                 }
5069             }
5070             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5071             break;
5072         case SKIP_next_fail:
5073             if (ST.mark_name) {
5074                 /* (*CUT:NAME) - Set up to search for the name as we
5075                    collapse the stack*/
5076                 popmark = ST.mark_name;
5077             } else {
5078                 /* (*CUT) - No name, we cut here.*/
5079                 if (ST.mark_loc > startpoint)
5080                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5081                 /* but we set sv_commit to latest mark_name if there
5082                    is one so they can test to see how things lead to this
5083                    cut */
5084                 if (mark_state)
5085                     sv_commit=mark_state->u.mark.mark_name;
5086             }
5087             no_final = 1;
5088             sayNO;
5089             /* NOTREACHED */
5090 #undef ST
5091         case FOLDCHAR:
5092             n = ARG(scan);
5093             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5094                 locinput += ln;
5095             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5096                 sayNO;
5097             } else  {
5098                 U8 folded[UTF8_MAXBYTES_CASE+1];
5099                 STRLEN foldlen;
5100                 const char * const l = locinput;
5101                 char *e = PL_regeol;
5102                 to_uni_fold(n, folded, &foldlen);
5103
5104                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5105                                l, &e, 0,  do_utf8)) {
5106                         sayNO;
5107                 }
5108                 locinput = e;
5109             }
5110             nextchr = UCHARAT(locinput);
5111             break;
5112         case LNBREAK:
5113             if ((n=is_LNBREAK(locinput,do_utf8))) {
5114                 locinput += n;
5115                 nextchr = UCHARAT(locinput);
5116             } else
5117                 sayNO;
5118             break;
5119
5120 #define CASE_CLASS(nAmE)                              \
5121         case nAmE:                                    \
5122             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5123                 locinput += n;                        \
5124                 nextchr = UCHARAT(locinput);          \
5125             } else                                    \
5126                 sayNO;                                \
5127             break;                                    \
5128         case N##nAmE:                                 \
5129             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5130                 sayNO;                                \
5131             } else {                                  \
5132                 locinput += UTF8SKIP(locinput);       \
5133                 nextchr = UCHARAT(locinput);          \
5134             }                                         \
5135             break
5136
5137         CASE_CLASS(VERTWS);
5138         CASE_CLASS(HORIZWS);
5139 #undef CASE_CLASS
5140
5141         default:
5142             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5143                           PTR2UV(scan), OP(scan));
5144             Perl_croak(aTHX_ "regexp memory corruption");
5145
5146         } /* end switch */
5147
5148         /* switch break jumps here */
5149         scan = next; /* prepare to execute the next op and ... */
5150         continue;    /* ... jump back to the top, reusing st */
5151         /* NOTREACHED */
5152
5153       push_yes_state:
5154         /* push a state that backtracks on success */
5155         st->u.yes.prev_yes_state = yes_state;
5156         yes_state = st;
5157         /* FALL THROUGH */
5158       push_state:
5159         /* push a new regex state, then continue at scan  */
5160         {
5161             regmatch_state *newst;
5162
5163             DEBUG_STACK_r({
5164                 regmatch_state *cur = st;
5165                 regmatch_state *curyes = yes_state;
5166                 int curd = depth;
5167                 regmatch_slab *slab = PL_regmatch_slab;
5168                 for (;curd > -1;cur--,curd--) {
5169                     if (cur < SLAB_FIRST(slab)) {
5170                         slab = slab->prev;
5171                         cur = SLAB_LAST(slab);
5172                     }
5173                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5174                         REPORT_CODE_OFF + 2 + depth * 2,"",
5175                         curd, PL_reg_name[cur->resume_state],
5176                         (curyes == cur) ? "yes" : ""
5177                     );
5178                     if (curyes == cur)
5179                         curyes = cur->u.yes.prev_yes_state;
5180                 }
5181             } else
5182                 DEBUG_STATE_pp("push")
5183             );
5184             depth++;
5185             st->locinput = locinput;
5186             newst = st+1;
5187             if (newst >  SLAB_LAST(PL_regmatch_slab))
5188                 newst = S_push_slab(aTHX);
5189             PL_regmatch_state = newst;
5190
5191             locinput = PL_reginput;
5192             nextchr = UCHARAT(locinput);
5193             st = newst;
5194             continue;
5195             /* NOTREACHED */
5196         }
5197     }
5198
5199     /*
5200     * We get here only if there's trouble -- normally "case END" is
5201     * the terminating point.
5202     */
5203     Perl_croak(aTHX_ "corrupted regexp pointers");
5204     /*NOTREACHED*/
5205     sayNO;
5206
5207 yes:
5208     if (yes_state) {
5209         /* we have successfully completed a subexpression, but we must now
5210          * pop to the state marked by yes_state and continue from there */
5211         assert(st != yes_state);
5212 #ifdef DEBUGGING
5213         while (st != yes_state) {
5214             st--;
5215             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5216                 PL_regmatch_slab = PL_regmatch_slab->prev;
5217                 st = SLAB_LAST(PL_regmatch_slab);
5218             }
5219             DEBUG_STATE_r({
5220                 if (no_final) {
5221                     DEBUG_STATE_pp("pop (no final)");
5222                 } else {
5223                     DEBUG_STATE_pp("pop (yes)");
5224                 }
5225             });
5226             depth--;
5227         }
5228 #else
5229         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5230             || yes_state > SLAB_LAST(PL_regmatch_slab))
5231         {
5232             /* not in this slab, pop slab */
5233             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5234             PL_regmatch_slab = PL_regmatch_slab->prev;
5235             st = SLAB_LAST(PL_regmatch_slab);
5236         }
5237         depth -= (st - yes_state);
5238 #endif
5239         st = yes_state;
5240         yes_state = st->u.yes.prev_yes_state;
5241         PL_regmatch_state = st;
5242
5243         if (no_final) {
5244             locinput= st->locinput;
5245             nextchr = UCHARAT(locinput);
5246         }
5247         state_num = st->resume_state + no_final;
5248         goto reenter_switch;
5249     }
5250
5251     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5252                           PL_colors[4], PL_colors[5]));
5253
5254     if (PL_reg_eval_set) {
5255         /* each successfully executed (?{...}) block does the equivalent of
5256          *   local $^R = do {...}
5257          * When popping the save stack, all these locals would be undone;
5258          * bypass this by setting the outermost saved $^R to the latest
5259          * value */
5260         if (oreplsv != GvSV(PL_replgv))
5261             sv_setsv(oreplsv, GvSV(PL_replgv));
5262     }
5263     result = 1;
5264     goto final_exit;
5265
5266 no:
5267     DEBUG_EXECUTE_r(
5268         PerlIO_printf(Perl_debug_log,
5269             "%*s  %sfailed...%s\n",
5270             REPORT_CODE_OFF+depth*2, "",
5271             PL_colors[4], PL_colors[5])
5272         );
5273
5274 no_silent:
5275     if (no_final) {
5276         if (yes_state) {
5277             goto yes;
5278         } else {
5279             goto final_exit;
5280         }
5281     }
5282     if (depth) {
5283         /* there's a previous state to backtrack to */
5284         st--;
5285         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5286             PL_regmatch_slab = PL_regmatch_slab->prev;
5287             st = SLAB_LAST(PL_regmatch_slab);
5288         }
5289         PL_regmatch_state = st;
5290         locinput= st->locinput;
5291         nextchr = UCHARAT(locinput);
5292
5293         DEBUG_STATE_pp("pop");
5294         depth--;
5295         if (yes_state == st)
5296             yes_state = st->u.yes.prev_yes_state;
5297
5298         state_num = st->resume_state + 1; /* failure = success + 1 */
5299         goto reenter_switch;
5300     }
5301     result = 0;
5302
5303   final_exit:
5304     if (rex->intflags & PREGf_VERBARG_SEEN) {
5305         SV *sv_err = get_sv("REGERROR", 1);
5306         SV *sv_mrk = get_sv("REGMARK", 1);
5307         if (result) {
5308             sv_commit = &PL_sv_no;
5309             if (!sv_yes_mark)
5310                 sv_yes_mark = &PL_sv_yes;
5311         } else {
5312             if (!sv_commit)
5313                 sv_commit = &PL_sv_yes;
5314             sv_yes_mark = &PL_sv_no;
5315         }
5316         sv_setsv(sv_err, sv_commit);
5317         sv_setsv(sv_mrk, sv_yes_mark);
5318     }
5319
5320     /* clean up; in particular, free all slabs above current one */
5321     LEAVE_SCOPE(oldsave);
5322
5323     return result;
5324 }
5325
5326 /*
5327  - regrepeat - repeatedly match something simple, report how many
5328  */
5329 /*
5330  * [This routine now assumes that it will only match on things of length 1.
5331  * That was true before, but now we assume scan - reginput is the count,
5332  * rather than incrementing count on every character.  [Er, except utf8.]]
5333  */
5334 STATIC I32
5335 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5336 {
5337     dVAR;
5338     register char *scan;
5339     register I32 c;
5340     register char *loceol = PL_regeol;
5341     register I32 hardcount = 0;
5342     register bool do_utf8 = PL_reg_match_utf8;
5343 #ifndef DEBUGGING
5344     PERL_UNUSED_ARG(depth);
5345 #endif
5346
5347     PERL_ARGS_ASSERT_REGREPEAT;
5348
5349     scan = PL_reginput;
5350     if (max == REG_INFTY)
5351         max = I32_MAX;
5352     else if (max < loceol - scan)
5353         loceol = scan + max;
5354     switch (OP(p)) {
5355     case REG_ANY:
5356         if (do_utf8) {
5357             loceol = PL_regeol;
5358             while (scan < loceol && hardcount < max && *scan != '\n') {
5359                 scan += UTF8SKIP(scan);
5360                 hardcount++;
5361             }
5362         } else {
5363             while (scan < loceol && *scan != '\n')
5364                 scan++;
5365         }
5366         break;
5367     case SANY:
5368         if (do_utf8) {
5369             loceol = PL_regeol;
5370             while (scan < loceol && hardcount < max) {
5371                 scan += UTF8SKIP(scan);
5372                 hardcount++;
5373             }
5374         }
5375         else
5376             scan = loceol;
5377         break;
5378     case CANY:
5379         scan = loceol;
5380         break;
5381     case EXACT:         /* length of string is 1 */
5382         c = (U8)*STRING(p);
5383         while (scan < loceol && UCHARAT(scan) == c)
5384             scan++;
5385         break;
5386     case EXACTF:        /* length of string is 1 */
5387         c = (U8)*STRING(p);
5388         while (scan < loceol &&
5389                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5390             scan++;
5391         break;
5392     case EXACTFL:       /* length of string is 1 */
5393         PL_reg_flags |= RF_tainted;
5394         c = (U8)*STRING(p);
5395         while (scan < loceol &&
5396                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5397             scan++;
5398         break;
5399     case ANYOF:
5400         if (do_utf8) {
5401             loceol = PL_regeol;
5402             while (hardcount < max && scan < loceol &&
5403                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5404                 scan += UTF8SKIP(scan);
5405                 hardcount++;
5406             }
5407         } else {
5408             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5409                 scan++;
5410         }
5411         break;
5412     case ALNUM:
5413         if (do_utf8) {
5414             loceol = PL_regeol;
5415             LOAD_UTF8_CHARCLASS_ALNUM();
5416             while (hardcount < max && scan < loceol &&
5417                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5418                 scan += UTF8SKIP(scan);
5419                 hardcount++;
5420             }
5421         } else {
5422             while (scan < loceol && isALNUM(*scan))
5423                 scan++;
5424         }
5425         break;
5426     case ALNUML:
5427         PL_reg_flags |= RF_tainted;
5428         if (do_utf8) {
5429             loceol = PL_regeol;
5430             while (hardcount < max && scan < loceol &&
5431                    isALNUM_LC_utf8((U8*)scan)) {
5432                 scan += UTF8SKIP(scan);
5433                 hardcount++;
5434             }
5435         } else {
5436             while (scan < loceol && isALNUM_LC(*scan))
5437                 scan++;
5438         }
5439         break;
5440     case NALNUM:
5441         if (do_utf8) {
5442             loceol = PL_regeol;
5443             LOAD_UTF8_CHARCLASS_ALNUM();
5444             while (hardcount < max && scan < loceol &&
5445                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5446                 scan += UTF8SKIP(scan);
5447                 hardcount++;
5448             }
5449         } else {
5450             while (scan < loceol && !isALNUM(*scan))
5451                 scan++;
5452         }
5453         break;
5454     case NALNUML:
5455         PL_reg_flags |= RF_tainted;
5456         if (do_utf8) {
5457             loceol = PL_regeol;
5458             while (hardcount < max && scan < loceol &&
5459                    !isALNUM_LC_utf8((U8*)scan)) {
5460                 scan += UTF8SKIP(scan);
5461                 hardcount++;
5462             }
5463         } else {
5464             while (scan < loceol && !isALNUM_LC(*scan))
5465                 scan++;
5466         }
5467         break;
5468     case SPACE:
5469         if (do_utf8) {
5470             loceol = PL_regeol;
5471             LOAD_UTF8_CHARCLASS_SPACE();
5472             while (hardcount < max && scan < loceol &&
5473                    (*scan == ' ' ||
5474                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5475                 scan += UTF8SKIP(scan);
5476                 hardcount++;
5477             }
5478         } else {
5479             while (scan < loceol && isSPACE(*scan))
5480                 scan++;
5481         }
5482         break;
5483     case SPACEL:
5484         PL_reg_flags |= RF_tainted;
5485         if (do_utf8) {
5486             loceol = PL_regeol;
5487             while (hardcount < max && scan < loceol &&
5488                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5489                 scan += UTF8SKIP(scan);
5490                 hardcount++;
5491             }
5492         } else {
5493             while (scan < loceol && isSPACE_LC(*scan))
5494                 scan++;
5495         }
5496         break;
5497     case NSPACE:
5498         if (do_utf8) {
5499             loceol = PL_regeol;
5500             LOAD_UTF8_CHARCLASS_SPACE();
5501             while (hardcount < max && scan < loceol &&
5502                    !(*scan == ' ' ||
5503                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5504                 scan += UTF8SKIP(scan);
5505                 hardcount++;
5506             }
5507         } else {
5508             while (scan < loceol && !isSPACE(*scan))
5509                 scan++;
5510         }
5511         break;
5512     case NSPACEL:
5513         PL_reg_flags |= RF_tainted;
5514         if (do_utf8) {
5515             loceol = PL_regeol;
5516             while (hardcount < max && scan < loceol &&
5517                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5518                 scan += UTF8SKIP(scan);
5519                 hardcount++;
5520             }
5521         } else {
5522             while (scan < loceol && !isSPACE_LC(*scan))
5523                 scan++;
5524         }
5525         break;
5526     case DIGIT:
5527         if (do_utf8) {
5528             loceol = PL_regeol;
5529             LOAD_UTF8_CHARCLASS_DIGIT();
5530             while (hardcount < max && scan < loceol &&
5531                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5532                 scan += UTF8SKIP(scan);
5533                 hardcount++;
5534             }
5535         } else {
5536             while (scan < loceol && isDIGIT(*scan))
5537                 scan++;
5538         }
5539         break;
5540     case NDIGIT:
5541         if (do_utf8) {
5542             loceol = PL_regeol;
5543             LOAD_UTF8_CHARCLASS_DIGIT();
5544             while (hardcount < max && scan < loceol &&
5545                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5546                 scan += UTF8SKIP(scan);
5547                 hardcount++;
5548             }
5549         } else {
5550             while (scan < loceol && !isDIGIT(*scan))
5551                 scan++;
5552         }
5553     case LNBREAK:
5554         if (do_utf8) {
5555             loceol = PL_regeol;
5556             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5557                 scan += c;
5558                 hardcount++;
5559             }
5560         } else {
5561             /*
5562               LNBREAK can match two latin chars, which is ok,
5563               because we have a null terminated string, but we
5564               have to use hardcount in this situation
5565             */
5566             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5567                 scan+=c;
5568                 hardcount++;
5569             }
5570         }
5571         break;
5572     case HORIZWS:
5573         if (do_utf8) {
5574             loceol = PL_regeol;
5575             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5576                 scan += c;
5577                 hardcount++;
5578             }
5579         } else {
5580             while (scan < loceol && is_HORIZWS_latin1(scan))
5581                 scan++;
5582         }
5583         break;
5584     case NHORIZWS:
5585         if (do_utf8) {
5586             loceol = PL_regeol;
5587             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5588                 scan += UTF8SKIP(scan);
5589                 hardcount++;
5590             }
5591         } else {
5592             while (scan < loceol && !is_HORIZWS_latin1(scan))
5593                 scan++;
5594
5595         }
5596         break;
5597     case VERTWS:
5598         if (do_utf8) {
5599             loceol = PL_regeol;
5600             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5601                 scan += c;
5602                 hardcount++;
5603             }
5604         } else {
5605             while (scan < loceol && is_VERTWS_latin1(scan))
5606                 scan++;
5607
5608         }
5609         break;
5610     case NVERTWS:
5611         if (do_utf8) {
5612             loceol = PL_regeol;
5613             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5614                 scan += UTF8SKIP(scan);
5615                 hardcount++;
5616             }
5617         } else {
5618             while (scan < loceol && !is_VERTWS_latin1(scan))
5619                 scan++;
5620
5621         }
5622         break;
5623
5624     default:            /* Called on something of 0 width. */
5625         break;          /* So match right here or not at all. */
5626     }
5627
5628     if (hardcount)
5629         c = hardcount;
5630     else
5631         c = scan - PL_reginput;
5632     PL_reginput = scan;
5633
5634     DEBUG_r({
5635         GET_RE_DEBUG_FLAGS_DECL;
5636         DEBUG_EXECUTE_r({
5637             SV * const prop = sv_newmortal();
5638             regprop(prog, prop, p);
5639             PerlIO_printf(Perl_debug_log,
5640                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5641                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5642         });
5643     });
5644
5645     return(c);
5646 }
5647
5648
5649 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5650 /*
5651 - regclass_swash - prepare the utf8 swash
5652 */
5653
5654 SV *
5655 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5656 {
5657     dVAR;
5658     SV *sw  = NULL;
5659     SV *si  = NULL;
5660     SV *alt = NULL;
5661     RXi_GET_DECL(prog,progi);
5662     const struct reg_data * const data = prog ? progi->data : NULL;
5663
5664     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5665
5666     if (data && data->count) {
5667         const U32 n = ARG(node);
5668
5669         if (data->what[n] == 's') {
5670             SV * const rv = MUTABLE_SV(data->data[n]);
5671             AV * const av = MUTABLE_AV(SvRV(rv));
5672             SV **const ary = AvARRAY(av);
5673             SV **a, **b;
5674
5675             /* See the end of regcomp.c:S_regclass() for
5676              * documentation of these array elements. */
5677
5678             si = *ary;
5679             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5680             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5681
5682             if (a)
5683                 sw = *a;
5684             else if (si && doinit) {
5685                 sw = swash_init("utf8", "", si, 1, 0);
5686                 (void)av_store(av, 1, sw);
5687             }
5688             if (b)
5689                 alt = *b;
5690         }
5691     }
5692
5693     if (listsvp)
5694         *listsvp = si;
5695     if (altsvp)
5696         *altsvp  = alt;
5697
5698     return sw;
5699 }
5700 #endif
5701
5702 /*
5703  - reginclass - determine if a character falls into a character class
5704
5705   The n is the ANYOF regnode, the p is the target string, lenp
5706   is pointer to the maximum length of how far to go in the p
5707   (if the lenp is zero, UTF8SKIP(p) is used),
5708   do_utf8 tells whether the target string is in UTF-8.
5709
5710  */
5711
5712 STATIC bool
5713 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5714 {
5715     dVAR;
5716     const char flags = ANYOF_FLAGS(n);
5717     bool match = FALSE;
5718     UV c = *p;
5719     STRLEN len = 0;
5720     STRLEN plen;
5721
5722     PERL_ARGS_ASSERT_REGINCLASS;
5723
5724     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5725         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5726                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5727                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5728         if (len == (STRLEN)-1)
5729             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5730     }
5731
5732     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5733     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5734         if (lenp)
5735             *lenp = 0;
5736         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5737             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5738                 match = TRUE;
5739         }
5740         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5741             match = TRUE;
5742         if (!match) {
5743             AV *av;
5744             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5745
5746             if (sw) {
5747                 U8 * utf8_p;
5748                 if (do_utf8) {
5749                     utf8_p = (U8 *) p;
5750                 } else {
5751                     STRLEN len = 1;
5752                     utf8_p = bytes_to_utf8(p, &len);
5753                 }
5754                 if (swash_fetch(sw, utf8_p, 1))
5755                     match = TRUE;
5756                 else if (flags & ANYOF_FOLD) {
5757                     if (!match && lenp && av) {
5758                         I32 i;
5759                         for (i = 0; i <= av_len(av); i++) {
5760                             SV* const sv = *av_fetch(av, i, FALSE);
5761                             STRLEN len;
5762                             const char * const s = SvPV_const(sv, len);
5763                             if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5764                                 *lenp = len;
5765                                 match = TRUE;
5766                                 break;
5767                             }
5768                         }
5769                     }
5770                     if (!match) {
5771                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5772
5773                         STRLEN tmplen;
5774                         to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5775                         if (swash_fetch(sw, tmpbuf, 1))
5776                             match = TRUE;
5777                     }
5778                 }
5779
5780                 /* If we allocated a string above, free it */
5781                 if (! do_utf8) Safefree(utf8_p);
5782             }
5783         }
5784         if (match && lenp && *lenp == 0)
5785             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5786     }
5787     if (!match && c < 256) {
5788         if (ANYOF_BITMAP_TEST(n, c))
5789             match = TRUE;
5790         else if (flags & ANYOF_FOLD) {
5791             U8 f;
5792
5793             if (flags & ANYOF_LOCALE) {
5794                 PL_reg_flags |= RF_tainted;
5795                 f = PL_fold_locale[c];
5796             }
5797             else
5798                 f = PL_fold[c];
5799             if (f != c && ANYOF_BITMAP_TEST(n, f))
5800                 match = TRUE;
5801         }
5802
5803         if (!match && (flags & ANYOF_CLASS)) {
5804             PL_reg_flags |= RF_tainted;
5805             if (
5806                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5807                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5808                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5809                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5810                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5811                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5812                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5813                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5814                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5815                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5816                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5817                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5818                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5819                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5820                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5821                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5822                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5825                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5826                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5827                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5828                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5829                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5830                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5831                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5832                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5833                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5834                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5835                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5836                 ) /* How's that for a conditional? */
5837             {
5838                 match = TRUE;
5839             }
5840         }
5841     }
5842
5843     return (flags & ANYOF_INVERT) ? !match : match;
5844 }
5845
5846 STATIC U8 *
5847 S_reghop3(U8 *s, I32 off, const U8* lim)
5848 {
5849     dVAR;
5850
5851     PERL_ARGS_ASSERT_REGHOP3;
5852
5853     if (off >= 0) {
5854         while (off-- && s < lim) {
5855             /* XXX could check well-formedness here */
5856             s += UTF8SKIP(s);
5857         }
5858     }
5859     else {
5860         while (off++ && s > lim) {
5861             s--;
5862             if (UTF8_IS_CONTINUED(*s)) {
5863                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5864                     s--;
5865             }
5866             /* XXX could check well-formedness here */
5867         }
5868     }
5869     return s;
5870 }
5871
5872 #ifdef XXX_dmq
5873 /* there are a bunch of places where we use two reghop3's that should
5874    be replaced with this routine. but since thats not done yet
5875    we ifdef it out - dmq
5876 */
5877 STATIC U8 *
5878 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5879 {
5880     dVAR;
5881
5882     PERL_ARGS_ASSERT_REGHOP4;
5883
5884     if (off >= 0) {
5885         while (off-- && s < rlim) {
5886             /* XXX could check well-formedness here */
5887             s += UTF8SKIP(s);
5888         }
5889     }
5890     else {
5891         while (off++ && s > llim) {
5892             s--;
5893             if (UTF8_IS_CONTINUED(*s)) {
5894                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5895                     s--;
5896             }
5897             /* XXX could check well-formedness here */
5898         }
5899     }
5900     return s;
5901 }
5902 #endif
5903
5904 STATIC U8 *
5905 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5906 {
5907     dVAR;
5908
5909     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5910
5911     if (off >= 0) {
5912         while (off-- && s < lim) {
5913             /* XXX could check well-formedness here */
5914             s += UTF8SKIP(s);
5915         }
5916         if (off >= 0)
5917             return NULL;
5918     }
5919     else {
5920         while (off++ && s > lim) {
5921             s--;
5922             if (UTF8_IS_CONTINUED(*s)) {
5923                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5924                     s--;
5925             }
5926             /* XXX could check well-formedness here */
5927         }
5928         if (off <= 0)
5929             return NULL;
5930     }
5931     return s;
5932 }
5933
5934 static void
5935 restore_pos(pTHX_ void *arg)
5936 {
5937     dVAR;
5938     regexp * const rex = (regexp *)arg;
5939     if (PL_reg_eval_set) {
5940         if (PL_reg_oldsaved) {
5941             rex->subbeg = PL_reg_oldsaved;
5942             rex->sublen = PL_reg_oldsavedlen;
5943 #ifdef PERL_OLD_COPY_ON_WRITE
5944             rex->saved_copy = PL_nrs;
5945 #endif
5946             RXp_MATCH_COPIED_on(rex);
5947         }
5948         PL_reg_magic->mg_len = PL_reg_oldpos;
5949         PL_reg_eval_set = 0;
5950         PL_curpm = PL_reg_oldcurpm;
5951     }
5952 }
5953
5954 STATIC void
5955 S_to_utf8_substr(pTHX_ register regexp *prog)
5956 {
5957     int i = 1;
5958
5959     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5960
5961     do {
5962         if (prog->substrs->data[i].substr
5963             && !prog->substrs->data[i].utf8_substr) {
5964             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5965             prog->substrs->data[i].utf8_substr = sv;
5966             sv_utf8_upgrade(sv);
5967             if (SvVALID(prog->substrs->data[i].substr)) {
5968                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5969                 if (flags & FBMcf_TAIL) {
5970                     /* Trim the trailing \n that fbm_compile added last
5971                        time.  */
5972                     SvCUR_set(sv, SvCUR(sv) - 1);
5973                     /* Whilst this makes the SV technically "invalid" (as its
5974                        buffer is no longer followed by "\0") when fbm_compile()
5975                        adds the "\n" back, a "\0" is restored.  */
5976                 }
5977                 fbm_compile(sv, flags);
5978             }
5979             if (prog->substrs->data[i].substr == prog->check_substr)
5980                 prog->check_utf8 = sv;
5981         }
5982     } while (i--);
5983 }
5984
5985 STATIC void
5986 S_to_byte_substr(pTHX_ register regexp *prog)
5987 {
5988     dVAR;
5989     int i = 1;
5990
5991     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
5992
5993     do {
5994         if (prog->substrs->data[i].utf8_substr
5995             && !prog->substrs->data[i].substr) {
5996             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5997             if (sv_utf8_downgrade(sv, TRUE)) {
5998                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
5999                     const U8 flags
6000                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6001                     if (flags & FBMcf_TAIL) {
6002                         /* Trim the trailing \n that fbm_compile added last
6003                            time.  */
6004                         SvCUR_set(sv, SvCUR(sv) - 1);
6005                     }
6006                     fbm_compile(sv, flags);
6007                 }
6008             } else {
6009                 SvREFCNT_dec(sv);
6010                 sv = &PL_sv_undef;
6011             }
6012             prog->substrs->data[i].substr = sv;
6013             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6014                 prog->check_substr = sv;
6015         }
6016     } while (i--);
6017 }
6018
6019 /*
6020  * Local variables:
6021  * c-indentation-style: bsd
6022  * c-basic-offset: 4
6023  * indent-tabs-mode: t
6024  * End:
6025  *
6026  * ex: set ts=8 sts=4 sw=4 noet:
6027  */