src/5011002/regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  *      One Ring to rule them all, One Ring to find them
   6  &
   7  *     [p.v of _The Lord of the Rings_, opening poem]
   8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10  */
  11
  12 /* This file contains functions for executing a regular expression.  See
  13  * also regcomp.c which funnily enough, contains functions for compiling
  14  * a regular expression.
  15  *
  16  * This file is also copied at build time to ext/re/re_exec.c, where
  17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18  * This causes the main functions to be compiled under new names and with
  19  * debugging support added, which makes "use re 'debug'" work.
  20  */
  21
  22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23  * confused with the original package (see point 3 below).  Thanks, Henry!
  24  */
  25
  26 /* Additional note: this code is very heavily munged from Henry's version
  27  * in places.  In some spots I've traded clarity for efficiency, so don't
  28  * blame Henry for some of the lack of readability.
  29  */
  30
  31 /* The names of the functions have been changed from regcomp and
  32  * regexec to  pregcomp and pregexec in order to avoid conflicts
  33  * with the POSIX routines of the same names.
  34 */
  35
  36 #ifdef PERL_EXT_RE_BUILD
  37 #include "re_top.h"
  38 #endif
  39
  40 /*
  41  * pregcomp and pregexec -- regsub and regerror are not used in perl
  42  *
  43  *      Copyright (c) 1986 by University of Toronto.
  44  *      Written by Henry Spencer.  Not derived from licensed software.
  45  *
  46  *      Permission is granted to anyone to use this software for any
  47  *      purpose on any computer system, and to redistribute it freely,
  48  *      subject to the following restrictions:
  49  *
  50  *      1. The author is not responsible for the consequences of use of
  51  *              this software, no matter how awful, even if they arise
  52  *              from defects in it.
  53  *
  54  *      2. The origin of this software must not be misrepresented, either
  55  *              by explicit claim or by omission.
  56  *
  57  *      3. Altered versions must be plainly marked as such, and must not
  58  *              be misrepresented as being the original software.
  59  *
  60  ****    Alterations to Henry's code are...
  61  ****
  62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64  ****    by Larry Wall and others
  65  ****
  66  ****    You may distribute under the terms of either the GNU General Public
  67  ****    License or the Artistic License, as specified in the README file.
  68  *
  69  * Beware that some of this code is subtly aware of the way operator
  70  * precedence is structured in regular expressions.  Serious changes in
  71  * regular-expression syntax might require a total rethink.
  72  */
  73 #include "EXTERN.h"
  74 #define PERL_IN_REGEXEC_C
  75 #include "perl.h"
  76 #include "re_defs.h"
  77
  78 #ifdef PERL_IN_XSUB_RE
  79 #  include "re_comp.h"
  80 #else
  81 #  include "regcomp.h"
  82 #endif
  83
  84 #define RF_tainted      1               /* tainted information used? */
  85 #define RF_warned       2               /* warned about big count? */
  86
  87 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  88
  89 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  90
  91 #define RS_init         1               /* eval environment created */
  92 #define RS_set          2               /* replsv value is set */
  93
  94 #ifndef STATIC
  95 #define STATIC  static
  96 #endif
  97
  98 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  99
 100 /*
 101  * Forwards.
 102  */
 103
 104 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 105 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 106
 107 #define HOPc(pos,off) \
 108         (char *)(PL_reg_match_utf8 \
 109             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 110             : (U8*)(pos + off))
 111 #define HOPBACKc(pos, off) \
 112         (char*)(PL_reg_match_utf8\
 113             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 114             : (pos - off >= PL_bostr)           \
 115                 ? (U8*)pos - off                \
 116                 : NULL)
 117
 118 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 119 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 120
 121 /* these are unrolled below in the CCC_TRY_XXX defined */
 122 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 123     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 124 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 125 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 126 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 127 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 128
 129
 130 /*
 131    We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
 132    so that it is possible to override the option here without having to
 133    rebuild the entire core. as we are required to do if we change regcomp.h
 134    which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
 135 */
 136 #if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
 137 #define BROKEN_UNICODE_CHARCLASS_MAPPINGS
 138 #endif
 139
 140 #ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
 141 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS_ALNUM()
 142 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS_SPACE()
 143 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
 144 #define RE_utf8_perl_word   PL_utf8_alnum
 145 #define RE_utf8_perl_space  PL_utf8_space
 146 #define RE_utf8_posix_digit PL_utf8_digit
 147 #define perl_word  alnum
 148 #define perl_space space
 149 #define posix_digit digit
 150 #else
 151 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS(perl_word,"a")
 152 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS(perl_space," ")
 153 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
 154 #define RE_utf8_perl_word   PL_utf8_perl_word
 155 #define RE_utf8_perl_space  PL_utf8_perl_space
 156 #define RE_utf8_posix_digit PL_utf8_posix_digit
 157 #endif
 158
 159
 160 #define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                          \
 161         case NAMEL:                                                              \
 162             PL_reg_flags |= RF_tainted;                                                 \
 163             /* FALL THROUGH */                                                          \
 164         case NAME:                                                                     \
 165             if (!nextchr)                                                               \
 166                 sayNO;                                                                  \
 167             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
 168                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
 169                     bool ok;                                                            \
 170                     ENTER;                                                              \
 171                     save_re_context();                                                  \
 172                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
 173                     assert(ok);                                                         \
 174                     LEAVE;                                                              \
 175                 }                                                                       \
 176                 if (!(OP(scan) == NAME                                                  \
 177                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)   \
 178                     : LCFUNC_utf8((U8*)locinput)))                                      \
 179                 {                                                                       \
 180                     sayNO;                                                              \
 181                 }                                                                       \
 182                 locinput += PL_utf8skip[nextchr];                                       \
 183                 nextchr = UCHARAT(locinput);                                            \
 184                 break;                                                                  \
 185             }                                                                           \
 186             if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                  \
 187                 sayNO;                                                                  \
 188             nextchr = UCHARAT(++locinput);                                              \
 189             break
 190
 191 #define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                        \
 192         case NAMEL:                                                              \
 193             PL_reg_flags |= RF_tainted;                                                 \
 194             /* FALL THROUGH */                                                          \
 195         case NAME :                                                                     \
 196             if (!nextchr && locinput >= PL_regeol)                                      \
 197                 sayNO;                                                                  \
 198             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
 199                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
 200                     bool ok;                                                            \
 201                     ENTER;                                                              \
 202                     save_re_context();                                                  \
 203                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
 204                     assert(ok);                                                         \
 205                     LEAVE;                                                              \
 206                 }                                                                       \
 207                 if ((OP(scan) == NAME                                                  \
 208                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)    \
 209                     : LCFUNC_utf8((U8*)locinput)))                                      \
 210                 {                                                                       \
 211                     sayNO;                                                              \
 212                 }                                                                       \
 213                 locinput += PL_utf8skip[nextchr];                                       \
 214                 nextchr = UCHARAT(locinput);                                            \
 215                 break;                                                                  \
 216             }                                                                           \
 217             if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                   \
 218                 sayNO;                                                                  \
 219             nextchr = UCHARAT(++locinput);                                              \
 220             break
 221
 222
 223
 224
 225
 226 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 227
 228 /* for use after a quantifier and before an EXACT-like node -- japhy */
 229 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 230 #define JUMPABLE(rn) (      \
 231     OP(rn) == OPEN ||       \
 232     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 233     OP(rn) == EVAL ||   \
 234     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 235     OP(rn) == PLUS || OP(rn) == MINMOD || \
 236     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 237     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 238 )
 239 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 240
 241 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 242
 243 #if 0
 244 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 245    we don't need this definition. */
 246 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 247 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 248 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 249
 250 #else
 251 /* ... so we use this as its faster. */
 252 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 253 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 254 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 255
 256 #endif
 257
 258 /*
 259   Search for mandatory following text node; for lookahead, the text must
 260   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 261 */
 262 #define FIND_NEXT_IMPT(rn) STMT_START { \
 263     while (JUMPABLE(rn)) { \
 264         const OPCODE type = OP(rn); \
 265         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 266             rn = NEXTOPER(NEXTOPER(rn)); \
 267         else if (type == PLUS) \
 268             rn = NEXTOPER(rn); \
 269         else if (type == IFMATCH) \
 270             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 271         else rn += NEXT_OFF(rn); \
 272     } \
 273 } STMT_END
 274
 275
 276 static void restore_pos(pTHX_ void *arg);
 277
 278 STATIC CHECKPOINT
 279 S_regcppush(pTHX_ I32 parenfloor)
 280 {
 281     dVAR;
 282     const int retval = PL_savestack_ix;
 283 #define REGCP_PAREN_ELEMS 4
 284     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 285     int p;
 286     GET_RE_DEBUG_FLAGS_DECL;
 287
 288     if (paren_elems_to_push < 0)
 289         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 290
 291 #define REGCP_OTHER_ELEMS 7
 292     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 293
 294     for (p = PL_regsize; p > parenfloor; p--) {
 295 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 296         SSPUSHINT(PL_regoffs[p].end);
 297         SSPUSHINT(PL_regoffs[p].start);
 298         SSPUSHPTR(PL_reg_start_tmp[p]);
 299         SSPUSHINT(p);
 300         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 301           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 302                       (UV)p, (IV)PL_regoffs[p].start,
 303                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 304                       (IV)PL_regoffs[p].end
 305         ));
 306     }
 307 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 308     SSPUSHPTR(PL_regoffs);
 309     SSPUSHINT(PL_regsize);
 310     SSPUSHINT(*PL_reglastparen);
 311     SSPUSHINT(*PL_reglastcloseparen);
 312     SSPUSHPTR(PL_reginput);
 313 #define REGCP_FRAME_ELEMS 2
 314 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 315  * are needed for the regexp context stack bookkeeping. */
 316     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 317     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 318
 319     return retval;
 320 }
 321
 322 /* These are needed since we do not localize EVAL nodes: */
 323 #define REGCP_SET(cp)                                           \
 324     DEBUG_STATE_r(                                              \
 325             PerlIO_printf(Perl_debug_log,                       \
 326                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 327                 (IV)PL_savestack_ix));                          \
 328     cp = PL_savestack_ix
 329
 330 #define REGCP_UNWIND(cp)                                        \
 331     DEBUG_STATE_r(                                              \
 332         if (cp != PL_savestack_ix)                              \
 333             PerlIO_printf(Perl_debug_log,                       \
 334                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 335                 (IV)(cp), (IV)PL_savestack_ix));                \
 336     regcpblow(cp)
 337
 338 STATIC char *
 339 S_regcppop(pTHX_ const regexp *rex)
 340 {
 341     dVAR;
 342     U32 i;
 343     char *input;
 344     GET_RE_DEBUG_FLAGS_DECL;
 345
 346     PERL_ARGS_ASSERT_REGCPPOP;
 347
 348     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 349     i = SSPOPINT;
 350     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 351     i = SSPOPINT; /* Parentheses elements to pop. */
 352     input = (char *) SSPOPPTR;
 353     *PL_reglastcloseparen = SSPOPINT;
 354     *PL_reglastparen = SSPOPINT;
 355     PL_regsize = SSPOPINT;
 356     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 357
 358
 359     /* Now restore the parentheses context. */
 360     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 361          i > 0; i -= REGCP_PAREN_ELEMS) {
 362         I32 tmps;
 363         U32 paren = (U32)SSPOPINT;
 364         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 365         PL_regoffs[paren].start = SSPOPINT;
 366         tmps = SSPOPINT;
 367         if (paren <= *PL_reglastparen)
 368             PL_regoffs[paren].end = tmps;
 369         DEBUG_BUFFERS_r(
 370             PerlIO_printf(Perl_debug_log,
 371                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 372                           (UV)paren, (IV)PL_regoffs[paren].start,
 373                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 374                           (IV)PL_regoffs[paren].end,
 375                           (paren > *PL_reglastparen ? "(no)" : ""));
 376         );
 377     }
 378     DEBUG_BUFFERS_r(
 379         if (*PL_reglastparen + 1 <= rex->nparens) {
 380             PerlIO_printf(Perl_debug_log,
 381                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 382                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 383         }
 384     );
 385 #if 1
 386     /* It would seem that the similar code in regtry()
 387      * already takes care of this, and in fact it is in
 388      * a better location to since this code can #if 0-ed out
 389      * but the code in regtry() is needed or otherwise tests
 390      * requiring null fields (pat.t#187 and split.t#{13,14}
 391      * (as of patchlevel 7877)  will fail.  Then again,
 392      * this code seems to be necessary or otherwise
 393      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 394      * --jhi updated by dapm */
 395     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 396         if (i > PL_regsize)
 397             PL_regoffs[i].start = -1;
 398         PL_regoffs[i].end = -1;
 399     }
 400 #endif
 401     return input;
 402 }
 403
 404 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 405
 406 /*
 407  * pregexec and friends
 408  */
 409
 410 #ifndef PERL_IN_XSUB_RE
 411 /*
 412  - pregexec - match a regexp against a string
 413  */
 414 I32
 415 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 416          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 417 /* strend: pointer to null at end of string */
 418 /* strbeg: real beginning of string */
 419 /* minend: end of match must be >=minend after stringarg. */
 420 /* nosave: For optimizations. */
 421 {
 422     PERL_ARGS_ASSERT_PREGEXEC;
 423
 424     return
 425         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 426                       nosave ? 0 : REXEC_COPY_STR);
 427 }
 428 #endif
 429
 430 /*
 431  * Need to implement the following flags for reg_anch:
 432  *
 433  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 434  * USE_INTUIT_ML
 435  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 436  * INTUIT_AUTORITATIVE_ML
 437  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 438  * INTUIT_ONCE_ML
 439  *
 440  * Another flag for this function: SECOND_TIME (so that float substrs
 441  * with giant delta may be not rechecked).
 442  */
 443
 444 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 445
 446 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 447    Otherwise, only SvCUR(sv) is used to get strbeg. */
 448
 449 /* XXXX We assume that strpos is strbeg unless sv. */
 450
 451 /* XXXX Some places assume that there is a fixed substring.
 452         An update may be needed if optimizer marks as "INTUITable"
 453         RExen without fixed substrings.  Similarly, it is assumed that
 454         lengths of all the strings are no more than minlen, thus they
 455         cannot come from lookahead.
 456         (Or minlen should take into account lookahead.)
 457   NOTE: Some of this comment is not correct. minlen does now take account
 458   of lookahead/behind. Further research is required. -- demerphq
 459
 460 */
 461
 462 /* A failure to find a constant substring means that there is no need to make
 463    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 464    finding a substring too deep into the string means that less calls to
 465    regtry() should be needed.
 466
 467    REx compiler's optimizer found 4 possible hints:
 468         a) Anchored substring;
 469         b) Fixed substring;
 470         c) Whether we are anchored (beginning-of-line or \G);
 471         d) First node (of those at offset 0) which may distingush positions;
 472    We use a)b)d) and multiline-part of c), and try to find a position in the
 473    string which does not contradict any of them.
 474  */
 475
 476 /* Most of decisions we do here should have been done at compile time.
 477    The nodes of the REx which we used for the search should have been
 478    deleted from the finite automaton. */
 479
 480 char *
 481 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 482                      char *strend, const U32 flags, re_scream_pos_data *data)
 483 {
 484     dVAR;
 485     struct regexp *const prog = (struct regexp *)SvANY(rx);
 486     register I32 start_shift = 0;
 487     /* Should be nonnegative! */
 488     register I32 end_shift   = 0;
 489     register char *s;
 490     register SV *check;
 491     char *strbeg;
 492     char *t;
 493     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 494     I32 ml_anch;
 495     register char *other_last = NULL;   /* other substr checked before this */
 496     char *check_at = NULL;              /* check substr found at this pos */
 497     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 498     RXi_GET_DECL(prog,progi);
 499 #ifdef DEBUGGING
 500     const char * const i_strpos = strpos;
 501 #endif
 502     GET_RE_DEBUG_FLAGS_DECL;
 503
 504     PERL_ARGS_ASSERT_RE_INTUIT_START;
 505
 506     RX_MATCH_UTF8_set(rx,do_utf8);
 507
 508     if (RX_UTF8(rx)) {
 509         PL_reg_flags |= RF_utf8;
 510     }
 511     DEBUG_EXECUTE_r(
 512         debug_start_match(rx, do_utf8, strpos, strend,
 513             sv ? "Guessing start of match in sv for"
 514                : "Guessing start of match in string for");
 515               );
 516
 517     /* CHR_DIST() would be more correct here but it makes things slow. */
 518     if (prog->minlen > strend - strpos) {
 519         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 520                               "String too short... [re_intuit_start]\n"));
 521         goto fail;
 522     }
 523
 524     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 525     PL_regeol = strend;
 526     if (do_utf8) {
 527         if (!prog->check_utf8 && prog->check_substr)
 528             to_utf8_substr(prog);
 529         check = prog->check_utf8;
 530     } else {
 531         if (!prog->check_substr && prog->check_utf8)
 532             to_byte_substr(prog);
 533         check = prog->check_substr;
 534     }
 535     if (check == &PL_sv_undef) {
 536         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 537                 "Non-utf8 string cannot match utf8 check string\n"));
 538         goto fail;
 539     }
 540     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 541         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 542                      || ( (prog->extflags & RXf_ANCH_BOL)
 543                           && !multiline ) );    /* Check after \n? */
 544
 545         if (!ml_anch) {
 546           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 547                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 548                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 549                && sv && !SvROK(sv)
 550                && (strpos != strbeg)) {
 551               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 552               goto fail;
 553           }
 554           if (prog->check_offset_min == prog->check_offset_max &&
 555               !(prog->extflags & RXf_CANY_SEEN)) {
 556             /* Substring at constant offset from beg-of-str... */
 557             I32 slen;
 558
 559             s = HOP3c(strpos, prog->check_offset_min, strend);
 560
 561             if (SvTAIL(check)) {
 562                 slen = SvCUR(check);    /* >= 1 */
 563
 564                 if ( strend - s > slen || strend - s < slen - 1
 565                      || (strend - s == slen && strend[-1] != '\n')) {
 566                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 567                     goto fail_finish;
 568                 }
 569                 /* Now should match s[0..slen-2] */
 570                 slen--;
 571                 if (slen && (*SvPVX_const(check) != *s
 572                              || (slen > 1
 573                                  && memNE(SvPVX_const(check), s, slen)))) {
 574                   report_neq:
 575                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 576                     goto fail_finish;
 577                 }
 578             }
 579             else if (*SvPVX_const(check) != *s
 580                      || ((slen = SvCUR(check)) > 1
 581                          && memNE(SvPVX_const(check), s, slen)))
 582                 goto report_neq;
 583             check_at = s;
 584             goto success_at_start;
 585           }
 586         }
 587         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 588         s = strpos;
 589         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 590         end_shift = prog->check_end_shift;
 591
 592         if (!ml_anch) {
 593             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 594                                          - (SvTAIL(check) != 0);
 595             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 596
 597             if (end_shift < eshift)
 598                 end_shift = eshift;
 599         }
 600     }
 601     else {                              /* Can match at random position */
 602         ml_anch = 0;
 603         s = strpos;
 604         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 605         end_shift = prog->check_end_shift;
 606
 607         /* end shift should be non negative here */
 608     }
 609
 610 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 611     if (end_shift < 0)
 612         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 613                    (IV)end_shift, RX_PRECOMP(prog));
 614 #endif
 615
 616   restart:
 617     /* Find a possible match in the region s..strend by looking for
 618        the "check" substring in the region corrected by start/end_shift. */
 619
 620     {
 621         I32 srch_start_shift = start_shift;
 622         I32 srch_end_shift = end_shift;
 623         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 624             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 625             srch_start_shift = strbeg - s;
 626         }
 627     DEBUG_OPTIMISE_MORE_r({
 628         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 629             (IV)prog->check_offset_min,
 630             (IV)srch_start_shift,
 631             (IV)srch_end_shift,
 632             (IV)prog->check_end_shift);
 633     });
 634
 635     if (flags & REXEC_SCREAM) {
 636         I32 p = -1;                     /* Internal iterator of scream. */
 637         I32 * const pp = data ? data->scream_pos : &p;
 638
 639         if (PL_screamfirst[BmRARE(check)] >= 0
 640             || ( BmRARE(check) == '\n'
 641                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 642                  && SvTAIL(check) ))
 643             s = screaminstr(sv, check,
 644                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 645         else
 646             goto fail_finish;
 647         /* we may be pointing at the wrong string */
 648         if (s && RXp_MATCH_COPIED(prog))
 649             s = strbeg + (s - SvPVX_const(sv));
 650         if (data)
 651             *data->scream_olds = s;
 652     }
 653     else {
 654         U8* start_point;
 655         U8* end_point;
 656         if (prog->extflags & RXf_CANY_SEEN) {
 657             start_point= (U8*)(s + srch_start_shift);
 658             end_point= (U8*)(strend - srch_end_shift);
 659         } else {
 660             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 661             end_point= HOP3(strend, -srch_end_shift, strbeg);
 662         }
 663         DEBUG_OPTIMISE_MORE_r({
 664             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 665                 (int)(end_point - start_point),
 666                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 667                 start_point);
 668         });
 669
 670         s = fbm_instr( start_point, end_point,
 671                       check, multiline ? FBMrf_MULTILINE : 0);
 672     }
 673     }
 674     /* Update the count-of-usability, remove useless subpatterns,
 675         unshift s.  */
 676
 677     DEBUG_EXECUTE_r({
 678         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 679             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 680         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 681                           (s ? "Found" : "Did not find"),
 682             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 683                 ? "anchored" : "floating"),
 684             quoted,
 685             RE_SV_TAIL(check),
 686             (s ? " at offset " : "...\n") );
 687     });
 688
 689     if (!s)
 690         goto fail_finish;
 691     /* Finish the diagnostic message */
 692     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 693
 694     /* XXX dmq: first branch is for positive lookbehind...
 695        Our check string is offset from the beginning of the pattern.
 696        So we need to do any stclass tests offset forward from that
 697        point. I think. :-(
 698      */
 699
 700
 701
 702     check_at=s;
 703
 704
 705     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 706        Start with the other substr.
 707        XXXX no SCREAM optimization yet - and a very coarse implementation
 708        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 709                 *always* match.  Probably should be marked during compile...
 710        Probably it is right to do no SCREAM here...
 711      */
 712
 713     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 714                 : (prog->float_substr && prog->anchored_substr))
 715     {
 716         /* Take into account the "other" substring. */
 717         /* XXXX May be hopelessly wrong for UTF... */
 718         if (!other_last)
 719             other_last = strpos;
 720         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 721           do_other_anchored:
 722             {
 723                 char * const last = HOP3c(s, -start_shift, strbeg);
 724                 char *last1, *last2;
 725                 char * const saved_s = s;
 726                 SV* must;
 727
 728                 t = s - prog->check_offset_max;
 729                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 730                     && (!do_utf8
 731                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 732                             && t > strpos)))
 733                     NOOP;
 734                 else
 735                     t = strpos;
 736                 t = HOP3c(t, prog->anchored_offset, strend);
 737                 if (t < other_last)     /* These positions already checked */
 738                     t = other_last;
 739                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 740                 if (last < last1)
 741                     last1 = last;
 742                 /* XXXX It is not documented what units *_offsets are in.
 743                    We assume bytes, but this is clearly wrong.
 744                    Meaning this code needs to be carefully reviewed for errors.
 745                    dmq.
 746                   */
 747
 748                 /* On end-of-str: see comment below. */
 749                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 750                 if (must == &PL_sv_undef) {
 751                     s = (char*)NULL;
 752                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 753                 }
 754                 else
 755                     s = fbm_instr(
 756                         (unsigned char*)t,
 757                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 758                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 759                         must,
 760                         multiline ? FBMrf_MULTILINE : 0
 761                     );
 762                 DEBUG_EXECUTE_r({
 763                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 764                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 765                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 766                         (s ? "Found" : "Contradicts"),
 767                         quoted, RE_SV_TAIL(must));
 768                 });
 769
 770
 771                 if (!s) {
 772                     if (last1 >= last2) {
 773                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 774                                                 ", giving up...\n"));
 775                         goto fail_finish;
 776                     }
 777                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 778                         ", trying floating at offset %ld...\n",
 779                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 780                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 781                     s = HOP3c(last, 1, strend);
 782                     goto restart;
 783                 }
 784                 else {
 785                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 786                           (long)(s - i_strpos)));
 787                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 788                     other_last = HOP3c(s, 1, strend);
 789                     s = saved_s;
 790                     if (t == strpos)
 791                         goto try_at_start;
 792                     goto try_at_offset;
 793                 }
 794             }
 795         }
 796         else {          /* Take into account the floating substring. */
 797             char *last, *last1;
 798             char * const saved_s = s;
 799             SV* must;
 800
 801             t = HOP3c(s, -start_shift, strbeg);
 802             last1 = last =
 803                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 804             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 805                 last = HOP3c(t, prog->float_max_offset, strend);
 806             s = HOP3c(t, prog->float_min_offset, strend);
 807             if (s < other_last)
 808                 s = other_last;
 809  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 810             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 811             /* fbm_instr() takes into account exact value of end-of-str
 812                if the check is SvTAIL(ed).  Since false positives are OK,
 813                and end-of-str is not later than strend we are OK. */
 814             if (must == &PL_sv_undef) {
 815                 s = (char*)NULL;
 816                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 817             }
 818             else
 819                 s = fbm_instr((unsigned char*)s,
 820                               (unsigned char*)last + SvCUR(must)
 821                                   - (SvTAIL(must)!=0),
 822                               must, multiline ? FBMrf_MULTILINE : 0);
 823             DEBUG_EXECUTE_r({
 824                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 825                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 826                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 827                     (s ? "Found" : "Contradicts"),
 828                     quoted, RE_SV_TAIL(must));
 829             });
 830             if (!s) {
 831                 if (last1 == last) {
 832                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 833                                             ", giving up...\n"));
 834                     goto fail_finish;
 835                 }
 836                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 837                     ", trying anchored starting at offset %ld...\n",
 838                     (long)(saved_s + 1 - i_strpos)));
 839                 other_last = last;
 840                 s = HOP3c(t, 1, strend);
 841                 goto restart;
 842             }
 843             else {
 844                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 845                       (long)(s - i_strpos)));
 846                 other_last = s; /* Fix this later. --Hugo */
 847                 s = saved_s;
 848                 if (t == strpos)
 849                     goto try_at_start;
 850                 goto try_at_offset;
 851             }
 852         }
 853     }
 854
 855
 856     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 857
 858     DEBUG_OPTIMISE_MORE_r(
 859         PerlIO_printf(Perl_debug_log,
 860             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 861             (IV)prog->check_offset_min,
 862             (IV)prog->check_offset_max,
 863             (IV)(s-strpos),
 864             (IV)(t-strpos),
 865             (IV)(t-s),
 866             (IV)(strend-strpos)
 867         )
 868     );
 869
 870     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 871         && (!do_utf8
 872             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 873                  && t > strpos)))
 874     {
 875         /* Fixed substring is found far enough so that the match
 876            cannot start at strpos. */
 877       try_at_offset:
 878         if (ml_anch && t[-1] != '\n') {
 879             /* Eventually fbm_*() should handle this, but often
 880                anchored_offset is not 0, so this check will not be wasted. */
 881             /* XXXX In the code below we prefer to look for "^" even in
 882                presence of anchored substrings.  And we search even
 883                beyond the found float position.  These pessimizations
 884                are historical artefacts only.  */
 885           find_anchor:
 886             while (t < strend - prog->minlen) {
 887                 if (*t == '\n') {
 888                     if (t < check_at - prog->check_offset_min) {
 889                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 890                             /* Since we moved from the found position,
 891                                we definitely contradict the found anchored
 892                                substr.  Due to the above check we do not
 893                                contradict "check" substr.
 894                                Thus we can arrive here only if check substr
 895                                is float.  Redo checking for "other"=="fixed".
 896                              */
 897                             strpos = t + 1;
 898                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 899                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 900                             goto do_other_anchored;
 901                         }
 902                         /* We don't contradict the found floating substring. */
 903                         /* XXXX Why not check for STCLASS? */
 904                         s = t + 1;
 905                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 906                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 907                         goto set_useful;
 908                     }
 909                     /* Position contradicts check-string */
 910                     /* XXXX probably better to look for check-string
 911                        than for "\n", so one should lower the limit for t? */
 912                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 913                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 914                     other_last = strpos = s = t + 1;
 915                     goto restart;
 916                 }
 917                 t++;
 918             }
 919             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 920                         PL_colors[0], PL_colors[1]));
 921             goto fail_finish;
 922         }
 923         else {
 924             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 925                         PL_colors[0], PL_colors[1]));
 926         }
 927         s = t;
 928       set_useful:
 929         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 930     }
 931     else {
 932         /* The found string does not prohibit matching at strpos,
 933            - no optimization of calling REx engine can be performed,
 934            unless it was an MBOL and we are not after MBOL,
 935            or a future STCLASS check will fail this. */
 936       try_at_start:
 937         /* Even in this situation we may use MBOL flag if strpos is offset
 938            wrt the start of the string. */
 939         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 940             && (strpos != strbeg) && strpos[-1] != '\n'
 941             /* May be due to an implicit anchor of m{.*foo}  */
 942             && !(prog->intflags & PREGf_IMPLICIT))
 943         {
 944             t = strpos;
 945             goto find_anchor;
 946         }
 947         DEBUG_EXECUTE_r( if (ml_anch)
 948             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 949                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 950         );
 951       success_at_start:
 952         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 953             && (do_utf8 ? (
 954                 prog->check_utf8                /* Could be deleted already */
 955                 && --BmUSEFUL(prog->check_utf8) < 0
 956                 && (prog->check_utf8 == prog->float_utf8)
 957             ) : (
 958                 prog->check_substr              /* Could be deleted already */
 959                 && --BmUSEFUL(prog->check_substr) < 0
 960                 && (prog->check_substr == prog->float_substr)
 961             )))
 962         {
 963             /* If flags & SOMETHING - do not do it many times on the same match */
 964             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 965             /* XXX Does the destruction order has to change with do_utf8? */
 966             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 967             SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 968             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 969             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 970             check = NULL;                       /* abort */
 971             s = strpos;
 972             /* XXXX This is a remnant of the old implementation.  It
 973                     looks wasteful, since now INTUIT can use many
 974                     other heuristics. */
 975             prog->extflags &= ~RXf_USE_INTUIT;
 976         }
 977         else
 978             s = strpos;
 979     }
 980
 981     /* Last resort... */
 982     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 983     /* trie stclasses are too expensive to use here, we are better off to
 984        leave it to regmatch itself */
 985     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 986         /* minlen == 0 is possible if regstclass is \b or \B,
 987            and the fixed substr is ''$.
 988            Since minlen is already taken into account, s+1 is before strend;
 989            accidentally, minlen >= 1 guaranties no false positives at s + 1
 990            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 991            regstclass does not come from lookahead...  */
 992         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 993            This leaves EXACTF only, which is dealt with in find_byclass().  */
 994         const U8* const str = (U8*)STRING(progi->regstclass);
 995         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 996                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 997                     : 1);
 998         char * endpos;
 999         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
1000             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
1001         else if (prog->float_substr || prog->float_utf8)
1002             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
1003         else
1004             endpos= strend;
1005
1006         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
1007                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1008
1009         t = s;
1010         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
1011         if (!s) {
1012 #ifdef DEBUGGING
1013             const char *what = NULL;
1014 #endif
1015             if (endpos == strend) {
1016                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1017                                 "Could not match STCLASS...\n") );
1018                 goto fail;
1019             }
1020             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1021                                    "This position contradicts STCLASS...\n") );
1022             if ((prog->extflags & RXf_ANCH) && !ml_anch)
1023                 goto fail;
1024             /* Contradict one of substrings */
1025             if (prog->anchored_substr || prog->anchored_utf8) {
1026                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
1027                     DEBUG_EXECUTE_r( what = "anchored" );
1028                   hop_and_restart:
1029                     s = HOP3c(t, 1, strend);
1030                     if (s + start_shift + end_shift > strend) {
1031                         /* XXXX Should be taken into account earlier? */
1032                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1033                                                "Could not match STCLASS...\n") );
1034                         goto fail;
1035                     }
1036                     if (!check)
1037                         goto giveup;
1038                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1039                                 "Looking for %s substr starting at offset %ld...\n",
1040                                  what, (long)(s + start_shift - i_strpos)) );
1041                     goto restart;
1042                 }
1043                 /* Have both, check_string is floating */
1044                 if (t + start_shift >= check_at) /* Contradicts floating=check */
1045                     goto retry_floating_check;
1046                 /* Recheck anchored substring, but not floating... */
1047                 s = check_at;
1048                 if (!check)
1049                     goto giveup;
1050                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1051                           "Looking for anchored substr starting at offset %ld...\n",
1052                           (long)(other_last - i_strpos)) );
1053                 goto do_other_anchored;
1054             }
1055             /* Another way we could have checked stclass at the
1056                current position only: */
1057             if (ml_anch) {
1058                 s = t = t + 1;
1059                 if (!check)
1060                     goto giveup;
1061                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1062                           "Looking for /%s^%s/m starting at offset %ld...\n",
1063                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
1064                 goto try_at_offset;
1065             }
1066             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
1067                 goto fail;
1068             /* Check is floating subtring. */
1069           retry_floating_check:
1070             t = check_at - start_shift;
1071             DEBUG_EXECUTE_r( what = "floating" );
1072             goto hop_and_restart;
1073         }
1074         if (t != s) {
1075             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1076                         "By STCLASS: moving %ld --> %ld\n",
1077                                   (long)(t - i_strpos), (long)(s - i_strpos))
1078                    );
1079         }
1080         else {
1081             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1082                                   "Does not contradict STCLASS...\n");
1083                    );
1084         }
1085     }
1086   giveup:
1087     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
1088                           PL_colors[4], (check ? "Guessed" : "Giving up"),
1089                           PL_colors[5], (long)(s - i_strpos)) );
1090     return s;
1091
1092   fail_finish:                          /* Substring not found */
1093     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
1094         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
1095   fail:
1096     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
1097                           PL_colors[4], PL_colors[5]));
1098     return NULL;
1099 }
1100
1101 #define DECL_TRIE_TYPE(scan) \
1102     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1103                     trie_type = (scan->flags != EXACT) \
1104                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1105                               : (do_utf8 ? trie_utf8 : trie_plain)
1106
1107 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1108 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1109     switch (trie_type) {                                                    \
1110     case trie_utf8_fold:                                                    \
1111         if ( foldlen>0 ) {                                                  \
1112             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1113             foldlen -= len;                                                 \
1114             uscan += len;                                                   \
1115             len=0;                                                          \
1116         } else {                                                            \
1117             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1118             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1119             foldlen -= UNISKIP( uvc );                                      \
1120             uscan = foldbuf + UNISKIP( uvc );                               \
1121         }                                                                   \
1122         break;                                                              \
1123     case trie_latin_utf8_fold:                                              \
1124         if ( foldlen>0 ) {                                                  \
1125             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1126             foldlen -= len;                                                 \
1127             uscan += len;                                                   \
1128             len=0;                                                          \
1129         } else {                                                            \
1130             len = 1;                                                        \
1131             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1132             foldlen -= UNISKIP( uvc );                                      \
1133             uscan = foldbuf + UNISKIP( uvc );                               \
1134         }                                                                   \
1135         break;                                                              \
1136     case trie_utf8:                                                         \
1137         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1138         break;                                                              \
1139     case trie_plain:                                                        \
1140         uvc = (UV)*uc;                                                      \
1141         len = 1;                                                            \
1142     }                                                                       \
1143     if (uvc < 256) {                                                        \
1144         charid = trie->charmap[ uvc ];                                      \
1145     }                                                                       \
1146     else {                                                                  \
1147         charid = 0;                                                         \
1148         if (widecharmap) {                                                  \
1149             SV** const svpp = hv_fetch(widecharmap,                         \
1150                         (char*)&uvc, sizeof(UV), 0);                        \
1151             if (svpp)                                                       \
1152                 charid = (U16)SvIV(*svpp);                                  \
1153         }                                                                   \
1154     }                                                                       \
1155 } STMT_END
1156
1157 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1158 {                                                      \
1159     char *my_strend= (char *)strend;                   \
1160     if ( (CoNd)                                        \
1161          && (ln == len ||                              \
1162              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1163                         m, NULL, ln, (bool)UTF))       \
1164          && (!reginfo || regtry(reginfo, &s)) )        \
1165         goto got_it;                                   \
1166     else {                                             \
1167          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1168          uvchr_to_utf8(tmpbuf, c);                     \
1169          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1170          if ( f != c                                   \
1171               && (f == c1 || f == c2)                  \
1172               && (ln == len ||                         \
1173                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1174                               m, NULL, ln, (bool)UTF)) \
1175               && (!reginfo || regtry(reginfo, &s)) )   \
1176               goto got_it;                             \
1177     }                                                  \
1178 }                                                      \
1179 s += len
1180
1181 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1182 STMT_START {                                              \
1183     while (s <= e) {                                      \
1184         if ( (CoNd)                                       \
1185              && (ln == 1 || !(OP(c) == EXACTF             \
1186                               ? ibcmp(s, m, ln)           \
1187                               : ibcmp_locale(s, m, ln)))  \
1188              && (!reginfo || regtry(reginfo, &s)) )        \
1189             goto got_it;                                  \
1190         s++;                                              \
1191     }                                                     \
1192 } STMT_END
1193
1194 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1195 STMT_START {                                          \
1196     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1197         CoDe                                          \
1198         s += uskip;                                   \
1199     }                                                 \
1200 } STMT_END
1201
1202 #define REXEC_FBC_SCAN(CoDe)                          \
1203 STMT_START {                                          \
1204     while (s < strend) {                              \
1205         CoDe                                          \
1206         s++;                                          \
1207     }                                                 \
1208 } STMT_END
1209
1210 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1211 REXEC_FBC_UTF8_SCAN(                                  \
1212     if (CoNd) {                                       \
1213         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1214             goto got_it;                              \
1215         else                                          \
1216             tmp = doevery;                            \
1217     }                                                 \
1218     else                                              \
1219         tmp = 1;                                      \
1220 )
1221
1222 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1223 REXEC_FBC_SCAN(                                       \
1224     if (CoNd) {                                       \
1225         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1226             goto got_it;                              \
1227         else                                          \
1228             tmp = doevery;                            \
1229     }                                                 \
1230     else                                              \
1231         tmp = 1;                                      \
1232 )
1233
1234 #define REXEC_FBC_TRYIT               \
1235 if ((!reginfo || regtry(reginfo, &s))) \
1236     goto got_it
1237
1238 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1239     if (do_utf8) {                                             \
1240         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1241     }                                                          \
1242     else {                                                     \
1243         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1244     }                                                          \
1245     break
1246
1247 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1248     if (do_utf8) {                                             \
1249         UtFpReLoAd;                                            \
1250         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1251     }                                                          \
1252     else {                                                     \
1253         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1254     }                                                          \
1255     break
1256
1257 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1258     PL_reg_flags |= RF_tainted;                                \
1259     if (do_utf8) {                                             \
1260         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1261     }                                                          \
1262     else {                                                     \
1263         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1264     }                                                          \
1265     break
1266
1267 #define DUMP_EXEC_POS(li,s,doutf8) \
1268     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1269
1270 /* We know what class REx starts with.  Try to find this position... */
1271 /* if reginfo is NULL, its a dryrun */
1272 /* annoyingly all the vars in this routine have different names from their counterparts
1273    in regmatch. /grrr */
1274
1275 STATIC char *
1276 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1277     const char *strend, regmatch_info *reginfo)
1278 {
1279         dVAR;
1280         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1281         char *m;
1282         STRLEN ln;
1283         STRLEN lnc;
1284         register STRLEN uskip;
1285         unsigned int c1;
1286         unsigned int c2;
1287         char *e;
1288         register I32 tmp = 1;   /* Scratch variable? */
1289         register const bool do_utf8 = PL_reg_match_utf8;
1290         RXi_GET_DECL(prog,progi);
1291
1292         PERL_ARGS_ASSERT_FIND_BYCLASS;
1293
1294         /* We know what class it must start with. */
1295         switch (OP(c)) {
1296         case ANYOF:
1297             if (do_utf8) {
1298                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1299                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1300                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1301                           REGINCLASS(prog, c, (U8*)s));
1302             }
1303             else {
1304                  while (s < strend) {
1305                       STRLEN skip = 1;
1306
1307                       if (REGINCLASS(prog, c, (U8*)s) ||
1308                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1309                            /* The assignment of 2 is intentional:
1310                             * for the folded sharp s, the skip is 2. */
1311                            (skip = SHARP_S_SKIP))) {
1312                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1313                                 goto got_it;
1314                            else
1315                                 tmp = doevery;
1316                       }
1317                       else
1318                            tmp = 1;
1319                       s += skip;
1320                  }
1321             }
1322             break;
1323         case CANY:
1324             REXEC_FBC_SCAN(
1325                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1326                     goto got_it;
1327                 else
1328                     tmp = doevery;
1329             );
1330             break;
1331         case EXACTF:
1332             m   = STRING(c);
1333             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1334             lnc = (I32) ln;     /* length to match in characters */
1335             if (UTF) {
1336                 STRLEN ulen1, ulen2;
1337                 U8 *sm = (U8 *) m;
1338                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1339                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1340                 /* used by commented-out code below */
1341                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1342
1343                 /* XXX: Since the node will be case folded at compile
1344                    time this logic is a little odd, although im not
1345                    sure that its actually wrong. --dmq */
1346
1347                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1348                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1349
1350                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1351                    codepoint of the first character in the converted
1352                    form, yet originally we did the extra step.
1353                    No tests fail by commenting this code out however
1354                    so Ive left it out. -- dmq.
1355
1356                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1357                                     0, uniflags);
1358                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1359                                     0, uniflags);
1360                 */
1361
1362                 lnc = 0;
1363                 while (sm < ((U8 *) m + ln)) {
1364                     lnc++;
1365                     sm += UTF8SKIP(sm);
1366                 }
1367             }
1368             else {
1369                 c1 = *(U8*)m;
1370                 c2 = PL_fold[c1];
1371             }
1372             goto do_exactf;
1373         case EXACTFL:
1374             m   = STRING(c);
1375             ln  = STR_LEN(c);
1376             lnc = (I32) ln;
1377             c1 = *(U8*)m;
1378             c2 = PL_fold_locale[c1];
1379           do_exactf:
1380             e = HOP3c(strend, -((I32)lnc), s);
1381
1382             if (!reginfo && e < s)
1383                 e = s;                  /* Due to minlen logic of intuit() */
1384
1385             /* The idea in the EXACTF* cases is to first find the
1386              * first character of the EXACTF* node and then, if
1387              * necessary, case-insensitively compare the full
1388              * text of the node.  The c1 and c2 are the first
1389              * characters (though in Unicode it gets a bit
1390              * more complicated because there are more cases
1391              * than just upper and lower: one needs to use
1392              * the so-called folding case for case-insensitive
1393              * matching (called "loose matching" in Unicode).
1394              * ibcmp_utf8() will do just that. */
1395
1396             if (do_utf8 || UTF) {
1397                 UV c, f;
1398                 U8 tmpbuf [UTF8_MAXBYTES+1];
1399                 STRLEN len = 1;
1400                 STRLEN foldlen;
1401                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1402                 if (c1 == c2) {
1403                     /* Upper and lower of 1st char are equal -
1404                      * probably not a "letter". */
1405                     while (s <= e) {
1406                         if (do_utf8) {
1407                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1408                                            uniflags);
1409                         } else {
1410                             c = *((U8*)s);
1411                         }
1412                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1413                     }
1414                 }
1415                 else {
1416                     while (s <= e) {
1417                         if (do_utf8) {
1418                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1419                                            uniflags);
1420                         } else {
1421                             c = *((U8*)s);
1422                         }
1423
1424                         /* Handle some of the three Greek sigmas cases.
1425                          * Note that not all the possible combinations
1426                          * are handled here: some of them are handled
1427                          * by the standard folding rules, and some of
1428                          * them (the character class or ANYOF cases)
1429                          * are handled during compiletime in
1430                          * regexec.c:S_regclass(). */
1431                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1432                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1433                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1434
1435                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1436                     }
1437                 }
1438             }
1439             else {
1440                 /* Neither pattern nor string are UTF8 */
1441                 if (c1 == c2)
1442                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1443                 else
1444                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1445             }
1446             break;
1447         case BOUNDL:
1448             PL_reg_flags |= RF_tainted;
1449             /* FALL THROUGH */
1450         case BOUND:
1451             if (do_utf8) {
1452                 if (s == PL_bostr)
1453                     tmp = '\n';
1454                 else {
1455                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1456                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1457                 }
1458                 tmp = ((OP(c) == BOUND ?
1459                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1460                 LOAD_UTF8_CHARCLASS_ALNUM();
1461                 REXEC_FBC_UTF8_SCAN(
1462                     if (tmp == !(OP(c) == BOUND ?
1463                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1464                                  isALNUM_LC_utf8((U8*)s)))
1465                     {
1466                         tmp = !tmp;
1467                         REXEC_FBC_TRYIT;
1468                 }
1469                 );
1470             }
1471             else {
1472                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1473                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1474                 REXEC_FBC_SCAN(
1475                     if (tmp ==
1476                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1477                         tmp = !tmp;
1478                         REXEC_FBC_TRYIT;
1479                 }
1480                 );
1481             }
1482             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1483                 goto got_it;
1484             break;
1485         case NBOUNDL:
1486             PL_reg_flags |= RF_tainted;
1487             /* FALL THROUGH */
1488         case NBOUND:
1489             if (do_utf8) {
1490                 if (s == PL_bostr)
1491                     tmp = '\n';
1492                 else {
1493                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1494                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1495                 }
1496                 tmp = ((OP(c) == NBOUND ?
1497                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1498                 LOAD_UTF8_CHARCLASS_ALNUM();
1499                 REXEC_FBC_UTF8_SCAN(
1500                     if (tmp == !(OP(c) == NBOUND ?
1501                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1502                                  isALNUM_LC_utf8((U8*)s)))
1503                         tmp = !tmp;
1504                     else REXEC_FBC_TRYIT;
1505                 );
1506             }
1507             else {
1508                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1509                 tmp = ((OP(c) == NBOUND ?
1510                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1511                 REXEC_FBC_SCAN(
1512                     if (tmp ==
1513                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1514                         tmp = !tmp;
1515                     else REXEC_FBC_TRYIT;
1516                 );
1517             }
1518             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1519                 goto got_it;
1520             break;
1521         case ALNUM:
1522             REXEC_FBC_CSCAN_PRELOAD(
1523                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
1524                 swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1525                 isALNUM(*s)
1526             );
1527         case ALNUML:
1528             REXEC_FBC_CSCAN_TAINT(
1529                 isALNUM_LC_utf8((U8*)s),
1530                 isALNUM_LC(*s)
1531             );
1532         case NALNUM:
1533             REXEC_FBC_CSCAN_PRELOAD(
1534                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
1535                 !swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1536                 !isALNUM(*s)
1537             );
1538         case NALNUML:
1539             REXEC_FBC_CSCAN_TAINT(
1540                 !isALNUM_LC_utf8((U8*)s),
1541                 !isALNUM_LC(*s)
1542             );
1543         case SPACE:
1544             REXEC_FBC_CSCAN_PRELOAD(
1545                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1546                 *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
1547                 isSPACE(*s)
1548             );
1549         case SPACEL:
1550             REXEC_FBC_CSCAN_TAINT(
1551                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1552                 isSPACE_LC(*s)
1553             );
1554         case NSPACE:
1555             REXEC_FBC_CSCAN_PRELOAD(
1556                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1557                 !(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
1558                 !isSPACE(*s)
1559             );
1560         case NSPACEL:
1561             REXEC_FBC_CSCAN_TAINT(
1562                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1563                 !isSPACE_LC(*s)
1564             );
1565         case DIGIT:
1566             REXEC_FBC_CSCAN_PRELOAD(
1567                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1568                 swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1569                 isDIGIT(*s)
1570             );
1571         case DIGITL:
1572             REXEC_FBC_CSCAN_TAINT(
1573                 isDIGIT_LC_utf8((U8*)s),
1574                 isDIGIT_LC(*s)
1575             );
1576         case NDIGIT:
1577             REXEC_FBC_CSCAN_PRELOAD(
1578                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1579                 !swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1580                 !isDIGIT(*s)
1581             );
1582         case NDIGITL:
1583             REXEC_FBC_CSCAN_TAINT(
1584                 !isDIGIT_LC_utf8((U8*)s),
1585                 !isDIGIT_LC(*s)
1586             );
1587         case LNBREAK:
1588             REXEC_FBC_CSCAN(
1589                 is_LNBREAK_utf8(s),
1590                 is_LNBREAK_latin1(s)
1591             );
1592         case VERTWS:
1593             REXEC_FBC_CSCAN(
1594                 is_VERTWS_utf8(s),
1595                 is_VERTWS_latin1(s)
1596             );
1597         case NVERTWS:
1598             REXEC_FBC_CSCAN(
1599                 !is_VERTWS_utf8(s),
1600                 !is_VERTWS_latin1(s)
1601             );
1602         case HORIZWS:
1603             REXEC_FBC_CSCAN(
1604                 is_HORIZWS_utf8(s),
1605                 is_HORIZWS_latin1(s)
1606             );
1607         case NHORIZWS:
1608             REXEC_FBC_CSCAN(
1609                 !is_HORIZWS_utf8(s),
1610                 !is_HORIZWS_latin1(s)
1611             );
1612         case AHOCORASICKC:
1613         case AHOCORASICK:
1614             {
1615                 DECL_TRIE_TYPE(c);
1616                 /* what trie are we using right now */
1617                 reg_ac_data *aho
1618                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1619                 reg_trie_data *trie
1620                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1621                 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1622
1623                 const char *last_start = strend - trie->minlen;
1624 #ifdef DEBUGGING
1625                 const char *real_start = s;
1626 #endif
1627                 STRLEN maxlen = trie->maxlen;
1628                 SV *sv_points;
1629                 U8 **points; /* map of where we were in the input string
1630                                 when reading a given char. For ASCII this
1631                                 is unnecessary overhead as the relationship
1632                                 is always 1:1, but for Unicode, especially
1633                                 case folded Unicode this is not true. */
1634                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1635                 U8 *bitmap=NULL;
1636
1637
1638                 GET_RE_DEBUG_FLAGS_DECL;
1639
1640                 /* We can't just allocate points here. We need to wrap it in
1641                  * an SV so it gets freed properly if there is a croak while
1642                  * running the match */
1643                 ENTER;
1644                 SAVETMPS;
1645                 sv_points=newSV(maxlen * sizeof(U8 *));
1646                 SvCUR_set(sv_points,
1647                     maxlen * sizeof(U8 *));
1648                 SvPOK_on(sv_points);
1649                 sv_2mortal(sv_points);
1650                 points=(U8**)SvPV_nolen(sv_points );
1651                 if ( trie_type != trie_utf8_fold
1652                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1653                 {
1654                     if (trie->bitmap)
1655                         bitmap=(U8*)trie->bitmap;
1656                     else
1657                         bitmap=(U8*)ANYOF_BITMAP(c);
1658                 }
1659                 /* this is the Aho-Corasick algorithm modified a touch
1660                    to include special handling for long "unknown char"
1661                    sequences. The basic idea being that we use AC as long
1662                    as we are dealing with a possible matching char, when
1663                    we encounter an unknown char (and we have not encountered
1664                    an accepting state) we scan forward until we find a legal
1665                    starting char.
1666                    AC matching is basically that of trie matching, except
1667                    that when we encounter a failing transition, we fall back
1668                    to the current states "fail state", and try the current char
1669                    again, a process we repeat until we reach the root state,
1670                    state 1, or a legal transition. If we fail on the root state
1671                    then we can either terminate if we have reached an accepting
1672                    state previously, or restart the entire process from the beginning
1673                    if we have not.
1674
1675                  */
1676                 while (s <= last_start) {
1677                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1678                     U8 *uc = (U8*)s;
1679                     U16 charid = 0;
1680                     U32 base = 1;
1681                     U32 state = 1;
1682                     UV uvc = 0;
1683                     STRLEN len = 0;
1684                     STRLEN foldlen = 0;
1685                     U8 *uscan = (U8*)NULL;
1686                     U8 *leftmost = NULL;
1687 #ifdef DEBUGGING
1688                     U32 accepted_word= 0;
1689 #endif
1690                     U32 pointpos = 0;
1691
1692                     while ( state && uc <= (U8*)strend ) {
1693                         int failed=0;
1694                         U32 word = aho->states[ state ].wordnum;
1695
1696                         if( state==1 ) {
1697                             if ( bitmap ) {
1698                                 DEBUG_TRIE_EXECUTE_r(
1699                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1700                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1701                                             (char *)uc, do_utf8 );
1702                                         PerlIO_printf( Perl_debug_log,
1703                                             " Scanning for legal start char...\n");
1704                                     }
1705                                 );
1706                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1707                                     uc++;
1708                                 }
1709                                 s= (char *)uc;
1710                             }
1711                             if (uc >(U8*)last_start) break;
1712                         }
1713
1714                         if ( word ) {
1715                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1716                             if (!leftmost || lpos < leftmost) {
1717                                 DEBUG_r(accepted_word=word);
1718                                 leftmost= lpos;
1719                             }
1720                             if (base==0) break;
1721
1722                         }
1723                         points[pointpos++ % maxlen]= uc;
1724                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1725                                              uscan, len, uvc, charid, foldlen,
1726                                              foldbuf, uniflags);
1727                         DEBUG_TRIE_EXECUTE_r({
1728                             dump_exec_pos( (char *)uc, c, strend, real_start,
1729                                 s,   do_utf8 );
1730                             PerlIO_printf(Perl_debug_log,
1731                                 " Charid:%3u CP:%4"UVxf" ",
1732                                  charid, uvc);
1733                         });
1734
1735                         do {
1736 #ifdef DEBUGGING
1737                             word = aho->states[ state ].wordnum;
1738 #endif
1739                             base = aho->states[ state ].trans.base;
1740
1741                             DEBUG_TRIE_EXECUTE_r({
1742                                 if (failed)
1743                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1744                                         s,   do_utf8 );
1745                                 PerlIO_printf( Perl_debug_log,
1746                                     "%sState: %4"UVxf", word=%"UVxf,
1747                                     failed ? " Fail transition to " : "",
1748                                     (UV)state, (UV)word);
1749                             });
1750                             if ( base ) {
1751                                 U32 tmp;
1752                                 if (charid &&
1753                                      (base + charid > trie->uniquecharcount )
1754                                      && (base + charid - 1 - trie->uniquecharcount
1755                                             < trie->lasttrans)
1756                                      && trie->trans[base + charid - 1 -
1757                                             trie->uniquecharcount].check == state
1758                                      && (tmp=trie->trans[base + charid - 1 -
1759                                         trie->uniquecharcount ].next))
1760                                 {
1761                                     DEBUG_TRIE_EXECUTE_r(
1762                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1763                                     state = tmp;
1764                                     break;
1765                                 }
1766                                 else {
1767                                     DEBUG_TRIE_EXECUTE_r(
1768                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1769                                     failed = 1;
1770                                     state = aho->fail[state];
1771                                 }
1772                             }
1773                             else {
1774                                 /* we must be accepting here */
1775                                 DEBUG_TRIE_EXECUTE_r(
1776                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1777                                 failed = 1;
1778                                 break;
1779                             }
1780                         } while(state);
1781                         uc += len;
1782                         if (failed) {
1783                             if (leftmost)
1784                                 break;
1785                             if (!state) state = 1;
1786                         }
1787                     }
1788                     if ( aho->states[ state ].wordnum ) {
1789                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1790                         if (!leftmost || lpos < leftmost) {
1791                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1792                             leftmost = lpos;
1793                         }
1794                     }
1795                     if (leftmost) {
1796                         s = (char*)leftmost;
1797                         DEBUG_TRIE_EXECUTE_r({
1798                             PerlIO_printf(
1799                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1800                                 (UV)accepted_word, (IV)(s - real_start)
1801                             );
1802                         });
1803                         if (!reginfo || regtry(reginfo, &s)) {
1804                             FREETMPS;
1805                             LEAVE;
1806                             goto got_it;
1807                         }
1808                         s = HOPc(s,1);
1809                         DEBUG_TRIE_EXECUTE_r({
1810                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1811                         });
1812                     } else {
1813                         DEBUG_TRIE_EXECUTE_r(
1814                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1815                         break;
1816                     }
1817                 }
1818                 FREETMPS;
1819                 LEAVE;
1820             }
1821             break;
1822         default:
1823             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1824             break;
1825         }
1826         return 0;
1827       got_it:
1828         return s;
1829 }
1830
1831
1832 /*
1833  - regexec_flags - match a regexp against a string
1834  */
1835 I32
1836 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1837               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1838 /* strend: pointer to null at end of string */
1839 /* strbeg: real beginning of string */
1840 /* minend: end of match must be >=minend after stringarg. */
1841 /* data: May be used for some additional optimizations.
1842          Currently its only used, with a U32 cast, for transmitting
1843          the ganch offset when doing a /g match. This will change */
1844 /* nosave: For optimizations. */
1845 {
1846     dVAR;
1847     struct regexp *const prog = (struct regexp *)SvANY(rx);
1848     /*register*/ char *s;
1849     register regnode *c;
1850     /*register*/ char *startpos = stringarg;
1851     I32 minlen;         /* must match at least this many chars */
1852     I32 dontbother = 0; /* how many characters not to try at end */
1853     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1854     I32 scream_pos = -1;                /* Internal iterator of scream. */
1855     char *scream_olds = NULL;
1856     const bool do_utf8 = (bool)DO_UTF8(sv);
1857     I32 multiline;
1858     RXi_GET_DECL(prog,progi);
1859     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1860     regexp_paren_pair *swap = NULL;
1861     GET_RE_DEBUG_FLAGS_DECL;
1862
1863     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1864     PERL_UNUSED_ARG(data);
1865
1866     /* Be paranoid... */
1867     if (prog == NULL || startpos == NULL) {
1868         Perl_croak(aTHX_ "NULL regexp parameter");
1869         return 0;
1870     }
1871
1872     multiline = prog->extflags & RXf_PMf_MULTILINE;
1873     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1874
1875     RX_MATCH_UTF8_set(rx, do_utf8);
1876     DEBUG_EXECUTE_r(
1877         debug_start_match(rx, do_utf8, startpos, strend,
1878         "Matching");
1879     );
1880
1881     minlen = prog->minlen;
1882
1883     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1884         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1885                               "String too short [regexec_flags]...\n"));
1886         goto phooey;
1887     }
1888
1889
1890     /* Check validity of program. */
1891     if (UCHARAT(progi->program) != REG_MAGIC) {
1892         Perl_croak(aTHX_ "corrupted regexp program");
1893     }
1894
1895     PL_reg_flags = 0;
1896     PL_reg_eval_set = 0;
1897     PL_reg_maxiter = 0;
1898
1899     if (RX_UTF8(rx))
1900         PL_reg_flags |= RF_utf8;
1901
1902     /* Mark beginning of line for ^ and lookbehind. */
1903     reginfo.bol = startpos; /* XXX not used ??? */
1904     PL_bostr  = strbeg;
1905     reginfo.sv = sv;
1906
1907     /* Mark end of line for $ (and such) */
1908     PL_regeol = strend;
1909
1910     /* see how far we have to get to not match where we matched before */
1911     reginfo.till = startpos+minend;
1912
1913     /* If there is a "must appear" string, look for it. */
1914     s = startpos;
1915
1916     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1917         MAGIC *mg;
1918         if (flags & REXEC_IGNOREPOS){   /* Means: check only at start */
1919             reginfo.ganch = startpos + prog->gofs;
1920             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1921               "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
1922         } else if (sv && SvTYPE(sv) >= SVt_PVMG
1923                   && SvMAGIC(sv)
1924                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1925                   && mg->mg_len >= 0) {
1926             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1927             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1928                 "GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
1929
1930             if (prog->extflags & RXf_ANCH_GPOS) {
1931                 if (s > reginfo.ganch)
1932                     goto phooey;
1933                 s = reginfo.ganch - prog->gofs;
1934                 DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1935                      "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
1936                 if (s < strbeg)
1937                     goto phooey;
1938             }
1939         }
1940         else if (data) {
1941             reginfo.ganch = strbeg + PTR2UV(data);
1942             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1943                  "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
1944
1945         } else {                                /* pos() not defined */
1946             reginfo.ganch = strbeg;
1947             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1948                  "GPOS: reginfo.ganch = strbeg\n"));
1949         }
1950     }
1951     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1952         /* We have to be careful. If the previous successful match
1953            was from this regex we don't want a subsequent partially
1954            successful match to clobber the old results.
1955            So when we detect this possibility we add a swap buffer
1956            to the re, and switch the buffer each match. If we fail
1957            we switch it back, otherwise we leave it swapped.
1958         */
1959         swap = prog->offs;
1960         /* do we need a save destructor here for eval dies? */
1961         Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1962     }
1963     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1964         re_scream_pos_data d;
1965
1966         d.scream_olds = &scream_olds;
1967         d.scream_pos = &scream_pos;
1968         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1969         if (!s) {
1970             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1971             goto phooey;        /* not present */
1972         }
1973     }
1974
1975
1976
1977     /* Simplest case:  anchored match need be tried only once. */
1978     /*  [unless only anchor is BOL and multiline is set] */
1979     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1980         if (s == startpos && regtry(&reginfo, &startpos))
1981             goto got_it;
1982         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1983                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1984         {
1985             char *end;
1986
1987             if (minlen)
1988                 dontbother = minlen - 1;
1989             end = HOP3c(strend, -dontbother, strbeg) - 1;
1990             /* for multiline we only have to try after newlines */
1991             if (prog->check_substr || prog->check_utf8) {
1992                 if (s == startpos)
1993                     goto after_try;
1994                 while (1) {
1995                     if (regtry(&reginfo, &s))
1996                         goto got_it;
1997                   after_try:
1998                     if (s > end)
1999                         goto phooey;
2000                     if (prog->extflags & RXf_USE_INTUIT) {
2001                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
2002                         if (!s)
2003                             goto phooey;
2004                     }
2005                     else
2006                         s++;
2007                 }
2008             } else {
2009                 if (s > startpos)
2010                     s--;
2011                 while (s < end) {
2012                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
2013                         if (regtry(&reginfo, &s))
2014                             goto got_it;
2015                     }
2016                 }
2017             }
2018         }
2019         goto phooey;
2020     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
2021     {
2022         /* the warning about reginfo.ganch being used without intialization
2023            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
2024            and we only enter this block when the same bit is set. */
2025         char *tmp_s = reginfo.ganch - prog->gofs;
2026
2027         if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
2028             goto got_it;
2029         goto phooey;
2030     }
2031
2032     /* Messy cases:  unanchored match. */
2033     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
2034         /* we have /x+whatever/ */
2035         /* it must be a one character string (XXXX Except UTF?) */
2036         char ch;
2037 #ifdef DEBUGGING
2038         int did_match = 0;
2039 #endif
2040         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2041             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2042         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
2043
2044         if (do_utf8) {
2045             REXEC_FBC_SCAN(
2046                 if (*s == ch) {
2047                     DEBUG_EXECUTE_r( did_match = 1 );
2048                     if (regtry(&reginfo, &s)) goto got_it;
2049                     s += UTF8SKIP(s);
2050                     while (s < strend && *s == ch)
2051                         s += UTF8SKIP(s);
2052                 }
2053             );
2054         }
2055         else {
2056             REXEC_FBC_SCAN(
2057                 if (*s == ch) {
2058                     DEBUG_EXECUTE_r( did_match = 1 );
2059                     if (regtry(&reginfo, &s)) goto got_it;
2060                     s++;
2061                     while (s < strend && *s == ch)
2062                         s++;
2063                 }
2064             );
2065         }
2066         DEBUG_EXECUTE_r(if (!did_match)
2067                 PerlIO_printf(Perl_debug_log,
2068                                   "Did not find anchored character...\n")
2069                );
2070     }
2071     else if (prog->anchored_substr != NULL
2072               || prog->anchored_utf8 != NULL
2073               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
2074                   && prog->float_max_offset < strend - s)) {
2075         SV *must;
2076         I32 back_max;
2077         I32 back_min;
2078         char *last;
2079         char *last1;            /* Last position checked before */
2080 #ifdef DEBUGGING
2081         int did_match = 0;
2082 #endif
2083         if (prog->anchored_substr || prog->anchored_utf8) {
2084             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2085                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2086             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
2087             back_max = back_min = prog->anchored_offset;
2088         } else {
2089             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2090                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2091             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
2092             back_max = prog->float_max_offset;
2093             back_min = prog->float_min_offset;
2094         }
2095
2096
2097         if (must == &PL_sv_undef)
2098             /* could not downgrade utf8 check substring, so must fail */
2099             goto phooey;
2100
2101         if (back_min<0) {
2102             last = strend;
2103         } else {
2104             last = HOP3c(strend,        /* Cannot start after this */
2105                   -(I32)(CHR_SVLEN(must)
2106                          - (SvTAIL(must) != 0) + back_min), strbeg);
2107         }
2108         if (s > PL_bostr)
2109             last1 = HOPc(s, -1);
2110         else
2111             last1 = s - 1;      /* bogus */
2112
2113         /* XXXX check_substr already used to find "s", can optimize if
2114            check_substr==must. */
2115         scream_pos = -1;
2116         dontbother = end_shift;
2117         strend = HOPc(strend, -dontbother);
2118         while ( (s <= last) &&
2119                 ((flags & REXEC_SCREAM)
2120                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2121                                     end_shift, &scream_pos, 0))
2122                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2123                                   (unsigned char*)strend, must,
2124                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2125             /* we may be pointing at the wrong string */
2126             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2127                 s = strbeg + (s - SvPVX_const(sv));
2128             DEBUG_EXECUTE_r( did_match = 1 );
2129             if (HOPc(s, -back_max) > last1) {
2130                 last1 = HOPc(s, -back_min);
2131                 s = HOPc(s, -back_max);
2132             }
2133             else {
2134                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2135
2136                 last1 = HOPc(s, -back_min);
2137                 s = t;
2138             }
2139             if (do_utf8) {
2140                 while (s <= last1) {
2141                     if (regtry(&reginfo, &s))
2142                         goto got_it;
2143                     s += UTF8SKIP(s);
2144                 }
2145             }
2146             else {
2147                 while (s <= last1) {
2148                     if (regtry(&reginfo, &s))
2149                         goto got_it;
2150                     s++;
2151                 }
2152             }
2153         }
2154         DEBUG_EXECUTE_r(if (!did_match) {
2155             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2156                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2157             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2158                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2159                                ? "anchored" : "floating"),
2160                 quoted, RE_SV_TAIL(must));
2161         });
2162         goto phooey;
2163     }
2164     else if ( (c = progi->regstclass) ) {
2165         if (minlen) {
2166             const OPCODE op = OP(progi->regstclass);
2167             /* don't bother with what can't match */
2168             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2169                 strend = HOPc(strend, -(minlen - 1));
2170         }
2171         DEBUG_EXECUTE_r({
2172             SV * const prop = sv_newmortal();
2173             regprop(prog, prop, c);
2174             {
2175                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2176                     s,strend-s,60);
2177                 PerlIO_printf(Perl_debug_log,
2178                     "Matching stclass %.*s against %s (%d chars)\n",
2179                     (int)SvCUR(prop), SvPVX_const(prop),
2180                      quoted, (int)(strend - s));
2181             }
2182         });
2183         if (find_byclass(prog, c, s, strend, &reginfo))
2184             goto got_it;
2185         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2186     }
2187     else {
2188         dontbother = 0;
2189         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2190             /* Trim the end. */
2191             char *last;
2192             SV* float_real;
2193
2194             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2195                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2196             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2197
2198             if (flags & REXEC_SCREAM) {
2199                 last = screaminstr(sv, float_real, s - strbeg,
2200                                    end_shift, &scream_pos, 1); /* last one */
2201                 if (!last)
2202                     last = scream_olds; /* Only one occurrence. */
2203                 /* we may be pointing at the wrong string */
2204                 else if (RXp_MATCH_COPIED(prog))
2205                     s = strbeg + (s - SvPVX_const(sv));
2206             }
2207             else {
2208                 STRLEN len;
2209                 const char * const little = SvPV_const(float_real, len);
2210
2211                 if (SvTAIL(float_real)) {
2212                     if (memEQ(strend - len + 1, little, len - 1))
2213                         last = strend - len + 1;
2214                     else if (!multiline)
2215                         last = memEQ(strend - len, little, len)
2216                             ? strend - len : NULL;
2217                     else
2218                         goto find_last;
2219                 } else {
2220                   find_last:
2221                     if (len)
2222                         last = rninstr(s, strend, little, little + len);
2223                     else
2224                         last = strend;  /* matching "$" */
2225                 }
2226             }
2227             if (last == NULL) {
2228                 DEBUG_EXECUTE_r(
2229                     PerlIO_printf(Perl_debug_log,
2230                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2231                         PL_colors[4], PL_colors[5]));
2232                 goto phooey; /* Should not happen! */
2233             }
2234             dontbother = strend - last + prog->float_min_offset;
2235         }
2236         if (minlen && (dontbother < minlen))
2237             dontbother = minlen - 1;
2238         strend -= dontbother;              /* this one's always in bytes! */
2239         /* We don't know much -- general case. */
2240         if (do_utf8) {
2241             for (;;) {
2242                 if (regtry(&reginfo, &s))
2243                     goto got_it;
2244                 if (s >= strend)
2245                     break;
2246                 s += UTF8SKIP(s);
2247             };
2248         }
2249         else {
2250             do {
2251                 if (regtry(&reginfo, &s))
2252                     goto got_it;
2253             } while (s++ < strend);
2254         }
2255     }
2256
2257     /* Failure. */
2258     goto phooey;
2259
2260 got_it:
2261     Safefree(swap);
2262     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2263
2264     if (PL_reg_eval_set)
2265         restore_pos(aTHX_ prog);
2266     if (RXp_PAREN_NAMES(prog))
2267         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2268
2269     /* make sure $`, $&, $', and $digit will work later */
2270     if ( !(flags & REXEC_NOT_FIRST) ) {
2271         RX_MATCH_COPY_FREE(rx);
2272         if (flags & REXEC_COPY_STR) {
2273             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2274 #ifdef PERL_OLD_COPY_ON_WRITE
2275             if ((SvIsCOW(sv)
2276                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2277                 if (DEBUG_C_TEST) {
2278                     PerlIO_printf(Perl_debug_log,
2279                                   "Copy on write: regexp capture, type %d\n",
2280                                   (int) SvTYPE(sv));
2281                 }
2282                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2283                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2284                 assert (SvPOKp(prog->saved_copy));
2285             } else
2286 #endif
2287             {
2288                 RX_MATCH_COPIED_on(rx);
2289                 s = savepvn(strbeg, i);
2290                 prog->subbeg = s;
2291             }
2292             prog->sublen = i;
2293         }
2294         else {
2295             prog->subbeg = strbeg;
2296             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2297         }
2298     }
2299
2300     return 1;
2301
2302 phooey:
2303     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2304                           PL_colors[4], PL_colors[5]));
2305     if (PL_reg_eval_set)
2306         restore_pos(aTHX_ prog);
2307     if (swap) {
2308         /* we failed :-( roll it back */
2309         Safefree(prog->offs);
2310         prog->offs = swap;
2311     }
2312
2313     return 0;
2314 }
2315
2316
2317 /*
2318  - regtry - try match at specific point
2319  */
2320 STATIC I32                      /* 0 failure, 1 success */
2321 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2322 {
2323     dVAR;
2324     CHECKPOINT lastcp;
2325     REGEXP *const rx = reginfo->prog;
2326     regexp *const prog = (struct regexp *)SvANY(rx);
2327     RXi_GET_DECL(prog,progi);
2328     GET_RE_DEBUG_FLAGS_DECL;
2329
2330     PERL_ARGS_ASSERT_REGTRY;
2331
2332     reginfo->cutpoint=NULL;
2333
2334     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2335         MAGIC *mg;
2336
2337         PL_reg_eval_set = RS_init;
2338         DEBUG_EXECUTE_r(DEBUG_s(
2339             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2340                           (IV)(PL_stack_sp - PL_stack_base));
2341             ));
2342         SAVESTACK_CXPOS();
2343         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2344         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2345         SAVETMPS;
2346         /* Apparently this is not needed, judging by wantarray. */
2347         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2348            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2349
2350         if (reginfo->sv) {
2351             /* Make $_ available to executed code. */
2352             if (reginfo->sv != DEFSV) {
2353                 SAVE_DEFSV;
2354                 DEFSV_set(reginfo->sv);
2355             }
2356
2357             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2358                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2359                 /* prepare for quick setting of pos */
2360 #ifdef PERL_OLD_COPY_ON_WRITE
2361                 if (SvIsCOW(reginfo->sv))
2362                     sv_force_normal_flags(reginfo->sv, 0);
2363 #endif
2364                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2365                                  &PL_vtbl_mglob, NULL, 0);
2366                 mg->mg_len = -1;
2367             }
2368             PL_reg_magic    = mg;
2369             PL_reg_oldpos   = mg->mg_len;
2370             SAVEDESTRUCTOR_X(restore_pos, prog);
2371         }
2372         if (!PL_reg_curpm) {
2373             Newxz(PL_reg_curpm, 1, PMOP);
2374 #ifdef USE_ITHREADS
2375             {
2376                 SV* const repointer = &PL_sv_undef;
2377                 /* this regexp is also owned by the new PL_reg_curpm, which
2378                    will try to free it.  */
2379                 av_push(PL_regex_padav, repointer);
2380                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2381                 PL_regex_pad = AvARRAY(PL_regex_padav);
2382             }
2383 #endif
2384         }
2385 #ifdef USE_ITHREADS
2386         /* It seems that non-ithreads works both with and without this code.
2387            So for efficiency reasons it seems best not to have the code
2388            compiled when it is not needed.  */
2389         /* This is safe against NULLs: */
2390         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2391         /* PM_reg_curpm owns a reference to this regexp.  */
2392         ReREFCNT_inc(rx);
2393 #endif
2394         PM_SETRE(PL_reg_curpm, rx);
2395         PL_reg_oldcurpm = PL_curpm;
2396         PL_curpm = PL_reg_curpm;
2397         if (RXp_MATCH_COPIED(prog)) {
2398             /*  Here is a serious problem: we cannot rewrite subbeg,
2399                 since it may be needed if this match fails.  Thus
2400                 $` inside (?{}) could fail... */
2401             PL_reg_oldsaved = prog->subbeg;
2402             PL_reg_oldsavedlen = prog->sublen;
2403 #ifdef PERL_OLD_COPY_ON_WRITE
2404             PL_nrs = prog->saved_copy;
2405 #endif
2406             RXp_MATCH_COPIED_off(prog);
2407         }
2408         else
2409             PL_reg_oldsaved = NULL;
2410         prog->subbeg = PL_bostr;
2411         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2412     }
2413     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2414     prog->offs[0].start = *startpos - PL_bostr;
2415     PL_reginput = *startpos;
2416     PL_reglastparen = &prog->lastparen;
2417     PL_reglastcloseparen = &prog->lastcloseparen;
2418     prog->lastparen = 0;
2419     prog->lastcloseparen = 0;
2420     PL_regsize = 0;
2421     PL_regoffs = prog->offs;
2422     if (PL_reg_start_tmpl <= prog->nparens) {
2423         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2424         if(PL_reg_start_tmp)
2425             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2426         else
2427             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2428     }
2429
2430     /* XXXX What this code is doing here?!!!  There should be no need
2431        to do this again and again, PL_reglastparen should take care of
2432        this!  --ilya*/
2433
2434     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2435      * Actually, the code in regcppop() (which Ilya may be meaning by
2436      * PL_reglastparen), is not needed at all by the test suite
2437      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2438      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2439      * Meanwhile, this code *is* needed for the
2440      * above-mentioned test suite tests to succeed.  The common theme
2441      * on those tests seems to be returning null fields from matches.
2442      * --jhi updated by dapm */
2443 #if 1
2444     if (prog->nparens) {
2445         regexp_paren_pair *pp = PL_regoffs;
2446         register I32 i;
2447         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2448             ++pp;
2449             pp->start = -1;
2450             pp->end = -1;
2451         }
2452     }
2453 #endif
2454     REGCP_SET(lastcp);
2455     if (regmatch(reginfo, progi->program + 1)) {
2456         PL_regoffs[0].end = PL_reginput - PL_bostr;
2457         return 1;
2458     }
2459     if (reginfo->cutpoint)
2460         *startpos= reginfo->cutpoint;
2461     REGCP_UNWIND(lastcp);
2462     return 0;
2463 }
2464
2465
2466 #define sayYES goto yes
2467 #define sayNO goto no
2468 #define sayNO_SILENT goto no_silent
2469
2470 /* we dont use STMT_START/END here because it leads to
2471    "unreachable code" warnings, which are bogus, but distracting. */
2472 #define CACHEsayNO \
2473     if (ST.cache_mask) \
2474        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2475     sayNO
2476
2477 /* this is used to determine how far from the left messages like
2478    'failed...' are printed. It should be set such that messages
2479    are inline with the regop output that created them.
2480 */
2481 #define REPORT_CODE_OFF 32
2482
2483
2484 /* Make sure there is a test for this +1 options in re_tests */
2485 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2486
2487 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2488 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2489
2490 #define SLAB_FIRST(s) (&(s)->states[0])
2491 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2492
2493 /* grab a new slab and return the first slot in it */
2494
2495 STATIC regmatch_state *
2496 S_push_slab(pTHX)
2497 {
2498 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2499     dMY_CXT;
2500 #endif
2501     regmatch_slab *s = PL_regmatch_slab->next;
2502     if (!s) {
2503         Newx(s, 1, regmatch_slab);
2504         s->prev = PL_regmatch_slab;
2505         s->next = NULL;
2506         PL_regmatch_slab->next = s;
2507     }
2508     PL_regmatch_slab = s;
2509     return SLAB_FIRST(s);
2510 }
2511
2512
2513 /* push a new state then goto it */
2514
2515 #define PUSH_STATE_GOTO(state, node) \
2516     scan = node; \
2517     st->resume_state = state; \
2518     goto push_state;
2519
2520 /* push a new state with success backtracking, then goto it */
2521
2522 #define PUSH_YES_STATE_GOTO(state, node) \
2523     scan = node; \
2524     st->resume_state = state; \
2525     goto push_yes_state;
2526
2527
2528
2529 /*
2530
2531 regmatch() - main matching routine
2532
2533 This is basically one big switch statement in a loop. We execute an op,
2534 set 'next' to point the next op, and continue. If we come to a point which
2535 we may need to backtrack to on failure such as (A|B|C), we push a
2536 backtrack state onto the backtrack stack. On failure, we pop the top
2537 state, and re-enter the loop at the state indicated. If there are no more
2538 states to pop, we return failure.
2539
2540 Sometimes we also need to backtrack on success; for example /A+/, where
2541 after successfully matching one A, we need to go back and try to
2542 match another one; similarly for lookahead assertions: if the assertion
2543 completes successfully, we backtrack to the state just before the assertion
2544 and then carry on.  In these cases, the pushed state is marked as
2545 'backtrack on success too'. This marking is in fact done by a chain of
2546 pointers, each pointing to the previous 'yes' state. On success, we pop to
2547 the nearest yes state, discarding any intermediate failure-only states.
2548 Sometimes a yes state is pushed just to force some cleanup code to be
2549 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2550 it to free the inner regex.
2551
2552 Note that failure backtracking rewinds the cursor position, while
2553 success backtracking leaves it alone.
2554
2555 A pattern is complete when the END op is executed, while a subpattern
2556 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2557 ops trigger the "pop to last yes state if any, otherwise return true"
2558 behaviour.
2559
2560 A common convention in this function is to use A and B to refer to the two
2561 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2562 the subpattern to be matched possibly multiple times, while B is the entire
2563 rest of the pattern. Variable and state names reflect this convention.
2564
2565 The states in the main switch are the union of ops and failure/success of
2566 substates associated with with that op.  For example, IFMATCH is the op
2567 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2568 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2569 successfully matched A and IFMATCH_A_fail is a state saying that we have
2570 just failed to match A. Resume states always come in pairs. The backtrack
2571 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2572 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2573 on success or failure.
2574
2575 The struct that holds a backtracking state is actually a big union, with
2576 one variant for each major type of op. The variable st points to the
2577 top-most backtrack struct. To make the code clearer, within each
2578 block of code we #define ST to alias the relevant union.
2579
2580 Here's a concrete example of a (vastly oversimplified) IFMATCH
2581 implementation:
2582
2583     switch (state) {
2584     ....
2585
2586 #define ST st->u.ifmatch
2587
2588     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2589         ST.foo = ...; // some state we wish to save
2590         ...
2591         // push a yes backtrack state with a resume value of
2592         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2593         // first node of A:
2594         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2595         // NOTREACHED
2596
2597     case IFMATCH_A: // we have successfully executed A; now continue with B
2598         next = B;
2599         bar = ST.foo; // do something with the preserved value
2600         break;
2601
2602     case IFMATCH_A_fail: // A failed, so the assertion failed
2603         ...;   // do some housekeeping, then ...
2604         sayNO; // propagate the failure
2605
2606 #undef ST
2607
2608     ...
2609     }
2610
2611 For any old-timers reading this who are familiar with the old recursive
2612 approach, the code above is equivalent to:
2613
2614     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2615     {
2616         int foo = ...
2617         ...
2618         if (regmatch(A)) {
2619             next = B;
2620             bar = foo;
2621             break;
2622         }
2623         ...;   // do some housekeeping, then ...
2624         sayNO; // propagate the failure
2625     }
2626
2627 The topmost backtrack state, pointed to by st, is usually free. If you
2628 want to claim it, populate any ST.foo fields in it with values you wish to
2629 save, then do one of
2630
2631         PUSH_STATE_GOTO(resume_state, node);
2632         PUSH_YES_STATE_GOTO(resume_state, node);
2633
2634 which sets that backtrack state's resume value to 'resume_state', pushes a
2635 new free entry to the top of the backtrack stack, then goes to 'node'.
2636 On backtracking, the free slot is popped, and the saved state becomes the
2637 new free state. An ST.foo field in this new top state can be temporarily
2638 accessed to retrieve values, but once the main loop is re-entered, it
2639 becomes available for reuse.
2640
2641 Note that the depth of the backtrack stack constantly increases during the
2642 left-to-right execution of the pattern, rather than going up and down with
2643 the pattern nesting. For example the stack is at its maximum at Z at the
2644 end of the pattern, rather than at X in the following:
2645
2646     /(((X)+)+)+....(Y)+....Z/
2647
2648 The only exceptions to this are lookahead/behind assertions and the cut,
2649 (?>A), which pop all the backtrack states associated with A before
2650 continuing.
2651
2652 Bascktrack state structs are allocated in slabs of about 4K in size.
2653 PL_regmatch_state and st always point to the currently active state,
2654 and PL_regmatch_slab points to the slab currently containing
2655 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2656 allocated, and is never freed until interpreter destruction. When the slab
2657 is full, a new one is allocated and chained to the end. At exit from
2658 regmatch(), slabs allocated since entry are freed.
2659
2660 */
2661
2662
2663 #define DEBUG_STATE_pp(pp)                                  \
2664     DEBUG_STATE_r({                                         \
2665         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2666         PerlIO_printf(Perl_debug_log,                       \
2667             "    %*s"pp" %s%s%s%s%s\n",                     \
2668             depth*2, "",                                    \
2669             PL_reg_name[st->resume_state],                     \
2670             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2671             ((st==yes_state) ? "Y" : ""),                   \
2672             ((st==mark_state) ? "M" : ""),                  \
2673             ((st==yes_state||st==mark_state) ? "]" : "")    \
2674         );                                                  \
2675     });
2676
2677
2678 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2679
2680 #ifdef DEBUGGING
2681
2682 STATIC void
2683 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2684     const char *start, const char *end, const char *blurb)
2685 {
2686     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2687
2688     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2689
2690     if (!PL_colorset)
2691             reginitcolors();
2692     {
2693         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2694             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2695
2696         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2697             start, end - start, 60);
2698
2699         PerlIO_printf(Perl_debug_log,
2700             "%s%s REx%s %s against %s\n",
2701                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2702
2703         if (do_utf8||utf8_pat)
2704             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2705                 utf8_pat ? "pattern" : "",
2706                 utf8_pat && do_utf8 ? " and " : "",
2707                 do_utf8 ? "string" : ""
2708             );
2709     }
2710 }
2711
2712 STATIC void
2713 S_dump_exec_pos(pTHX_ const char *locinput,
2714                       const regnode *scan,
2715                       const char *loc_regeol,
2716                       const char *loc_bostr,
2717                       const char *loc_reg_starttry,
2718                       const bool do_utf8)
2719 {
2720     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2721     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2722     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2723     /* The part of the string before starttry has one color
2724        (pref0_len chars), between starttry and current
2725        position another one (pref_len - pref0_len chars),
2726        after the current position the third one.
2727        We assume that pref0_len <= pref_len, otherwise we
2728        decrease pref0_len.  */
2729     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2730         ? (5 + taill) - l : locinput - loc_bostr;
2731     int pref0_len;
2732
2733     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2734
2735     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2736         pref_len++;
2737     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2738     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2739         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2740               ? (5 + taill) - pref_len : loc_regeol - locinput);
2741     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2742         l--;
2743     if (pref0_len < 0)
2744         pref0_len = 0;
2745     if (pref0_len > pref_len)
2746         pref0_len = pref_len;
2747     {
2748         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2749
2750         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2751             (locinput - pref_len),pref0_len, 60, 4, 5);
2752
2753         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2754                     (locinput - pref_len + pref0_len),
2755                     pref_len - pref0_len, 60, 2, 3);
2756
2757         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2758                     locinput, loc_regeol - locinput, 10, 0, 1);
2759
2760         const STRLEN tlen=len0+len1+len2;
2761         PerlIO_printf(Perl_debug_log,
2762                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2763                     (IV)(locinput - loc_bostr),
2764                     len0, s0,
2765                     len1, s1,
2766                     (docolor ? "" : "> <"),
2767                     len2, s2,
2768                     (int)(tlen > 19 ? 0 :  19 - tlen),
2769                     "");
2770     }
2771 }
2772
2773 #endif
2774
2775 /* reg_check_named_buff_matched()
2776  * Checks to see if a named buffer has matched. The data array of
2777  * buffer numbers corresponding to the buffer is expected to reside
2778  * in the regexp->data->data array in the slot stored in the ARG() of
2779  * node involved. Note that this routine doesn't actually care about the
2780  * name, that information is not preserved from compilation to execution.
2781  * Returns the index of the leftmost defined buffer with the given name
2782  * or 0 if non of the buffers matched.
2783  */
2784 STATIC I32
2785 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2786 {
2787     I32 n;
2788     RXi_GET_DECL(rex,rexi);
2789     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2790     I32 *nums=(I32*)SvPVX(sv_dat);
2791
2792     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2793
2794     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2795         if ((I32)*PL_reglastparen >= nums[n] &&
2796             PL_regoffs[nums[n]].end != -1)
2797         {
2798             return nums[n];
2799         }
2800     }
2801     return 0;
2802 }
2803
2804
2805 /* free all slabs above current one  - called during LEAVE_SCOPE */
2806
2807 STATIC void
2808 S_clear_backtrack_stack(pTHX_ void *p)
2809 {
2810     regmatch_slab *s = PL_regmatch_slab->next;
2811     PERL_UNUSED_ARG(p);
2812
2813     if (!s)
2814         return;
2815     PL_regmatch_slab->next = NULL;
2816     while (s) {
2817         regmatch_slab * const osl = s;
2818         s = s->next;
2819         Safefree(osl);
2820     }
2821 }
2822
2823
2824 #define SETREX(Re1,Re2) \
2825     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2826     Re1 = (Re2)
2827
2828 STATIC I32                      /* 0 failure, 1 success */
2829 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2830 {
2831 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2832     dMY_CXT;
2833 #endif
2834     dVAR;
2835     register const bool do_utf8 = PL_reg_match_utf8;
2836     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2837     REGEXP *rex_sv = reginfo->prog;
2838     regexp *rex = (struct regexp *)SvANY(rex_sv);
2839     RXi_GET_DECL(rex,rexi);
2840     I32 oldsave;
2841     /* the current state. This is a cached copy of PL_regmatch_state */
2842     register regmatch_state *st;
2843     /* cache heavy used fields of st in registers */
2844     register regnode *scan;
2845     register regnode *next;
2846     register U32 n = 0; /* general value; init to avoid compiler warning */
2847     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2848     register char *locinput = PL_reginput;
2849     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2850
2851     bool result = 0;        /* return value of S_regmatch */
2852     int depth = 0;          /* depth of backtrack stack */
2853     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2854     const U32 max_nochange_depth =
2855         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2856         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2857     regmatch_state *yes_state = NULL; /* state to pop to on success of
2858                                                             subpattern */
2859     /* mark_state piggy backs on the yes_state logic so that when we unwind
2860        the stack on success we can update the mark_state as we go */
2861     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2862     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2863     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2864     U32 state_num;
2865     bool no_final = 0;      /* prevent failure from backtracking? */
2866     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2867     char *startpoint = PL_reginput;
2868     SV *popmark = NULL;     /* are we looking for a mark? */
2869     SV *sv_commit = NULL;   /* last mark name seen in failure */
2870     SV *sv_yes_mark = NULL; /* last mark name we have seen
2871                                during a successfull match */
2872     U32 lastopen = 0;       /* last open we saw */
2873     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2874     SV* const oreplsv = GvSV(PL_replgv);
2875     /* these three flags are set by various ops to signal information to
2876      * the very next op. They have a useful lifetime of exactly one loop
2877      * iteration, and are not preserved or restored by state pushes/pops
2878      */
2879     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2880     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2881     int logical = 0;        /* the following EVAL is:
2882                                 0: (?{...})
2883                                 1: (?(?{...})X|Y)
2884                                 2: (??{...})
2885                                or the following IFMATCH/UNLESSM is:
2886                                 false: plain (?=foo)
2887                                 true:  used as a condition: (?(?=foo))
2888                             */
2889 #ifdef DEBUGGING
2890     GET_RE_DEBUG_FLAGS_DECL;
2891 #endif
2892
2893     PERL_ARGS_ASSERT_REGMATCH;
2894
2895     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2896             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2897     }));
2898     /* on first ever call to regmatch, allocate first slab */
2899     if (!PL_regmatch_slab) {
2900         Newx(PL_regmatch_slab, 1, regmatch_slab);
2901         PL_regmatch_slab->prev = NULL;
2902         PL_regmatch_slab->next = NULL;
2903         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2904     }
2905
2906     oldsave = PL_savestack_ix;
2907     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2908     SAVEVPTR(PL_regmatch_slab);
2909     SAVEVPTR(PL_regmatch_state);
2910
2911     /* grab next free state slot */
2912     st = ++PL_regmatch_state;
2913     if (st >  SLAB_LAST(PL_regmatch_slab))
2914         st = PL_regmatch_state = S_push_slab(aTHX);
2915
2916     /* Note that nextchr is a byte even in UTF */
2917     nextchr = UCHARAT(locinput);
2918     scan = prog;
2919     while (scan != NULL) {
2920
2921         DEBUG_EXECUTE_r( {
2922             SV * const prop = sv_newmortal();
2923             regnode *rnext=regnext(scan);
2924             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2925             regprop(rex, prop, scan);
2926
2927             PerlIO_printf(Perl_debug_log,
2928                     "%3"IVdf":%*s%s(%"IVdf")\n",
2929                     (IV)(scan - rexi->program), depth*2, "",
2930                     SvPVX_const(prop),
2931                     (PL_regkind[OP(scan)] == END || !rnext) ?
2932                         0 : (IV)(rnext - rexi->program));
2933         });
2934
2935         next = scan + NEXT_OFF(scan);
2936         if (next == scan)
2937             next = NULL;
2938         state_num = OP(scan);
2939
2940         REH_CALL_REGEXEC_HOOK(rex, scan, reginfo, st);
2941       reenter_switch:
2942
2943         assert(PL_reglastparen == &rex->lastparen);
2944         assert(PL_reglastcloseparen == &rex->lastcloseparen);
2945         assert(PL_regoffs == rex->offs);
2946
2947         switch (state_num) {
2948         case BOL:
2949             if (locinput == PL_bostr)
2950             {
2951                 /* reginfo->till = reginfo->bol; */
2952                 break;
2953             }
2954             sayNO;
2955         case MBOL:
2956             if (locinput == PL_bostr ||
2957                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2958             {
2959                 break;
2960             }
2961             sayNO;
2962         case SBOL:
2963             if (locinput == PL_bostr)
2964                 break;
2965             sayNO;
2966         case GPOS:
2967             if (locinput == reginfo->ganch)
2968                 break;
2969             sayNO;
2970
2971         case KEEPS:
2972             /* update the startpoint */
2973             st->u.keeper.val = PL_regoffs[0].start;
2974             PL_reginput = locinput;
2975             PL_regoffs[0].start = locinput - PL_bostr;
2976             PUSH_STATE_GOTO(KEEPS_next, next);
2977             /*NOT-REACHED*/
2978         case KEEPS_next_fail:
2979             /* rollback the start point change */
2980             PL_regoffs[0].start = st->u.keeper.val;
2981             sayNO_SILENT;
2982             /*NOT-REACHED*/
2983         case EOL:
2984                 goto seol;
2985         case MEOL:
2986             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2987                 sayNO;
2988             break;
2989         case SEOL:
2990           seol:
2991             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2992                 sayNO;
2993             if (PL_regeol - locinput > 1)
2994                 sayNO;
2995             break;
2996         case EOS:
2997             if (PL_regeol != locinput)
2998                 sayNO;
2999             break;
3000         case SANY:
3001             if (!nextchr && locinput >= PL_regeol)
3002                 sayNO;
3003             if (do_utf8) {
3004                 locinput += PL_utf8skip[nextchr];
3005                 if (locinput > PL_regeol)
3006                     sayNO;
3007                 nextchr = UCHARAT(locinput);
3008             }
3009             else
3010                 nextchr = UCHARAT(++locinput);
3011             break;
3012         case CANY:
3013             if (!nextchr && locinput >= PL_regeol)
3014                 sayNO;
3015             nextchr = UCHARAT(++locinput);
3016             break;
3017         case REG_ANY:
3018             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
3019                 sayNO;
3020             if (do_utf8) {
3021                 locinput += PL_utf8skip[nextchr];
3022                 if (locinput > PL_regeol)
3023                     sayNO;
3024                 nextchr = UCHARAT(locinput);
3025             }
3026             else
3027                 nextchr = UCHARAT(++locinput);
3028             break;
3029
3030 #undef  ST
3031 #define ST st->u.trie
3032         case TRIEC:
3033             /* In this case the charclass data is available inline so
3034                we can fail fast without a lot of extra overhead.
3035              */
3036             if (scan->flags == EXACT || !do_utf8) {
3037                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
3038                     DEBUG_EXECUTE_r(
3039                         PerlIO_printf(Perl_debug_log,
3040                                   "%*s  %sfailed to match trie start class...%s\n",
3041                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3042                     );
3043                     sayNO_SILENT;
3044                     /* NOTREACHED */
3045                 }
3046             }
3047             /* FALL THROUGH */
3048         case TRIE:
3049             {
3050                 /* what type of TRIE am I? (utf8 makes this contextual) */
3051                 DECL_TRIE_TYPE(scan);
3052
3053                 /* what trie are we using right now */
3054                 reg_trie_data * const trie
3055                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3056                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
3057                 U32 state = trie->startstate;
3058
3059                 if (trie->bitmap && trie_type != trie_utf8_fold &&
3060                     !TRIE_BITMAP_TEST(trie,*locinput)
3061                 ) {
3062                     if (trie->states[ state ].wordnum) {
3063                          DEBUG_EXECUTE_r(
3064                             PerlIO_printf(Perl_debug_log,
3065                                           "%*s  %smatched empty string...%s\n",
3066                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3067                         );
3068                         break;
3069                     } else {
3070                         DEBUG_EXECUTE_r(
3071                             PerlIO_printf(Perl_debug_log,
3072                                           "%*s  %sfailed to match trie start class...%s\n",
3073                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3074                         );
3075                         sayNO_SILENT;
3076                    }
3077                 }
3078
3079             {
3080                 U8 *uc = ( U8* )locinput;
3081
3082                 STRLEN len = 0;
3083                 STRLEN foldlen = 0;
3084                 U8 *uscan = (U8*)NULL;
3085                 STRLEN bufflen=0;
3086                 SV *sv_accept_buff = NULL;
3087                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
3088
3089                 ST.accepted = 0; /* how many accepting states we have seen */
3090                 ST.B = next;
3091                 ST.jump = trie->jump;
3092                 ST.me = scan;
3093                 /*
3094                    traverse the TRIE keeping track of all accepting states
3095                    we transition through until we get to a failing node.
3096                 */
3097
3098                 while ( state && uc <= (U8*)PL_regeol ) {
3099                     U32 base = trie->states[ state ].trans.base;
3100                     UV uvc = 0;
3101                     U16 charid;
3102                     /* We use charid to hold the wordnum as we don't use it
3103                        for charid until after we have done the wordnum logic.
3104                        We define an alias just so that the wordnum logic reads
3105                        more naturally. */
3106
3107 #define got_wordnum charid
3108                     got_wordnum = trie->states[ state ].wordnum;
3109
3110                     if ( got_wordnum ) {
3111                         if ( ! ST.accepted ) {
3112                             ENTER;
3113                             SAVETMPS; /* XXX is this necessary? dmq */
3114                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3115                             sv_accept_buff=newSV(bufflen *
3116                                             sizeof(reg_trie_accepted) - 1);
3117                             SvCUR_set(sv_accept_buff, 0);
3118                             SvPOK_on(sv_accept_buff);
3119                             sv_2mortal(sv_accept_buff);
3120                             SAVETMPS;
3121                             ST.accept_buff =
3122                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3123                         }
3124                         do {
3125                             if (ST.accepted >= bufflen) {
3126                                 bufflen *= 2;
3127                                 ST.accept_buff =(reg_trie_accepted*)
3128                                     SvGROW(sv_accept_buff,
3129                                         bufflen * sizeof(reg_trie_accepted));
3130                             }
3131                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3132                                 + sizeof(reg_trie_accepted));
3133
3134
3135                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3136                             ST.accept_buff[ST.accepted].endpos = uc;
3137                             ++ST.accepted;
3138                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3139                     }
3140 #undef got_wordnum
3141
3142                     DEBUG_TRIE_EXECUTE_r({
3143                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3144                                 PerlIO_printf( Perl_debug_log,
3145                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3146                                     2+depth * 2, "", PL_colors[4],
3147                                     (UV)state, (UV)ST.accepted );
3148                     });
3149
3150                     if ( base ) {
3151                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3152                                              uscan, len, uvc, charid, foldlen,
3153                                              foldbuf, uniflags);
3154
3155                         if (charid &&
3156                              (base + charid > trie->uniquecharcount )
3157                              && (base + charid - 1 - trie->uniquecharcount
3158                                     < trie->lasttrans)
3159                              && trie->trans[base + charid - 1 -
3160                                     trie->uniquecharcount].check == state)
3161                         {
3162                             state = trie->trans[base + charid - 1 -
3163                                 trie->uniquecharcount ].next;
3164                         }
3165                         else {
3166                             state = 0;
3167                         }
3168                         uc += len;
3169
3170                     }
3171                     else {
3172                         state = 0;
3173                     }
3174                     DEBUG_TRIE_EXECUTE_r(
3175                         PerlIO_printf( Perl_debug_log,
3176                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3177                             charid, uvc, (UV)state, PL_colors[5] );
3178                     );
3179                 }
3180                 if (!ST.accepted )
3181                    sayNO;
3182
3183                 DEBUG_EXECUTE_r(
3184                     PerlIO_printf( Perl_debug_log,
3185                         "%*s  %sgot %"IVdf" possible matches%s\n",
3186                         REPORT_CODE_OFF + depth * 2, "",
3187                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3188                 );
3189             }}
3190             goto trie_first_try; /* jump into the fail handler */
3191             /* NOTREACHED */
3192         case TRIE_next_fail: /* we failed - try next alterative */
3193             if ( ST.jump) {
3194                 REGCP_UNWIND(ST.cp);
3195                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3196                     PL_regoffs[n].end = -1;
3197                 *PL_reglastparen = n;
3198             }
3199           trie_first_try:
3200             if (do_cutgroup) {
3201                 do_cutgroup = 0;
3202                 no_final = 0;
3203             }
3204
3205             if ( ST.jump) {
3206                 ST.lastparen = *PL_reglastparen;
3207                 REGCP_SET(ST.cp);
3208             }
3209             if ( ST.accepted == 1 ) {
3210                 /* only one choice left - just continue */
3211                 DEBUG_EXECUTE_r({
3212                     AV *const trie_words
3213                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3214                     SV ** const tmp = av_fetch( trie_words,
3215                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3216                     SV *sv= tmp ? sv_newmortal() : NULL;
3217
3218                     PerlIO_printf( Perl_debug_log,
3219                         "%*s  %sonly one match left: #%d <%s>%s\n",
3220                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3221                         ST.accept_buff[ 0 ].wordnum,
3222                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3223                                 PL_colors[0], PL_colors[1],
3224                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3225                             )
3226                         : "not compiled under -Dr",
3227                         PL_colors[5] );
3228                 });
3229                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3230                 /* in this case we free tmps/leave before we call regmatch
3231                    as we wont be using accept_buff again. */
3232
3233                 locinput = PL_reginput;
3234                 nextchr = UCHARAT(locinput);
3235                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3236                     scan = ST.B;
3237                 else
3238                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3239                 if (!has_cutgroup) {
3240                     FREETMPS;
3241                     LEAVE;
3242                 } else {
3243                     ST.accepted--;
3244                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3245                 }
3246
3247                 continue; /* execute rest of RE */
3248             }
3249
3250             if ( !ST.accepted-- ) {
3251                 DEBUG_EXECUTE_r({
3252                     PerlIO_printf( Perl_debug_log,
3253                         "%*s  %sTRIE failed...%s\n",
3254                         REPORT_CODE_OFF+depth*2, "",
3255                         PL_colors[4],
3256                         PL_colors[5] );
3257                 });
3258                 FREETMPS;
3259                 LEAVE;
3260                 sayNO_SILENT;
3261                 /*NOTREACHED*/
3262             }
3263
3264             /*
3265                There are at least two accepting states left.  Presumably
3266                the number of accepting states is going to be low,
3267                typically two. So we simply scan through to find the one
3268                with lowest wordnum.  Once we find it, we swap the last
3269                state into its place and decrement the size. We then try to
3270                match the rest of the pattern at the point where the word
3271                ends. If we succeed, control just continues along the
3272                regex; if we fail we return here to try the next accepting
3273                state
3274              */
3275
3276             {
3277                 U32 best = 0;
3278                 U32 cur;
3279                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3280                     DEBUG_TRIE_EXECUTE_r(
3281                         PerlIO_printf( Perl_debug_log,
3282                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3283                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3284                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3285                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3286                     );
3287
3288                     if (ST.accept_buff[cur].wordnum <
3289                             ST.accept_buff[best].wordnum)
3290                         best = cur;
3291                 }
3292
3293                 DEBUG_EXECUTE_r({
3294                     AV *const trie_words
3295                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3296                     SV ** const tmp = av_fetch( trie_words,
3297                         ST.accept_buff[ best ].wordnum - 1, 0 );
3298                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3299                                     ST.B :
3300                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3301                     SV *sv= tmp ? sv_newmortal() : NULL;
3302
3303                     PerlIO_printf( Perl_debug_log,
3304                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3305                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3306                         ST.accept_buff[best].wordnum,
3307                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3308                                 PL_colors[0], PL_colors[1],
3309                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3310                             ) : "not compiled under -Dr",
3311                             REG_NODE_NUM(nextop),
3312                         PL_colors[5] );
3313                 });
3314
3315                 if ( best<ST.accepted ) {
3316                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3317                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3318                     ST.accept_buff[ ST.accepted ] = tmp;
3319                     best = ST.accepted;
3320                 }
3321                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3322                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3323                     scan = ST.B;
3324                 } else {
3325                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3326                 }
3327                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3328                 /* NOTREACHED */
3329             }
3330             /* NOTREACHED */
3331         case TRIE_next:
3332             /* we dont want to throw this away, see bug 57042*/
3333             if (oreplsv != GvSV(PL_replgv))
3334                 sv_setsv(oreplsv, GvSV(PL_replgv));
3335             FREETMPS;
3336             LEAVE;
3337             sayYES;
3338 #undef  ST
3339
3340         case EXACT: {
3341             char *s = STRING(scan);
3342             ln = STR_LEN(scan);
3343             if (do_utf8 != UTF) {
3344                 /* The target and the pattern have differing utf8ness. */
3345                 char *l = locinput;
3346                 const char * const e = s + ln;
3347
3348                 if (do_utf8) {
3349                     /* The target is utf8, the pattern is not utf8. */
3350                     while (s < e) {
3351                         STRLEN ulen;
3352                         if (l >= PL_regeol)
3353                              sayNO;
3354                         if (NATIVE_TO_UNI(*(U8*)s) !=
3355                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3356                                             uniflags))
3357                              sayNO;
3358                         l += ulen;
3359                         s ++;
3360                     }
3361                 }
3362                 else {
3363                     /* The target is not utf8, the pattern is utf8. */
3364                     while (s < e) {
3365                         STRLEN ulen;
3366                         if (l >= PL_regeol)
3367                             sayNO;
3368                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3369                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3370                                            uniflags))
3371                             sayNO;
3372                         s += ulen;
3373                         l ++;
3374                     }
3375                 }
3376                 locinput = l;
3377                 nextchr = UCHARAT(locinput);
3378                 break;
3379             }
3380             /* The target and the pattern have the same utf8ness. */
3381             /* Inline the first character, for speed. */
3382             if (UCHARAT(s) != nextchr)
3383                 sayNO;
3384             if (PL_regeol - locinput < ln)
3385                 sayNO;
3386             if (ln > 1 && memNE(s, locinput, ln))
3387                 sayNO;
3388             locinput += ln;
3389             nextchr = UCHARAT(locinput);
3390             break;
3391             }
3392         case EXACTFL:
3393             PL_reg_flags |= RF_tainted;
3394             /* FALL THROUGH */
3395         case EXACTF: {
3396             char * const s = STRING(scan);
3397             ln = STR_LEN(scan);
3398
3399             if (do_utf8 || UTF) {
3400               /* Either target or the pattern are utf8. */
3401                 const char * const l = locinput;
3402                 char *e = PL_regeol;
3403
3404                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3405                                l, &e, 0,  do_utf8)) {
3406                      /* One more case for the sharp s:
3407                       * pack("U0U*", 0xDF) =~ /ss/i,
3408                       * the 0xC3 0x9F are the UTF-8
3409                       * byte sequence for the U+00DF. */
3410
3411                      if (!(do_utf8 &&
3412                            toLOWER(s[0]) == 's' &&
3413                            ln >= 2 &&
3414                            toLOWER(s[1]) == 's' &&
3415                            (U8)l[0] == 0xC3 &&
3416                            e - l >= 2 &&
3417                            (U8)l[1] == 0x9F))
3418                           sayNO;
3419                 }
3420                 locinput = e;
3421                 nextchr = UCHARAT(locinput);
3422                 break;
3423             }
3424
3425             /* Neither the target and the pattern are utf8. */
3426
3427             /* Inline the first character, for speed. */
3428             if (UCHARAT(s) != nextchr &&
3429                 UCHARAT(s) != ((OP(scan) == EXACTF)
3430                                ? PL_fold : PL_fold_locale)[nextchr])
3431                 sayNO;
3432             if (PL_regeol - locinput < ln)
3433                 sayNO;
3434             if (ln > 1 && (OP(scan) == EXACTF
3435                            ? ibcmp(s, locinput, ln)
3436                            : ibcmp_locale(s, locinput, ln)))
3437                 sayNO;
3438             locinput += ln;
3439             nextchr = UCHARAT(locinput);
3440             break;
3441             }
3442         case BOUNDL:
3443         case NBOUNDL:
3444             PL_reg_flags |= RF_tainted;
3445             /* FALL THROUGH */
3446         case BOUND:
3447         case NBOUND:
3448             /* was last char in word? */
3449             if (do_utf8) {
3450                 if (locinput == PL_bostr)
3451                     ln = '\n';
3452                 else {
3453                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3454
3455                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3456                 }
3457                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3458                     ln = isALNUM_uni(ln);
3459                     LOAD_UTF8_CHARCLASS_ALNUM();
3460                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3461                 }
3462                 else {
3463                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3464                     n = isALNUM_LC_utf8((U8*)locinput);
3465                 }
3466             }
3467             else {
3468                 ln = (locinput != PL_bostr) ?
3469                     UCHARAT(locinput - 1) : '\n';
3470                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3471                     ln = isALNUM(ln);
3472                     n = isALNUM(nextchr);
3473                 }
3474                 else {
3475                     ln = isALNUM_LC(ln);
3476                     n = isALNUM_LC(nextchr);
3477                 }
3478             }
3479             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3480                                     OP(scan) == BOUNDL))
3481                     sayNO;
3482             break;
3483         case ANYOF:
3484             if (do_utf8) {
3485                 STRLEN inclasslen = PL_regeol - locinput;
3486
3487                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3488                     goto anyof_fail;
3489                 if (locinput >= PL_regeol)
3490                     sayNO;
3491                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3492                 nextchr = UCHARAT(locinput);
3493                 break;
3494             }
3495             else {
3496                 if (nextchr < 0)
3497                     nextchr = UCHARAT(locinput);
3498                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3499                     goto anyof_fail;
3500                 if (!nextchr && locinput >= PL_regeol)
3501                     sayNO;
3502                 nextchr = UCHARAT(++locinput);
3503                 break;
3504             }
3505         anyof_fail:
3506             /* If we might have the case of the German sharp s
3507              * in a casefolding Unicode character class. */
3508
3509             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3510                  locinput += SHARP_S_SKIP;
3511                  nextchr = UCHARAT(locinput);
3512             }
3513             else
3514                  sayNO;
3515             break;
3516         /* Special char classes - The defines start on line 129 or so */
3517         CCC_TRY_AFF( ALNUM,  ALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3518         CCC_TRY_NEG(NALNUM, NALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3519
3520         CCC_TRY_AFF( SPACE,  SPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3521         CCC_TRY_NEG(NSPACE, NSPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3522
3523         CCC_TRY_AFF( DIGIT,  DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3524         CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3525
3526         case CLUMP:
3527             if (locinput >= PL_regeol)
3528                 sayNO;
3529             if  (do_utf8) {
3530                 LOAD_UTF8_CHARCLASS_MARK();
3531                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3532                     sayNO;
3533                 locinput += PL_utf8skip[nextchr];
3534                 while (locinput < PL_regeol &&
3535                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3536                     locinput += UTF8SKIP(locinput);
3537                 if (locinput > PL_regeol)
3538                     sayNO;
3539             }
3540             else
3541                locinput++;
3542             nextchr = UCHARAT(locinput);
3543             break;
3544
3545         case NREFFL:
3546         {
3547             char *s;
3548             char type;
3549             PL_reg_flags |= RF_tainted;
3550             /* FALL THROUGH */
3551         case NREF:
3552         case NREFF:
3553             type = OP(scan);
3554             n = reg_check_named_buff_matched(rex,scan);
3555
3556             if ( n ) {
3557                 type = REF + ( type - NREF );
3558                 goto do_ref;
3559             } else {
3560                 sayNO;
3561             }
3562             /* unreached */
3563         case REFFL:
3564             PL_reg_flags |= RF_tainted;
3565             /* FALL THROUGH */
3566         case REF:
3567         case REFF:
3568             n = ARG(scan);  /* which paren pair */
3569             type = OP(scan);
3570           do_ref:
3571             ln = PL_regoffs[n].start;
3572             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3573             if (*PL_reglastparen < n || ln == -1)
3574                 sayNO;                  /* Do not match unless seen CLOSEn. */
3575             if (ln == PL_regoffs[n].end)
3576                 break;
3577
3578             s = PL_bostr + ln;
3579             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3580                 char *l = locinput;
3581                 const char *e = PL_bostr + PL_regoffs[n].end;
3582                 /*
3583                  * Note that we can't do the "other character" lookup trick as
3584                  * in the 8-bit case (no pun intended) because in Unicode we
3585                  * have to map both upper and title case to lower case.
3586                  */
3587                 if (type == REFF) {
3588                     while (s < e) {
3589                         STRLEN ulen1, ulen2;
3590                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3591                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3592
3593                         if (l >= PL_regeol)
3594                             sayNO;
3595                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3596                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3597                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3598                             sayNO;
3599                         s += ulen1;
3600                         l += ulen2;
3601                     }
3602                 }
3603                 locinput = l;
3604                 nextchr = UCHARAT(locinput);
3605                 break;
3606             }
3607
3608             /* Inline the first character, for speed. */
3609             if (UCHARAT(s) != nextchr &&
3610                 (type == REF ||
3611                  (UCHARAT(s) != (type == REFF
3612                                   ? PL_fold : PL_fold_locale)[nextchr])))
3613                 sayNO;
3614             ln = PL_regoffs[n].end - ln;
3615             if (locinput + ln > PL_regeol)
3616                 sayNO;
3617             if (ln > 1 && (type == REF
3618                            ? memNE(s, locinput, ln)
3619                            : (type == REFF
3620                               ? ibcmp(s, locinput, ln)
3621                               : ibcmp_locale(s, locinput, ln))))
3622                 sayNO;
3623             locinput += ln;
3624             nextchr = UCHARAT(locinput);
3625             break;
3626         }
3627         case NOTHING:
3628         case TAIL:
3629             break;
3630         case BACK:
3631             break;
3632
3633 #undef  ST
3634 #define ST st->u.eval
3635         {
3636             SV *ret;
3637             REGEXP *re_sv;
3638             regexp *re;
3639             regexp_internal *rei;
3640             regnode *startpoint;
3641
3642         case GOSTART:
3643         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3644             if (cur_eval && cur_eval->locinput==locinput) {
3645                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3646                     Perl_croak(aTHX_ "Infinite recursion in regex");
3647                 if ( ++nochange_depth > max_nochange_depth )
3648                     Perl_croak(aTHX_
3649                         "Pattern subroutine nesting without pos change"
3650                         " exceeded limit in regex");
3651             } else {
3652                 nochange_depth = 0;
3653             }
3654             re_sv = rex_sv;
3655             re = rex;
3656             rei = rexi;
3657             (void)ReREFCNT_inc(rex_sv);
3658             if (OP(scan)==GOSUB) {
3659                 startpoint = scan + ARG2L(scan);
3660                 ST.close_paren = ARG(scan);
3661             } else {
3662                 startpoint = rei->program+1;
3663                 ST.close_paren = 0;
3664             }
3665             goto eval_recurse_doit;
3666             /* NOTREACHED */
3667         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3668             if (cur_eval && cur_eval->locinput==locinput) {
3669                 if ( ++nochange_depth > max_nochange_depth )
3670                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3671             } else {
3672                 nochange_depth = 0;
3673             }
3674             {
3675                 /* execute the code in the {...} */
3676                 dSP;
3677                 SV ** const before = SP;
3678                 OP_4tree * const oop = PL_op;
3679                 COP * const ocurcop = PL_curcop;
3680                 PAD *old_comppad;
3681                 char *saved_regeol = PL_regeol;
3682
3683                 n = ARG(scan);
3684                 PL_op = (OP_4tree*)rexi->data->data[n];
3685                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3686                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3687                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3688                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3689
3690                 if (sv_yes_mark) {
3691                     SV *sv_mrk = get_sv("REGMARK", 1);
3692                     sv_setsv(sv_mrk, sv_yes_mark);
3693                 }
3694
3695                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3696                 SPAGAIN;
3697                 if (SP == before)
3698                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3699                 else {
3700                     ret = POPs;
3701                     PUTBACK;
3702                 }
3703
3704                 PL_op = oop;
3705                 PAD_RESTORE_LOCAL(old_comppad);
3706                 PL_curcop = ocurcop;
3707                 PL_regeol = saved_regeol;
3708                 if (!logical) {
3709                     /* /(?{...})/ */
3710                     sv_setsv(save_scalar(PL_replgv), ret);
3711                     break;
3712                 }
3713             }
3714             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3715                 logical = 0;
3716                 {
3717                     /* extract RE object from returned value; compiling if
3718                      * necessary */
3719                     MAGIC *mg = NULL;
3720                     REGEXP *rx = NULL;
3721
3722                     if (SvROK(ret)) {
3723                         SV *const sv = SvRV(ret);
3724
3725                         if (SvTYPE(sv) == SVt_REGEXP) {
3726                             rx = (REGEXP*) sv;
3727                         } else if (SvSMAGICAL(sv)) {
3728                             mg = mg_find(sv, PERL_MAGIC_qr);
3729                             assert(mg);
3730                         }
3731                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3732                         rx = (REGEXP*) ret;
3733                     } else if (SvSMAGICAL(ret)) {
3734                         if (SvGMAGICAL(ret)) {
3735                             /* I don't believe that there is ever qr magic
3736                                here.  */
3737                             assert(!mg_find(ret, PERL_MAGIC_qr));
3738                             sv_unmagic(ret, PERL_MAGIC_qr);
3739                         }
3740                         else {
3741                             mg = mg_find(ret, PERL_MAGIC_qr);
3742                             /* testing suggests mg only ends up non-NULL for
3743                                scalars who were upgraded and compiled in the
3744                                else block below. In turn, this is only
3745                                triggered in the "postponed utf8 string" tests
3746                                in t/op/pat.t  */
3747                         }
3748                     }
3749
3750                     if (mg) {
3751                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3752                         assert(rx);
3753                     }
3754                     if (rx) {
3755                         rx = reg_temp_copy(NULL, rx);
3756                     }
3757                     else {
3758                         U32 pm_flags = 0;
3759                         const I32 osize = PL_regsize;
3760
3761                         if (DO_UTF8(ret)) {
3762                             assert (SvUTF8(ret));
3763                         } else if (SvUTF8(ret)) {
3764                             /* Not doing UTF-8, despite what the SV says. Is
3765                                this only if we're trapped in use 'bytes'?  */
3766                             /* Make a copy of the octet sequence, but without
3767                                the flag on, as the compiler now honours the
3768                                SvUTF8 flag on ret.  */
3769                             STRLEN len;
3770                             const char *const p = SvPV(ret, len);
3771                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3772                         }
3773                         rx = CALLREGCOMP(ret, pm_flags);
3774                         if (!(SvFLAGS(ret)
3775                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3776                                  | SVs_GMG))) {
3777                             /* This isn't a first class regexp. Instead, it's
3778                                caching a regexp onto an existing, Perl visible
3779                                scalar.  */
3780                             sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
3781                         }
3782                         PL_regsize = osize;
3783                     }
3784                     re_sv = rx;
3785                     re = (struct regexp *)SvANY(rx);
3786                 }
3787                 RXp_MATCH_COPIED_off(re);
3788                 re->subbeg = rex->subbeg;
3789                 re->sublen = rex->sublen;
3790                 rei = RXi_GET(re);
3791                 DEBUG_EXECUTE_r(
3792                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3793                         "Matching embedded");
3794                 );
3795                 startpoint = rei->program + 1;
3796                 ST.close_paren = 0; /* only used for GOSUB */
3797                 /* borrowed from regtry */
3798                 if (PL_reg_start_tmpl <= re->nparens) {
3799                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3800                     if(PL_reg_start_tmp)
3801                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3802                     else
3803                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3804                 }
3805
3806         eval_recurse_doit: /* Share code with GOSUB below this line */
3807                 /* run the pattern returned from (??{...}) */
3808                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3809                 REGCP_SET(ST.lastcp);
3810
3811                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3812
3813                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3814                 PL_reglastparen = &re->lastparen;
3815                 PL_reglastcloseparen = &re->lastcloseparen;
3816                 re->lastparen = 0;
3817                 re->lastcloseparen = 0;
3818
3819                 PL_reginput = locinput;
3820                 PL_regsize = 0;
3821
3822                 /* XXXX This is too dramatic a measure... */
3823                 PL_reg_maxiter = 0;
3824
3825                 ST.toggle_reg_flags = PL_reg_flags;
3826                 if (RX_UTF8(re_sv))
3827                     PL_reg_flags |= RF_utf8;
3828                 else
3829                     PL_reg_flags &= ~RF_utf8;
3830                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3831
3832                 ST.prev_rex = rex_sv;
3833                 ST.prev_curlyx = cur_curlyx;
3834                 SETREX(rex_sv,re_sv);
3835                 rex = re;
3836                 rexi = rei;
3837                 cur_curlyx = NULL;
3838                 ST.B = next;
3839                 ST.prev_eval = cur_eval;
3840                 cur_eval = st;
3841                 /* now continue from first node in postoned RE */
3842                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3843                 /* NOTREACHED */
3844             }
3845             /* logical is 1,   /(?(?{...})X|Y)/ */
3846             sw = (bool)SvTRUE(ret);
3847             logical = 0;
3848             break;
3849         }
3850
3851         case EVAL_AB: /* cleanup after a successful (??{A})B */
3852             /* note: this is called twice; first after popping B, then A */
3853             PL_reg_flags ^= ST.toggle_reg_flags;
3854             ReREFCNT_dec(rex_sv);
3855             SETREX(rex_sv,ST.prev_rex);
3856             rex = (struct regexp *)SvANY(rex_sv);
3857             rexi = RXi_GET(rex);
3858             regcpblow(ST.cp);
3859             cur_eval = ST.prev_eval;
3860             cur_curlyx = ST.prev_curlyx;
3861
3862             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3863             PL_reglastparen = &rex->lastparen;
3864             PL_reglastcloseparen = &rex->lastcloseparen;
3865             /* also update PL_regoffs */
3866             PL_regoffs = rex->offs;
3867
3868             /* XXXX This is too dramatic a measure... */
3869             PL_reg_maxiter = 0;
3870             if ( nochange_depth )
3871                 nochange_depth--;
3872             sayYES;
3873
3874
3875         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3876             /* note: this is called twice; first after popping B, then A */
3877             PL_reg_flags ^= ST.toggle_reg_flags;
3878             ReREFCNT_dec(rex_sv);
3879             SETREX(rex_sv,ST.prev_rex);
3880             rex = (struct regexp *)SvANY(rex_sv);
3881             rexi = RXi_GET(rex);
3882             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3883             PL_reglastparen = &rex->lastparen;
3884             PL_reglastcloseparen = &rex->lastcloseparen;
3885
3886             PL_reginput = locinput;
3887             REGCP_UNWIND(ST.lastcp);
3888             regcppop(rex);
3889             cur_eval = ST.prev_eval;
3890             cur_curlyx = ST.prev_curlyx;
3891             /* XXXX This is too dramatic a measure... */
3892             PL_reg_maxiter = 0;
3893             if ( nochange_depth )
3894                 nochange_depth--;
3895             sayNO_SILENT;
3896 #undef ST
3897
3898         case OPEN:
3899             n = ARG(scan);  /* which paren pair */
3900             PL_reg_start_tmp[n] = locinput;
3901             if (n > PL_regsize)
3902                 PL_regsize = n;
3903             lastopen = n;
3904             break;
3905         case CLOSE:
3906             n = ARG(scan);  /* which paren pair */
3907             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3908             PL_regoffs[n].end = locinput - PL_bostr;
3909             /*if (n > PL_regsize)
3910                 PL_regsize = n;*/
3911             if (n > *PL_reglastparen)
3912                 *PL_reglastparen = n;
3913             *PL_reglastcloseparen = n;
3914             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3915                 goto fake_end;
3916             }
3917             break;
3918         case ACCEPT:
3919             if (ARG(scan)){
3920                 regnode *cursor;
3921                 for (cursor=scan;
3922                      cursor && OP(cursor)!=END;
3923                      cursor=regnext(cursor))
3924                 {
3925                     if ( OP(cursor)==CLOSE ){
3926                         n = ARG(cursor);
3927                         if ( n <= lastopen ) {
3928                             PL_regoffs[n].start
3929                                 = PL_reg_start_tmp[n] - PL_bostr;
3930                             PL_regoffs[n].end = locinput - PL_bostr;
3931                             /*if (n > PL_regsize)
3932                             PL_regsize = n;*/
3933                             if (n > *PL_reglastparen)
3934                                 *PL_reglastparen = n;
3935                             *PL_reglastcloseparen = n;
3936                             if ( n == ARG(scan) || (cur_eval &&
3937                                 cur_eval->u.eval.close_paren == n))
3938                                 break;
3939                         }
3940                     }
3941                 }
3942             }
3943             goto fake_end;
3944             /*NOTREACHED*/
3945         case GROUPP:
3946             n = ARG(scan);  /* which paren pair */
3947             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3948             break;
3949         case NGROUPP:
3950             /* reg_check_named_buff_matched returns 0 for no match */
3951             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3952             break;
3953         case INSUBP:
3954             n = ARG(scan);
3955             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3956             break;
3957         case DEFINEP:
3958             sw = 0;
3959             break;
3960         case IFTHEN:
3961             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3962             if (sw)
3963                 next = NEXTOPER(NEXTOPER(scan));
3964             else {
3965                 next = scan + ARG(scan);
3966                 if (OP(next) == IFTHEN) /* Fake one. */
3967                     next = NEXTOPER(NEXTOPER(next));
3968             }
3969             break;
3970         case LOGICAL:
3971             logical = scan->flags;
3972             break;
3973
3974 /*******************************************************************
3975
3976 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3977 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3978 STAR/PLUS/CURLY/CURLYN are used instead.)
3979
3980 A*B is compiled as <CURLYX><A><WHILEM><B>
3981
3982 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3983 state, which contains the current count, initialised to -1. It also sets
3984 cur_curlyx to point to this state, with any previous value saved in the
3985 state block.
3986
3987 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3988 since the pattern may possibly match zero times (i.e. it's a while {} loop
3989 rather than a do {} while loop).
3990
3991 Each entry to WHILEM represents a successful match of A. The count in the
3992 CURLYX block is incremented, another WHILEM state is pushed, and execution
3993 passes to A or B depending on greediness and the current count.
3994
3995 For example, if matching against the string a1a2a3b (where the aN are
3996 substrings that match /A/), then the match progresses as follows: (the
3997 pushed states are interspersed with the bits of strings matched so far):
3998
3999     <CURLYX cnt=-1>
4000     <CURLYX cnt=0><WHILEM>
4001     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4002     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4003     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4004     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4005
4006 (Contrast this with something like CURLYM, which maintains only a single
4007 backtrack state:
4008
4009     <CURLYM cnt=0> a1
4010     a1 <CURLYM cnt=1> a2
4011     a1 a2 <CURLYM cnt=2> a3
4012     a1 a2 a3 <CURLYM cnt=3> b
4013 )
4014
4015 Each WHILEM state block marks a point to backtrack to upon partial failure
4016 of A or B, and also contains some minor state data related to that
4017 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4018 overall state, such as the count, and pointers to the A and B ops.
4019
4020 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4021 must always point to the *current* CURLYX block, the rules are:
4022
4023 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4024 and set cur_curlyx to point the new block.
4025
4026 When popping the CURLYX block after a successful or unsuccessful match,
4027 restore the previous cur_curlyx.
4028
4029 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4030 to the outer one saved in the CURLYX block.
4031
4032 When popping the WHILEM block after a successful or unsuccessful B match,
4033 restore the previous cur_curlyx.
4034
4035 Here's an example for the pattern (AI* BI)*BO
4036 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4037
4038 cur_
4039 curlyx backtrack stack
4040 ------ ---------------
4041 NULL
4042 CO     <CO prev=NULL> <WO>
4043 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4044 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4045 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4046
4047 At this point the pattern succeeds, and we work back down the stack to
4048 clean up, restoring as we go:
4049
4050 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4051 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4052 CO     <CO prev=NULL> <WO>
4053 NULL
4054
4055 *******************************************************************/
4056
4057 #define ST st->u.curlyx
4058
4059         case CURLYX:    /* start of /A*B/  (for complex A) */
4060         {
4061             /* No need to save/restore up to this paren */
4062             I32 parenfloor = scan->flags;
4063
4064             assert(next); /* keep Coverity happy */
4065             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4066                 next += ARG(next);
4067
4068             /* XXXX Probably it is better to teach regpush to support
4069                parenfloor > PL_regsize... */
4070             if (parenfloor > (I32)*PL_reglastparen)
4071                 parenfloor = *PL_reglastparen; /* Pessimization... */
4072
4073             ST.prev_curlyx= cur_curlyx;
4074             cur_curlyx = st;
4075             ST.cp = PL_savestack_ix;
4076
4077             /* these fields contain the state of the current curly.
4078              * they are accessed by subsequent WHILEMs */
4079             ST.parenfloor = parenfloor;
4080             ST.min = ARG1(scan);
4081             ST.max = ARG2(scan);
4082             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4083             ST.B = next;
4084             ST.minmod = minmod;
4085             minmod = 0;
4086             ST.count = -1;      /* this will be updated by WHILEM */
4087             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4088
4089             PL_reginput = locinput;
4090             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4091             /* NOTREACHED */
4092         }
4093
4094         case CURLYX_end: /* just finished matching all of A*B */
4095             cur_curlyx = ST.prev_curlyx;
4096             sayYES;
4097             /* NOTREACHED */
4098
4099         case CURLYX_end_fail: /* just failed to match all of A*B */
4100             regcpblow(ST.cp);
4101             cur_curlyx = ST.prev_curlyx;
4102             sayNO;
4103             /* NOTREACHED */
4104
4105
4106 #undef ST
4107 #define ST st->u.whilem
4108
4109         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4110         {
4111             /* see the discussion above about CURLYX/WHILEM */
4112             I32 n;
4113             assert(cur_curlyx); /* keep Coverity happy */
4114             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4115             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4116             ST.cache_offset = 0;
4117             ST.cache_mask = 0;
4118
4119             PL_reginput = locinput;
4120
4121             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4122                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4123                   REPORT_CODE_OFF+depth*2, "", (long)n,
4124                   (long)cur_curlyx->u.curlyx.min,
4125                   (long)cur_curlyx->u.curlyx.max)
4126             );
4127
4128             /* First just match a string of min A's. */
4129
4130             if (n < cur_curlyx->u.curlyx.min) {
4131                 cur_curlyx->u.curlyx.lastloc = locinput;
4132                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4133                 /* NOTREACHED */
4134             }
4135
4136             /* If degenerate A matches "", assume A done. */
4137
4138             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4139                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4140                    "%*s  whilem: empty match detected, trying continuation...\n",
4141                    REPORT_CODE_OFF+depth*2, "")
4142                 );
4143                 goto do_whilem_B_max;
4144             }
4145
4146             /* super-linear cache processing */
4147
4148             if (scan->flags) {
4149
4150                 if (!PL_reg_maxiter) {
4151                     /* start the countdown: Postpone detection until we
4152                      * know the match is not *that* much linear. */
4153                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4154                     /* possible overflow for long strings and many CURLYX's */
4155                     if (PL_reg_maxiter < 0)
4156                         PL_reg_maxiter = I32_MAX;
4157                     PL_reg_leftiter = PL_reg_maxiter;
4158                 }
4159
4160                 if (PL_reg_leftiter-- == 0) {
4161                     /* initialise cache */
4162                     const I32 size = (PL_reg_maxiter + 7)/8;
4163                     if (PL_reg_poscache) {
4164                         if ((I32)PL_reg_poscache_size < size) {
4165                             Renew(PL_reg_poscache, size, char);
4166                             PL_reg_poscache_size = size;
4167                         }
4168                         Zero(PL_reg_poscache, size, char);
4169                     }
4170                     else {
4171                         PL_reg_poscache_size = size;
4172                         Newxz(PL_reg_poscache, size, char);
4173                     }
4174                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4175       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4176                               PL_colors[4], PL_colors[5])
4177                     );
4178                 }
4179
4180                 if (PL_reg_leftiter < 0) {
4181                     /* have we already failed at this position? */
4182                     I32 offset, mask;
4183                     offset  = (scan->flags & 0xf) - 1
4184                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4185                     mask    = 1 << (offset % 8);
4186                     offset /= 8;
4187                     if (PL_reg_poscache[offset] & mask) {
4188                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4189                             "%*s  whilem: (cache) already tried at this position...\n",
4190                             REPORT_CODE_OFF+depth*2, "")
4191                         );
4192                         sayNO; /* cache records failure */
4193                     }
4194                     ST.cache_offset = offset;
4195                     ST.cache_mask   = mask;
4196                 }
4197             }
4198
4199             /* Prefer B over A for minimal matching. */
4200
4201             if (cur_curlyx->u.curlyx.minmod) {
4202                 ST.save_curlyx = cur_curlyx;
4203                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4204                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4205                 REGCP_SET(ST.lastcp);
4206                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4207                 /* NOTREACHED */
4208             }
4209
4210             /* Prefer A over B for maximal matching. */
4211
4212             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4213                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4214                 cur_curlyx->u.curlyx.lastloc = locinput;
4215                 REGCP_SET(ST.lastcp);
4216                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4217                 /* NOTREACHED */
4218             }
4219             goto do_whilem_B_max;
4220         }
4221         /* NOTREACHED */
4222
4223         case WHILEM_B_min: /* just matched B in a minimal match */
4224         case WHILEM_B_max: /* just matched B in a maximal match */
4225             cur_curlyx = ST.save_curlyx;
4226             sayYES;
4227             /* NOTREACHED */
4228
4229         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4230             cur_curlyx = ST.save_curlyx;
4231             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4232             cur_curlyx->u.curlyx.count--;
4233             CACHEsayNO;
4234             /* NOTREACHED */
4235
4236         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4237             REGCP_UNWIND(ST.lastcp);
4238             regcppop(rex);
4239             /* FALL THROUGH */
4240         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4241             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4242             cur_curlyx->u.curlyx.count--;
4243             CACHEsayNO;
4244             /* NOTREACHED */
4245
4246         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4247             REGCP_UNWIND(ST.lastcp);
4248             regcppop(rex);      /* Restore some previous $<digit>s? */
4249             PL_reginput = locinput;
4250             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4251                 "%*s  whilem: failed, trying continuation...\n",
4252                 REPORT_CODE_OFF+depth*2, "")
4253             );
4254           do_whilem_B_max:
4255             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4256                 && ckWARN(WARN_REGEXP)
4257                 && !(PL_reg_flags & RF_warned))
4258             {
4259                 PL_reg_flags |= RF_warned;
4260                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4261                      "Complex regular subexpression recursion",
4262                      REG_INFTY - 1);
4263             }
4264
4265             /* now try B */
4266             ST.save_curlyx = cur_curlyx;
4267             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4268             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4269             /* NOTREACHED */
4270
4271         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4272             cur_curlyx = ST.save_curlyx;
4273             REGCP_UNWIND(ST.lastcp);
4274             regcppop(rex);
4275
4276             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4277                 /* Maximum greed exceeded */
4278                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4279                     && ckWARN(WARN_REGEXP)
4280                     && !(PL_reg_flags & RF_warned))
4281                 {
4282                     PL_reg_flags |= RF_warned;
4283                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4284                         "%s limit (%d) exceeded",
4285                         "Complex regular subexpression recursion",
4286                         REG_INFTY - 1);
4287                 }
4288                 cur_curlyx->u.curlyx.count--;
4289                 CACHEsayNO;
4290             }
4291
4292             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4293                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4294             );
4295             /* Try grabbing another A and see if it helps. */
4296             PL_reginput = locinput;
4297             cur_curlyx->u.curlyx.lastloc = locinput;
4298             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4299             REGCP_SET(ST.lastcp);
4300             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4301             /* NOTREACHED */
4302
4303 #undef  ST
4304 #define ST st->u.branch
4305
4306         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4307             next = scan + ARG(scan);
4308             if (next == scan)
4309                 next = NULL;
4310             scan = NEXTOPER(scan);
4311             /* FALL THROUGH */
4312
4313         case BRANCH:        /*  /(...|A|...)/ */
4314             scan = NEXTOPER(scan); /* scan now points to inner node */
4315             ST.lastparen = *PL_reglastparen;
4316             ST.next_branch = next;
4317             REGCP_SET(ST.cp);
4318             PL_reginput = locinput;
4319
4320             /* Now go into the branch */
4321             if (has_cutgroup) {
4322                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4323             } else {
4324                 PUSH_STATE_GOTO(BRANCH_next, scan);
4325             }
4326             /* NOTREACHED */
4327         case CUTGROUP:
4328             PL_reginput = locinput;
4329             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4330                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4331             PUSH_STATE_GOTO(CUTGROUP_next,next);
4332             /* NOTREACHED */
4333         case CUTGROUP_next_fail:
4334             do_cutgroup = 1;
4335             no_final = 1;
4336             if (st->u.mark.mark_name)
4337                 sv_commit = st->u.mark.mark_name;
4338             sayNO;
4339             /* NOTREACHED */
4340         case BRANCH_next:
4341             sayYES;
4342             /* NOTREACHED */
4343         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4344             if (do_cutgroup) {
4345                 do_cutgroup = 0;
4346                 no_final = 0;
4347             }
4348             REGCP_UNWIND(ST.cp);
4349             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4350                 PL_regoffs[n].end = -1;
4351             *PL_reglastparen = n;
4352             /*dmq: *PL_reglastcloseparen = n; */
4353             scan = ST.next_branch;
4354             /* no more branches? */
4355             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4356                 DEBUG_EXECUTE_r({
4357                     PerlIO_printf( Perl_debug_log,
4358                         "%*s  %sBRANCH failed...%s\n",
4359                         REPORT_CODE_OFF+depth*2, "",
4360                         PL_colors[4],
4361                         PL_colors[5] );
4362                 });
4363                 sayNO_SILENT;
4364             }
4365             continue; /* execute next BRANCH[J] op */
4366             /* NOTREACHED */
4367
4368         case MINMOD:
4369             minmod = 1;
4370             break;
4371
4372 #undef  ST
4373 #define ST st->u.curlym
4374
4375         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4376
4377             /* This is an optimisation of CURLYX that enables us to push
4378              * only a single backtracking state, no matter how many matches
4379              * there are in {m,n}. It relies on the pattern being constant
4380              * length, with no parens to influence future backrefs
4381              */
4382
4383             ST.me = scan;
4384             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4385
4386             /* if paren positive, emulate an OPEN/CLOSE around A */
4387             if (ST.me->flags) {
4388                 U32 paren = ST.me->flags;
4389                 if (paren > PL_regsize)
4390                     PL_regsize = paren;
4391                 if (paren > *PL_reglastparen)
4392                     *PL_reglastparen = paren;
4393                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4394             }
4395             ST.A = scan;
4396             ST.B = next;
4397             ST.alen = 0;
4398             ST.count = 0;
4399             ST.minmod = minmod;
4400             minmod = 0;
4401             ST.c1 = CHRTEST_UNINIT;
4402             REGCP_SET(ST.cp);
4403
4404             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4405                 goto curlym_do_B;
4406
4407           curlym_do_A: /* execute the A in /A{m,n}B/  */
4408             PL_reginput = locinput;
4409             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4410             /* NOTREACHED */
4411
4412         case CURLYM_A: /* we've just matched an A */
4413             locinput = st->locinput;
4414             nextchr = UCHARAT(locinput);
4415
4416             ST.count++;
4417             /* after first match, determine A's length: u.curlym.alen */
4418             if (ST.count == 1) {
4419                 if (PL_reg_match_utf8) {
4420                     char *s = locinput;
4421                     while (s < PL_reginput) {
4422                         ST.alen++;
4423                         s += UTF8SKIP(s);
4424                     }
4425                 }
4426                 else {
4427                     ST.alen = PL_reginput - locinput;
4428                 }
4429                 if (ST.alen == 0)
4430                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4431             }
4432             DEBUG_EXECUTE_r(
4433                 PerlIO_printf(Perl_debug_log,
4434                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4435                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4436                           (IV) ST.count, (IV)ST.alen)
4437             );
4438
4439             locinput = PL_reginput;
4440
4441             if (cur_eval && cur_eval->u.eval.close_paren &&
4442                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4443                 goto fake_end;
4444
4445             {
4446                 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4447                 if ( max == REG_INFTY || ST.count < max )
4448                     goto curlym_do_A; /* try to match another A */
4449             }
4450             goto curlym_do_B; /* try to match B */
4451
4452         case CURLYM_A_fail: /* just failed to match an A */
4453             REGCP_UNWIND(ST.cp);
4454
4455             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4456                 || (cur_eval && cur_eval->u.eval.close_paren &&
4457                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4458                 sayNO;
4459
4460           curlym_do_B: /* execute the B in /A{m,n}B/  */
4461             PL_reginput = locinput;
4462             if (ST.c1 == CHRTEST_UNINIT) {
4463                 /* calculate c1 and c2 for possible match of 1st char
4464                  * following curly */
4465                 ST.c1 = ST.c2 = CHRTEST_VOID;
4466                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4467                     regnode *text_node = ST.B;
4468                     if (! HAS_TEXT(text_node))
4469                         FIND_NEXT_IMPT(text_node);
4470                     /* this used to be
4471
4472                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4473
4474                         But the former is redundant in light of the latter.
4475
4476                         if this changes back then the macro for
4477                         IS_TEXT and friends need to change.
4478                      */
4479                     if (PL_regkind[OP(text_node)] == EXACT)
4480                     {
4481
4482                         ST.c1 = (U8)*STRING(text_node);
4483                         ST.c2 =
4484                             (IS_TEXTF(text_node))
4485                             ? PL_fold[ST.c1]
4486                             : (IS_TEXTFL(text_node))
4487                                 ? PL_fold_locale[ST.c1]
4488                                 : ST.c1;
4489                     }
4490                 }
4491             }
4492
4493             DEBUG_EXECUTE_r(
4494                 PerlIO_printf(Perl_debug_log,
4495                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4496                     (int)(REPORT_CODE_OFF+(depth*2)),
4497                     "", (IV)ST.count)
4498                 );
4499             if (ST.c1 != CHRTEST_VOID
4500                     && UCHARAT(PL_reginput) != ST.c1
4501                     && UCHARAT(PL_reginput) != ST.c2)
4502             {
4503                 /* simulate B failing */
4504                 DEBUG_OPTIMISE_r(
4505                     PerlIO_printf(Perl_debug_log,
4506                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4507                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4508                         (IV)ST.c1,(IV)ST.c2
4509                 ));
4510                 state_num = CURLYM_B_fail;
4511                 goto reenter_switch;
4512             }
4513
4514             if (ST.me->flags) {
4515                 /* mark current A as captured */
4516                 I32 paren = ST.me->flags;
4517                 if (ST.count) {
4518                     PL_regoffs[paren].start
4519                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4520                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4521                     /*dmq: *PL_reglastcloseparen = paren; */
4522                 }
4523                 else
4524                     PL_regoffs[paren].end = -1;
4525                 if (cur_eval && cur_eval->u.eval.close_paren &&
4526                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4527                 {
4528                     if (ST.count)
4529                         goto fake_end;
4530                     else
4531                         sayNO;
4532                 }
4533             }
4534
4535             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4536             /* NOTREACHED */
4537
4538         case CURLYM_B_fail: /* just failed to match a B */
4539             REGCP_UNWIND(ST.cp);
4540             if (ST.minmod) {
4541                 I32 max = ARG2(ST.me);
4542                 if (max != REG_INFTY && ST.count == max)
4543                     sayNO;
4544                 goto curlym_do_A; /* try to match a further A */
4545             }
4546             /* backtrack one A */
4547             if (ST.count == ARG1(ST.me) /* min */)
4548                 sayNO;
4549             ST.count--;
4550             locinput = HOPc(locinput, -ST.alen);
4551             goto curlym_do_B; /* try to match B */
4552
4553 #undef ST
4554 #define ST st->u.curly
4555
4556 #define CURLY_SETPAREN(paren, success) \
4557     if (paren) { \
4558         if (success) { \
4559             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4560             PL_regoffs[paren].end = locinput - PL_bostr; \
4561             *PL_reglastcloseparen = paren; \
4562         } \
4563         else \
4564             PL_regoffs[paren].end = -1; \
4565     }
4566
4567         case STAR:              /*  /A*B/ where A is width 1 */
4568             ST.paren = 0;
4569             ST.min = 0;
4570             ST.max = REG_INFTY;
4571             scan = NEXTOPER(scan);
4572             goto repeat;
4573         case PLUS:              /*  /A+B/ where A is width 1 */
4574             ST.paren = 0;
4575             ST.min = 1;
4576             ST.max = REG_INFTY;
4577             scan = NEXTOPER(scan);
4578             goto repeat;
4579         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4580             ST.paren = scan->flags;     /* Which paren to set */
4581             if (ST.paren > PL_regsize)
4582                 PL_regsize = ST.paren;
4583             if (ST.paren > *PL_reglastparen)
4584                 *PL_reglastparen = ST.paren;
4585             ST.min = ARG1(scan);  /* min to match */
4586             ST.max = ARG2(scan);  /* max to match */
4587             if (cur_eval && cur_eval->u.eval.close_paren &&
4588                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4589                 ST.min=1;
4590                 ST.max=1;
4591             }
4592             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4593             goto repeat;
4594         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4595             ST.paren = 0;
4596             ST.min = ARG1(scan);  /* min to match */
4597             ST.max = ARG2(scan);  /* max to match */
4598             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4599           repeat:
4600             /*
4601             * Lookahead to avoid useless match attempts
4602             * when we know what character comes next.
4603             *
4604             * Used to only do .*x and .*?x, but now it allows
4605             * for )'s, ('s and (?{ ... })'s to be in the way
4606             * of the quantifier and the EXACT-like node.  -- japhy
4607             */
4608
4609             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4610                 sayNO;
4611             if (HAS_TEXT(next) || JUMPABLE(next)) {
4612                 U8 *s;
4613                 regnode *text_node = next;
4614
4615                 if (! HAS_TEXT(text_node))
4616                     FIND_NEXT_IMPT(text_node);
4617
4618                 if (! HAS_TEXT(text_node))
4619                     ST.c1 = ST.c2 = CHRTEST_VOID;
4620                 else {
4621                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4622                         ST.c1 = ST.c2 = CHRTEST_VOID;
4623                         goto assume_ok_easy;
4624                     }
4625                     else
4626                         s = (U8*)STRING(text_node);
4627
4628                     /*  Currently we only get here when
4629
4630                         PL_rekind[OP(text_node)] == EXACT
4631
4632                         if this changes back then the macro for IS_TEXT and
4633                         friends need to change. */
4634                     if (!UTF) {
4635                         ST.c2 = ST.c1 = *s;
4636                         if (IS_TEXTF(text_node))
4637                             ST.c2 = PL_fold[ST.c1];
4638                         else if (IS_TEXTFL(text_node))
4639                             ST.c2 = PL_fold_locale[ST.c1];
4640                     }
4641                     else { /* UTF */
4642                         if (IS_TEXTF(text_node)) {
4643                              STRLEN ulen1, ulen2;
4644                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4645                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4646
4647                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4648                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4649 #ifdef EBCDIC
4650                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4651                                                     ckWARN(WARN_UTF8) ?
4652                                                     0 : UTF8_ALLOW_ANY);
4653                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4654                                                     ckWARN(WARN_UTF8) ?
4655                                                     0 : UTF8_ALLOW_ANY);
4656 #else
4657                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4658                                                     uniflags);
4659                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4660                                                     uniflags);
4661 #endif
4662                         }
4663                         else {
4664                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4665                                                      uniflags);
4666                         }
4667                     }
4668                 }
4669             }
4670             else
4671                 ST.c1 = ST.c2 = CHRTEST_VOID;
4672         assume_ok_easy:
4673
4674             ST.A = scan;
4675             ST.B = next;
4676             PL_reginput = locinput;
4677             if (minmod) {
4678                 minmod = 0;
4679                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4680                     sayNO;
4681                 ST.count = ST.min;
4682                 locinput = PL_reginput;
4683                 REGCP_SET(ST.cp);
4684                 if (ST.c1 == CHRTEST_VOID)
4685                     goto curly_try_B_min;
4686
4687                 ST.oldloc = locinput;
4688
4689                 /* set ST.maxpos to the furthest point along the
4690                  * string that could possibly match */
4691                 if  (ST.max == REG_INFTY) {
4692                     ST.maxpos = PL_regeol - 1;
4693                     if (do_utf8)
4694                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4695                             ST.maxpos--;
4696                 }
4697                 else if (do_utf8) {
4698                     int m = ST.max - ST.min;
4699                     for (ST.maxpos = locinput;
4700                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4701                         ST.maxpos += UTF8SKIP(ST.maxpos);
4702                 }
4703                 else {
4704                     ST.maxpos = locinput + ST.max - ST.min;
4705                     if (ST.maxpos >= PL_regeol)
4706                         ST.maxpos = PL_regeol - 1;
4707                 }
4708                 goto curly_try_B_min_known;
4709
4710             }
4711             else {
4712                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4713                 locinput = PL_reginput;
4714                 if (ST.count < ST.min)
4715                     sayNO;
4716                 if ((ST.count > ST.min)
4717                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4718                 {
4719                     /* A{m,n} must come at the end of the string, there's
4720                      * no point in backing off ... */
4721                     ST.min = ST.count;
4722                     /* ...except that $ and \Z can match before *and* after
4723                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4724                        We may back off by one in this case. */
4725                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4726                         ST.min--;
4727                 }
4728                 REGCP_SET(ST.cp);
4729                 goto curly_try_B_max;
4730             }
4731             /* NOTREACHED */
4732
4733
4734         case CURLY_B_min_known_fail:
4735             /* failed to find B in a non-greedy match where c1,c2 valid */
4736             if (ST.paren && ST.count)
4737                 PL_regoffs[ST.paren].end = -1;
4738
4739             PL_reginput = locinput;     /* Could be reset... */
4740             REGCP_UNWIND(ST.cp);
4741             /* Couldn't or didn't -- move forward. */
4742             ST.oldloc = locinput;
4743             if (do_utf8)
4744                 locinput += UTF8SKIP(locinput);
4745             else
4746                 locinput++;
4747             ST.count++;
4748           curly_try_B_min_known:
4749              /* find the next place where 'B' could work, then call B */
4750             {
4751                 int n;
4752                 if (do_utf8) {
4753                     n = (ST.oldloc == locinput) ? 0 : 1;
4754                     if (ST.c1 == ST.c2) {
4755                         STRLEN len;
4756                         /* set n to utf8_distance(oldloc, locinput) */
4757                         while (locinput <= ST.maxpos &&
4758                                utf8n_to_uvchr((U8*)locinput,
4759                                               UTF8_MAXBYTES, &len,
4760                                               uniflags) != (UV)ST.c1) {
4761                             locinput += len;
4762                             n++;
4763                         }
4764                     }
4765                     else {
4766                         /* set n to utf8_distance(oldloc, locinput) */
4767                         while (locinput <= ST.maxpos) {
4768                             STRLEN len;
4769                             const UV c = utf8n_to_uvchr((U8*)locinput,
4770                                                   UTF8_MAXBYTES, &len,
4771                                                   uniflags);
4772                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4773                                 break;
4774                             locinput += len;
4775                             n++;
4776                         }
4777                     }
4778                 }
4779                 else {
4780                     if (ST.c1 == ST.c2) {
4781                         while (locinput <= ST.maxpos &&
4782                                UCHARAT(locinput) != ST.c1)
4783                             locinput++;
4784                     }
4785                     else {
4786                         while (locinput <= ST.maxpos
4787                                && UCHARAT(locinput) != ST.c1
4788                                && UCHARAT(locinput) != ST.c2)
4789                             locinput++;
4790                     }
4791                     n = locinput - ST.oldloc;
4792                 }
4793                 if (locinput > ST.maxpos)
4794                     sayNO;
4795                 /* PL_reginput == oldloc now */
4796                 if (n) {
4797                     ST.count += n;
4798                     if (regrepeat(rex, ST.A, n, depth) < n)
4799                         sayNO;
4800                 }
4801                 PL_reginput = locinput;
4802                 CURLY_SETPAREN(ST.paren, ST.count);
4803                 if (cur_eval && cur_eval->u.eval.close_paren &&
4804                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4805                     goto fake_end;
4806                 }
4807                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4808             }
4809             /* NOTREACHED */
4810
4811
4812         case CURLY_B_min_fail:
4813             /* failed to find B in a non-greedy match where c1,c2 invalid */
4814             if (ST.paren && ST.count)
4815                 PL_regoffs[ST.paren].end = -1;
4816
4817             REGCP_UNWIND(ST.cp);
4818             /* failed -- move forward one */
4819             PL_reginput = locinput;
4820             if (regrepeat(rex, ST.A, 1, depth)) {
4821                 ST.count++;
4822                 locinput = PL_reginput;
4823                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4824                         ST.count > 0)) /* count overflow ? */
4825                 {
4826                   curly_try_B_min:
4827                     CURLY_SETPAREN(ST.paren, ST.count);
4828                     if (cur_eval && cur_eval->u.eval.close_paren &&
4829                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4830                         goto fake_end;
4831                     }
4832                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4833                 }
4834             }
4835             sayNO;
4836             /* NOTREACHED */
4837
4838
4839         curly_try_B_max:
4840             /* a successful greedy match: now try to match B */
4841             if (cur_eval && cur_eval->u.eval.close_paren &&
4842                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4843                 goto fake_end;
4844             }
4845             {
4846                 UV c = 0;
4847                 if (ST.c1 != CHRTEST_VOID)
4848                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4849                                            UTF8_MAXBYTES, 0, uniflags)
4850                                 : (UV) UCHARAT(PL_reginput);
4851                 /* If it could work, try it. */
4852                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4853                     CURLY_SETPAREN(ST.paren, ST.count);
4854                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4855                     /* NOTREACHED */
4856                 }
4857             }
4858             /* FALL THROUGH */
4859         case CURLY_B_max_fail:
4860             /* failed to find B in a greedy match */
4861             if (ST.paren && ST.count)
4862                 PL_regoffs[ST.paren].end = -1;
4863
4864             REGCP_UNWIND(ST.cp);
4865             /*  back up. */
4866             if (--ST.count < ST.min)
4867                 sayNO;
4868             PL_reginput = locinput = HOPc(locinput, -1);
4869             goto curly_try_B_max;
4870
4871 #undef ST
4872
4873         case END:
4874             fake_end:
4875             if (cur_eval) {
4876                 /* we've just finished A in /(??{A})B/; now continue with B */
4877                 I32 tmpix;
4878                 st->u.eval.toggle_reg_flags
4879                             = cur_eval->u.eval.toggle_reg_flags;
4880                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4881
4882                 st->u.eval.prev_rex = rex_sv;           /* inner */
4883                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4884                 rex = (struct regexp *)SvANY(rex_sv);
4885                 rexi = RXi_GET(rex);
4886                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4887                 ReREFCNT_inc(rex_sv);
4888                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4889
4890                 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4891                 PL_reglastparen = &rex->lastparen;
4892                 PL_reglastcloseparen = &rex->lastcloseparen;
4893
4894                 REGCP_SET(st->u.eval.lastcp);
4895                 PL_reginput = locinput;
4896
4897                 /* Restore parens of the outer rex without popping the
4898                  * savestack */
4899                 tmpix = PL_savestack_ix;
4900                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4901                 regcppop(rex);
4902                 PL_savestack_ix = tmpix;
4903
4904                 st->u.eval.prev_eval = cur_eval;
4905                 cur_eval = cur_eval->u.eval.prev_eval;
4906                 DEBUG_EXECUTE_r(
4907                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4908                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4909                 if ( nochange_depth )
4910                     nochange_depth--;
4911
4912                 PUSH_YES_STATE_GOTO(EVAL_AB,
4913                         st->u.eval.prev_eval->u.eval.B); /* match B */
4914             }
4915
4916             if (locinput < reginfo->till) {
4917                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4918                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4919                                       PL_colors[4],
4920                                       (long)(locinput - PL_reg_starttry),
4921                                       (long)(reginfo->till - PL_reg_starttry),
4922                                       PL_colors[5]));
4923
4924                 sayNO_SILENT;           /* Cannot match: too short. */
4925             }
4926             PL_reginput = locinput;     /* put where regtry can find it */
4927             sayYES;                     /* Success! */
4928
4929         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4930             DEBUG_EXECUTE_r(
4931             PerlIO_printf(Perl_debug_log,
4932                 "%*s  %ssubpattern success...%s\n",
4933                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4934             PL_reginput = locinput;     /* put where regtry can find it */
4935             sayYES;                     /* Success! */
4936
4937 #undef  ST
4938 #define ST st->u.ifmatch
4939
4940         case SUSPEND:   /* (?>A) */
4941             ST.wanted = 1;
4942             PL_reginput = locinput;
4943             goto do_ifmatch;
4944
4945         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4946             ST.wanted = 0;
4947             goto ifmatch_trivial_fail_test;
4948
4949         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4950             ST.wanted = 1;
4951           ifmatch_trivial_fail_test:
4952             if (scan->flags) {
4953                 char * const s = HOPBACKc(locinput, scan->flags);
4954                 if (!s) {
4955                     /* trivial fail */
4956                     if (logical) {
4957                         logical = 0;
4958                         sw = 1 - (bool)ST.wanted;
4959                     }
4960                     else if (ST.wanted)
4961                         sayNO;
4962                     next = scan + ARG(scan);
4963                     if (next == scan)
4964                         next = NULL;
4965                     break;
4966                 }
4967                 PL_reginput = s;
4968             }
4969             else
4970                 PL_reginput = locinput;
4971
4972           do_ifmatch:
4973             ST.me = scan;
4974             ST.logical = logical;
4975             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4976
4977             /* execute body of (?...A) */
4978             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4979             /* NOTREACHED */
4980
4981         case IFMATCH_A_fail: /* body of (?...A) failed */
4982             ST.wanted = !ST.wanted;
4983             /* FALL THROUGH */
4984
4985         case IFMATCH_A: /* body of (?...A) succeeded */
4986             if (ST.logical) {
4987                 sw = (bool)ST.wanted;
4988             }
4989             else if (!ST.wanted)
4990                 sayNO;
4991
4992             if (OP(ST.me) == SUSPEND)
4993                 locinput = PL_reginput;
4994             else {
4995                 locinput = PL_reginput = st->locinput;
4996                 nextchr = UCHARAT(locinput);
4997             }
4998             scan = ST.me + ARG(ST.me);
4999             if (scan == ST.me)
5000                 scan = NULL;
5001             continue; /* execute B */
5002
5003 #undef ST
5004
5005         case LONGJMP:
5006             next = scan + ARG(scan);
5007             if (next == scan)
5008                 next = NULL;
5009             break;
5010         case COMMIT:
5011             reginfo->cutpoint = PL_regeol;
5012             /* FALLTHROUGH */
5013         case PRUNE:
5014             PL_reginput = locinput;
5015             if (!scan->flags)
5016                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5017             PUSH_STATE_GOTO(COMMIT_next,next);
5018             /* NOTREACHED */
5019         case COMMIT_next_fail:
5020             no_final = 1;
5021             /* FALLTHROUGH */
5022         case OPFAIL:
5023             sayNO;
5024             /* NOTREACHED */
5025
5026 #define ST st->u.mark
5027         case MARKPOINT:
5028             ST.prev_mark = mark_state;
5029             ST.mark_name = sv_commit = sv_yes_mark
5030                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5031             mark_state = st;
5032             ST.mark_loc = PL_reginput = locinput;
5033             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5034             /* NOTREACHED */
5035         case MARKPOINT_next:
5036             mark_state = ST.prev_mark;
5037             sayYES;
5038             /* NOTREACHED */
5039         case MARKPOINT_next_fail:
5040             if (popmark && sv_eq(ST.mark_name,popmark))
5041             {
5042                 if (ST.mark_loc > startpoint)
5043                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5044                 popmark = NULL; /* we found our mark */
5045                 sv_commit = ST.mark_name;
5046
5047                 DEBUG_EXECUTE_r({
5048                         PerlIO_printf(Perl_debug_log,
5049                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5050                             REPORT_CODE_OFF+depth*2, "",
5051                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5052                 });
5053             }
5054             mark_state = ST.prev_mark;
5055             sv_yes_mark = mark_state ?
5056                 mark_state->u.mark.mark_name : NULL;
5057             sayNO;
5058             /* NOTREACHED */
5059         case SKIP:
5060             PL_reginput = locinput;
5061             if (scan->flags) {
5062                 /* (*SKIP) : if we fail we cut here*/
5063                 ST.mark_name = NULL;
5064                 ST.mark_loc = locinput;
5065                 PUSH_STATE_GOTO(SKIP_next,next);
5066             } else {
5067                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5068                    otherwise do nothing.  Meaning we need to scan
5069                  */
5070                 regmatch_state *cur = mark_state;
5071                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5072
5073                 while (cur) {
5074                     if ( sv_eq( cur->u.mark.mark_name,
5075                                 find ) )
5076                     {
5077                         ST.mark_name = find;
5078                         PUSH_STATE_GOTO( SKIP_next, next );
5079                     }
5080                     cur = cur->u.mark.prev_mark;
5081                 }
5082             }
5083             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5084             break;
5085         case SKIP_next_fail:
5086             if (ST.mark_name) {
5087                 /* (*CUT:NAME) - Set up to search for the name as we
5088                    collapse the stack*/
5089                 popmark = ST.mark_name;
5090             } else {
5091                 /* (*CUT) - No name, we cut here.*/
5092                 if (ST.mark_loc > startpoint)
5093                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5094                 /* but we set sv_commit to latest mark_name if there
5095                    is one so they can test to see how things lead to this
5096                    cut */
5097                 if (mark_state)
5098                     sv_commit=mark_state->u.mark.mark_name;
5099             }
5100             no_final = 1;
5101             sayNO;
5102             /* NOTREACHED */
5103 #undef ST
5104         case FOLDCHAR:
5105             n = ARG(scan);
5106             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5107                 locinput += ln;
5108             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5109                 sayNO;
5110             } else  {
5111                 U8 folded[UTF8_MAXBYTES_CASE+1];
5112                 STRLEN foldlen;
5113                 const char * const l = locinput;
5114                 char *e = PL_regeol;
5115                 to_uni_fold(n, folded, &foldlen);
5116
5117                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5118                                l, &e, 0,  do_utf8)) {
5119                         sayNO;
5120                 }
5121                 locinput = e;
5122             }
5123             nextchr = UCHARAT(locinput);
5124             break;
5125         case LNBREAK:
5126             if ((n=is_LNBREAK(locinput,do_utf8))) {
5127                 locinput += n;
5128                 nextchr = UCHARAT(locinput);
5129             } else
5130                 sayNO;
5131             break;
5132
5133 #define CASE_CLASS(nAmE)                              \
5134         case nAmE:                                    \
5135             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5136                 locinput += n;                        \
5137                 nextchr = UCHARAT(locinput);          \
5138             } else                                    \
5139                 sayNO;                                \
5140             break;                                    \
5141         case N##nAmE:                                 \
5142             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5143                 sayNO;                                \
5144             } else {                                  \
5145                 locinput += UTF8SKIP(locinput);       \
5146                 nextchr = UCHARAT(locinput);          \
5147             }                                         \
5148             break
5149
5150         CASE_CLASS(VERTWS);
5151         CASE_CLASS(HORIZWS);
5152 #undef CASE_CLASS
5153
5154         default:
5155             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5156                           PTR2UV(scan), OP(scan));
5157             Perl_croak(aTHX_ "regexp memory corruption");
5158
5159         } /* end switch */
5160
5161         /* switch break jumps here */
5162         scan = next; /* prepare to execute the next op and ... */
5163         continue;    /* ... jump back to the top, reusing st */
5164         /* NOTREACHED */
5165
5166       push_yes_state:
5167         /* push a state that backtracks on success */
5168         st->u.yes.prev_yes_state = yes_state;
5169         yes_state = st;
5170         /* FALL THROUGH */
5171       push_state:
5172         /* push a new regex state, then continue at scan  */
5173         {
5174             regmatch_state *newst;
5175
5176             DEBUG_STACK_r({
5177                 regmatch_state *cur = st;
5178                 regmatch_state *curyes = yes_state;
5179                 int curd = depth;
5180                 regmatch_slab *slab = PL_regmatch_slab;
5181                 for (;curd > -1;cur--,curd--) {
5182                     if (cur < SLAB_FIRST(slab)) {
5183                         slab = slab->prev;
5184                         cur = SLAB_LAST(slab);
5185                     }
5186                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5187                         REPORT_CODE_OFF + 2 + depth * 2,"",
5188                         curd, PL_reg_name[cur->resume_state],
5189                         (curyes == cur) ? "yes" : ""
5190                     );
5191                     if (curyes == cur)
5192                         curyes = cur->u.yes.prev_yes_state;
5193                 }
5194             } else
5195                 DEBUG_STATE_pp("push")
5196             );
5197             depth++;
5198             st->locinput = locinput;
5199             newst = st+1;
5200             if (newst >  SLAB_LAST(PL_regmatch_slab))
5201                 newst = S_push_slab(aTHX);
5202             PL_regmatch_state = newst;
5203
5204             locinput = PL_reginput;
5205             nextchr = UCHARAT(locinput);
5206             st = newst;
5207             continue;
5208             /* NOTREACHED */
5209         }
5210     }
5211
5212     /*
5213     * We get here only if there's trouble -- normally "case END" is
5214     * the terminating point.
5215     */
5216     Perl_croak(aTHX_ "corrupted regexp pointers");
5217     /*NOTREACHED*/
5218     sayNO;
5219
5220 yes:
5221     if (yes_state) {
5222         /* we have successfully completed a subexpression, but we must now
5223          * pop to the state marked by yes_state and continue from there */
5224         assert(st != yes_state);
5225 #ifdef DEBUGGING
5226         while (st != yes_state) {
5227             st--;
5228             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5229                 PL_regmatch_slab = PL_regmatch_slab->prev;
5230                 st = SLAB_LAST(PL_regmatch_slab);
5231             }
5232             DEBUG_STATE_r({
5233                 if (no_final) {
5234                     DEBUG_STATE_pp("pop (no final)");
5235                 } else {
5236                     DEBUG_STATE_pp("pop (yes)");
5237                 }
5238             });
5239             depth--;
5240         }
5241 #else
5242         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5243             || yes_state > SLAB_LAST(PL_regmatch_slab))
5244         {
5245             /* not in this slab, pop slab */
5246             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5247             PL_regmatch_slab = PL_regmatch_slab->prev;
5248             st = SLAB_LAST(PL_regmatch_slab);
5249         }
5250         depth -= (st - yes_state);
5251 #endif
5252         st = yes_state;
5253         yes_state = st->u.yes.prev_yes_state;
5254         PL_regmatch_state = st;
5255
5256         if (no_final) {
5257             locinput= st->locinput;
5258             nextchr = UCHARAT(locinput);
5259         }
5260         state_num = st->resume_state + no_final;
5261         goto reenter_switch;
5262     }
5263
5264     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5265                           PL_colors[4], PL_colors[5]));
5266
5267     if (PL_reg_eval_set) {
5268         /* each successfully executed (?{...}) block does the equivalent of
5269          *   local $^R = do {...}
5270          * When popping the save stack, all these locals would be undone;
5271          * bypass this by setting the outermost saved $^R to the latest
5272          * value */
5273         if (oreplsv != GvSV(PL_replgv))
5274             sv_setsv(oreplsv, GvSV(PL_replgv));
5275     }
5276     result = 1;
5277     goto final_exit;
5278
5279 no:
5280     DEBUG_EXECUTE_r(
5281         PerlIO_printf(Perl_debug_log,
5282             "%*s  %sfailed...%s\n",
5283             REPORT_CODE_OFF+depth*2, "",
5284             PL_colors[4], PL_colors[5])
5285         );
5286
5287 no_silent:
5288     if (no_final) {
5289         if (yes_state) {
5290             goto yes;
5291         } else {
5292             goto final_exit;
5293         }
5294     }
5295     if (depth) {
5296         /* there's a previous state to backtrack to */
5297         st--;
5298         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5299             PL_regmatch_slab = PL_regmatch_slab->prev;
5300             st = SLAB_LAST(PL_regmatch_slab);
5301         }
5302         PL_regmatch_state = st;
5303         locinput= st->locinput;
5304         nextchr = UCHARAT(locinput);
5305
5306         DEBUG_STATE_pp("pop");
5307         depth--;
5308         if (yes_state == st)
5309             yes_state = st->u.yes.prev_yes_state;
5310
5311         state_num = st->resume_state + 1; /* failure = success + 1 */
5312         goto reenter_switch;
5313     }
5314     result = 0;
5315
5316   final_exit:
5317     if (rex->intflags & PREGf_VERBARG_SEEN) {
5318         SV *sv_err = get_sv("REGERROR", 1);
5319         SV *sv_mrk = get_sv("REGMARK", 1);
5320         if (result) {
5321             sv_commit = &PL_sv_no;
5322             if (!sv_yes_mark)
5323                 sv_yes_mark = &PL_sv_yes;
5324         } else {
5325             if (!sv_commit)
5326                 sv_commit = &PL_sv_yes;
5327             sv_yes_mark = &PL_sv_no;
5328         }
5329         sv_setsv(sv_err, sv_commit);
5330         sv_setsv(sv_mrk, sv_yes_mark);
5331     }
5332
5333     /* clean up; in particular, free all slabs above current one */
5334     LEAVE_SCOPE(oldsave);
5335
5336     return result;
5337 }
5338
5339 /*
5340  - regrepeat - repeatedly match something simple, report how many
5341  */
5342 /*
5343  * [This routine now assumes that it will only match on things of length 1.
5344  * That was true before, but now we assume scan - reginput is the count,
5345  * rather than incrementing count on every character.  [Er, except utf8.]]
5346  */
5347 STATIC I32
5348 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5349 {
5350     dVAR;
5351     register char *scan;
5352     register I32 c;
5353     register char *loceol = PL_regeol;
5354     register I32 hardcount = 0;
5355     register bool do_utf8 = PL_reg_match_utf8;
5356 #ifndef DEBUGGING
5357     PERL_UNUSED_ARG(depth);
5358 #endif
5359
5360     PERL_ARGS_ASSERT_REGREPEAT;
5361
5362     scan = PL_reginput;
5363     if (max == REG_INFTY)
5364         max = I32_MAX;
5365     else if (max < loceol - scan)
5366         loceol = scan + max;
5367     switch (OP(p)) {
5368     case REG_ANY:
5369         if (do_utf8) {
5370             loceol = PL_regeol;
5371             while (scan < loceol && hardcount < max && *scan != '\n') {
5372                 scan += UTF8SKIP(scan);
5373                 hardcount++;
5374             }
5375         } else {
5376             while (scan < loceol && *scan != '\n')
5377                 scan++;
5378         }
5379         break;
5380     case SANY:
5381         if (do_utf8) {
5382             loceol = PL_regeol;
5383             while (scan < loceol && hardcount < max) {
5384                 scan += UTF8SKIP(scan);
5385                 hardcount++;
5386             }
5387         }
5388         else
5389             scan = loceol;
5390         break;
5391     case CANY:
5392         scan = loceol;
5393         break;
5394     case EXACT:         /* length of string is 1 */
5395         c = (U8)*STRING(p);
5396         while (scan < loceol && UCHARAT(scan) == c)
5397             scan++;
5398         break;
5399     case EXACTF:        /* length of string is 1 */
5400         c = (U8)*STRING(p);
5401         while (scan < loceol &&
5402                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5403             scan++;
5404         break;
5405     case EXACTFL:       /* length of string is 1 */
5406         PL_reg_flags |= RF_tainted;
5407         c = (U8)*STRING(p);
5408         while (scan < loceol &&
5409                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5410             scan++;
5411         break;
5412     case ANYOF:
5413         if (do_utf8) {
5414             loceol = PL_regeol;
5415             while (hardcount < max && scan < loceol &&
5416                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5417                 scan += UTF8SKIP(scan);
5418                 hardcount++;
5419             }
5420         } else {
5421             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5422                 scan++;
5423         }
5424         break;
5425     case ALNUM:
5426         if (do_utf8) {
5427             loceol = PL_regeol;
5428             LOAD_UTF8_CHARCLASS_ALNUM();
5429             while (hardcount < max && scan < loceol &&
5430                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5431                 scan += UTF8SKIP(scan);
5432                 hardcount++;
5433             }
5434         } else {
5435             while (scan < loceol && isALNUM(*scan))
5436                 scan++;
5437         }
5438         break;
5439     case ALNUML:
5440         PL_reg_flags |= RF_tainted;
5441         if (do_utf8) {
5442             loceol = PL_regeol;
5443             while (hardcount < max && scan < loceol &&
5444                    isALNUM_LC_utf8((U8*)scan)) {
5445                 scan += UTF8SKIP(scan);
5446                 hardcount++;
5447             }
5448         } else {
5449             while (scan < loceol && isALNUM_LC(*scan))
5450                 scan++;
5451         }
5452         break;
5453     case NALNUM:
5454         if (do_utf8) {
5455             loceol = PL_regeol;
5456             LOAD_UTF8_CHARCLASS_ALNUM();
5457             while (hardcount < max && scan < loceol &&
5458                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5459                 scan += UTF8SKIP(scan);
5460                 hardcount++;
5461             }
5462         } else {
5463             while (scan < loceol && !isALNUM(*scan))
5464                 scan++;
5465         }
5466         break;
5467     case NALNUML:
5468         PL_reg_flags |= RF_tainted;
5469         if (do_utf8) {
5470             loceol = PL_regeol;
5471             while (hardcount < max && scan < loceol &&
5472                    !isALNUM_LC_utf8((U8*)scan)) {
5473                 scan += UTF8SKIP(scan);
5474                 hardcount++;
5475             }
5476         } else {
5477             while (scan < loceol && !isALNUM_LC(*scan))
5478                 scan++;
5479         }
5480         break;
5481     case SPACE:
5482         if (do_utf8) {
5483             loceol = PL_regeol;
5484             LOAD_UTF8_CHARCLASS_SPACE();
5485             while (hardcount < max && scan < loceol &&
5486                    (*scan == ' ' ||
5487                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5488                 scan += UTF8SKIP(scan);
5489                 hardcount++;
5490             }
5491         } else {
5492             while (scan < loceol && isSPACE(*scan))
5493                 scan++;
5494         }
5495         break;
5496     case SPACEL:
5497         PL_reg_flags |= RF_tainted;
5498         if (do_utf8) {
5499             loceol = PL_regeol;
5500             while (hardcount < max && scan < loceol &&
5501                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5502                 scan += UTF8SKIP(scan);
5503                 hardcount++;
5504             }
5505         } else {
5506             while (scan < loceol && isSPACE_LC(*scan))
5507                 scan++;
5508         }
5509         break;
5510     case NSPACE:
5511         if (do_utf8) {
5512             loceol = PL_regeol;
5513             LOAD_UTF8_CHARCLASS_SPACE();
5514             while (hardcount < max && scan < loceol &&
5515                    !(*scan == ' ' ||
5516                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5517                 scan += UTF8SKIP(scan);
5518                 hardcount++;
5519             }
5520         } else {
5521             while (scan < loceol && !isSPACE(*scan))
5522                 scan++;
5523         }
5524         break;
5525     case NSPACEL:
5526         PL_reg_flags |= RF_tainted;
5527         if (do_utf8) {
5528             loceol = PL_regeol;
5529             while (hardcount < max && scan < loceol &&
5530                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5531                 scan += UTF8SKIP(scan);
5532                 hardcount++;
5533             }
5534         } else {
5535             while (scan < loceol && !isSPACE_LC(*scan))
5536                 scan++;
5537         }
5538         break;
5539     case DIGIT:
5540         if (do_utf8) {
5541             loceol = PL_regeol;
5542             LOAD_UTF8_CHARCLASS_DIGIT();
5543             while (hardcount < max && scan < loceol &&
5544                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5545                 scan += UTF8SKIP(scan);
5546                 hardcount++;
5547             }
5548         } else {
5549             while (scan < loceol && isDIGIT(*scan))
5550                 scan++;
5551         }
5552         break;
5553     case NDIGIT:
5554         if (do_utf8) {
5555             loceol = PL_regeol;
5556             LOAD_UTF8_CHARCLASS_DIGIT();
5557             while (hardcount < max && scan < loceol &&
5558                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5559                 scan += UTF8SKIP(scan);
5560                 hardcount++;
5561             }
5562         } else {
5563             while (scan < loceol && !isDIGIT(*scan))
5564                 scan++;
5565         }
5566     case LNBREAK:
5567         if (do_utf8) {
5568             loceol = PL_regeol;
5569             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5570                 scan += c;
5571                 hardcount++;
5572             }
5573         } else {
5574             /*
5575               LNBREAK can match two latin chars, which is ok,
5576               because we have a null terminated string, but we
5577               have to use hardcount in this situation
5578             */
5579             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5580                 scan+=c;
5581                 hardcount++;
5582             }
5583         }
5584         break;
5585     case HORIZWS:
5586         if (do_utf8) {
5587             loceol = PL_regeol;
5588             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5589                 scan += c;
5590                 hardcount++;
5591             }
5592         } else {
5593             while (scan < loceol && is_HORIZWS_latin1(scan))
5594                 scan++;
5595         }
5596         break;
5597     case NHORIZWS:
5598         if (do_utf8) {
5599             loceol = PL_regeol;
5600             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5601                 scan += UTF8SKIP(scan);
5602                 hardcount++;
5603             }
5604         } else {
5605             while (scan < loceol && !is_HORIZWS_latin1(scan))
5606                 scan++;
5607
5608         }
5609         break;
5610     case VERTWS:
5611         if (do_utf8) {
5612             loceol = PL_regeol;
5613             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5614                 scan += c;
5615                 hardcount++;
5616             }
5617         } else {
5618             while (scan < loceol && is_VERTWS_latin1(scan))
5619                 scan++;
5620
5621         }
5622         break;
5623     case NVERTWS:
5624         if (do_utf8) {
5625             loceol = PL_regeol;
5626             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5627                 scan += UTF8SKIP(scan);
5628                 hardcount++;
5629             }
5630         } else {
5631             while (scan < loceol && !is_VERTWS_latin1(scan))
5632                 scan++;
5633
5634         }
5635         break;
5636
5637     default:            /* Called on something of 0 width. */
5638         break;          /* So match right here or not at all. */
5639     }
5640
5641     if (hardcount)
5642         c = hardcount;
5643     else
5644         c = scan - PL_reginput;
5645     PL_reginput = scan;
5646
5647     DEBUG_r({
5648         GET_RE_DEBUG_FLAGS_DECL;
5649         DEBUG_EXECUTE_r({
5650             SV * const prop = sv_newmortal();
5651             regprop(prog, prop, p);
5652             PerlIO_printf(Perl_debug_log,
5653                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5654                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5655         });
5656     });
5657
5658     return(c);
5659 }
5660
5661
5662 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5663 /*
5664 - regclass_swash - prepare the utf8 swash
5665 */
5666
5667 SV *
5668 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5669 {
5670     dVAR;
5671     SV *sw  = NULL;
5672     SV *si  = NULL;
5673     SV *alt = NULL;
5674     RXi_GET_DECL(prog,progi);
5675     const struct reg_data * const data = prog ? progi->data : NULL;
5676
5677     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5678
5679     if (data && data->count) {
5680         const U32 n = ARG(node);
5681
5682         if (data->what[n] == 's') {
5683             SV * const rv = MUTABLE_SV(data->data[n]);
5684             AV * const av = MUTABLE_AV(SvRV(rv));
5685             SV **const ary = AvARRAY(av);
5686             SV **a, **b;
5687
5688             /* See the end of regcomp.c:S_regclass() for
5689              * documentation of these array elements. */
5690
5691             si = *ary;
5692             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5693             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5694
5695             if (a)
5696                 sw = *a;
5697             else if (si && doinit) {
5698                 sw = swash_init("utf8", "", si, 1, 0);
5699                 (void)av_store(av, 1, sw);
5700             }
5701             if (b)
5702                 alt = *b;
5703         }
5704     }
5705
5706     if (listsvp)
5707         *listsvp = si;
5708     if (altsvp)
5709         *altsvp  = alt;
5710
5711     return sw;
5712 }
5713 #endif
5714
5715 /*
5716  - reginclass - determine if a character falls into a character class
5717
5718   The n is the ANYOF regnode, the p is the target string, lenp
5719   is pointer to the maximum length of how far to go in the p
5720   (if the lenp is zero, UTF8SKIP(p) is used),
5721   do_utf8 tells whether the target string is in UTF-8.
5722
5723  */
5724
5725 STATIC bool
5726 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5727 {
5728     dVAR;
5729     const char flags = ANYOF_FLAGS(n);
5730     bool match = FALSE;
5731     UV c = *p;
5732     STRLEN len = 0;
5733     STRLEN plen;
5734
5735     PERL_ARGS_ASSERT_REGINCLASS;
5736
5737     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5738         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5739                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5740                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5741         if (len == (STRLEN)-1)
5742             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5743     }
5744
5745     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5746     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5747         if (lenp)
5748             *lenp = 0;
5749         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5750             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5751                 match = TRUE;
5752         }
5753         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5754             match = TRUE;
5755         if (!match) {
5756             AV *av;
5757             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5758
5759             if (sw) {
5760                 U8 * utf8_p;
5761                 if (do_utf8) {
5762                     utf8_p = (U8 *) p;
5763                 } else {
5764                     STRLEN len = 1;
5765                     utf8_p = bytes_to_utf8(p, &len);
5766                 }
5767                 if (swash_fetch(sw, utf8_p, 1))
5768                     match = TRUE;
5769                 else if (flags & ANYOF_FOLD) {
5770                     if (!match && lenp && av) {
5771                         I32 i;
5772                         for (i = 0; i <= av_len(av); i++) {
5773                             SV* const sv = *av_fetch(av, i, FALSE);
5774                             STRLEN len;
5775                             const char * const s = SvPV_const(sv, len);
5776                             if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5777                                 *lenp = len;
5778                                 match = TRUE;
5779                                 break;
5780                             }
5781                         }
5782                     }
5783                     if (!match) {
5784                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5785
5786                         STRLEN tmplen;
5787                         to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5788                         if (swash_fetch(sw, tmpbuf, 1))
5789                             match = TRUE;
5790                     }
5791                 }
5792
5793                 /* If we allocated a string above, free it */
5794                 if (! do_utf8) Safefree(utf8_p);
5795             }
5796         }
5797         if (match && lenp && *lenp == 0)
5798             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5799     }
5800     if (!match && c < 256) {
5801         if (ANYOF_BITMAP_TEST(n, c))
5802             match = TRUE;
5803         else if (flags & ANYOF_FOLD) {
5804             U8 f;
5805
5806             if (flags & ANYOF_LOCALE) {
5807                 PL_reg_flags |= RF_tainted;
5808                 f = PL_fold_locale[c];
5809             }
5810             else
5811                 f = PL_fold[c];
5812             if (f != c && ANYOF_BITMAP_TEST(n, f))
5813                 match = TRUE;
5814         }
5815
5816         if (!match && (flags & ANYOF_CLASS)) {
5817             PL_reg_flags |= RF_tainted;
5818             if (
5819                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5820                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5821                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5822                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5825                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5826                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5827                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5828                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5829                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5830                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5831                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5832                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5833                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5834                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5835                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5836                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5837                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5838                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5839                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5840                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5841                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5842                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5843                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5844                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5845                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5846                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5847                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5848                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5849                 ) /* How's that for a conditional? */
5850             {
5851                 match = TRUE;
5852             }
5853         }
5854     }
5855
5856     return (flags & ANYOF_INVERT) ? !match : match;
5857 }
5858
5859 STATIC U8 *
5860 S_reghop3(U8 *s, I32 off, const U8* lim)
5861 {
5862     dVAR;
5863
5864     PERL_ARGS_ASSERT_REGHOP3;
5865
5866     if (off >= 0) {
5867         while (off-- && s < lim) {
5868             /* XXX could check well-formedness here */
5869             s += UTF8SKIP(s);
5870         }
5871     }
5872     else {
5873         while (off++ && s > lim) {
5874             s--;
5875             if (UTF8_IS_CONTINUED(*s)) {
5876                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5877                     s--;
5878             }
5879             /* XXX could check well-formedness here */
5880         }
5881     }
5882     return s;
5883 }
5884
5885 #ifdef XXX_dmq
5886 /* there are a bunch of places where we use two reghop3's that should
5887    be replaced with this routine. but since thats not done yet
5888    we ifdef it out - dmq
5889 */
5890 STATIC U8 *
5891 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5892 {
5893     dVAR;
5894
5895     PERL_ARGS_ASSERT_REGHOP4;
5896
5897     if (off >= 0) {
5898         while (off-- && s < rlim) {
5899             /* XXX could check well-formedness here */
5900             s += UTF8SKIP(s);
5901         }
5902     }
5903     else {
5904         while (off++ && s > llim) {
5905             s--;
5906             if (UTF8_IS_CONTINUED(*s)) {
5907                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5908                     s--;
5909             }
5910             /* XXX could check well-formedness here */
5911         }
5912     }
5913     return s;
5914 }
5915 #endif
5916
5917 STATIC U8 *
5918 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5919 {
5920     dVAR;
5921
5922     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5923
5924     if (off >= 0) {
5925         while (off-- && s < lim) {
5926             /* XXX could check well-formedness here */
5927             s += UTF8SKIP(s);
5928         }
5929         if (off >= 0)
5930             return NULL;
5931     }
5932     else {
5933         while (off++ && s > lim) {
5934             s--;
5935             if (UTF8_IS_CONTINUED(*s)) {
5936                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5937                     s--;
5938             }
5939             /* XXX could check well-formedness here */
5940         }
5941         if (off <= 0)
5942             return NULL;
5943     }
5944     return s;
5945 }
5946
5947 static void
5948 restore_pos(pTHX_ void *arg)
5949 {
5950     dVAR;
5951     regexp * const rex = (regexp *)arg;
5952     if (PL_reg_eval_set) {
5953         if (PL_reg_oldsaved) {
5954             rex->subbeg = PL_reg_oldsaved;
5955             rex->sublen = PL_reg_oldsavedlen;
5956 #ifdef PERL_OLD_COPY_ON_WRITE
5957             rex->saved_copy = PL_nrs;
5958 #endif
5959             RXp_MATCH_COPIED_on(rex);
5960         }
5961         PL_reg_magic->mg_len = PL_reg_oldpos;
5962         PL_reg_eval_set = 0;
5963         PL_curpm = PL_reg_oldcurpm;
5964     }
5965 }
5966
5967 STATIC void
5968 S_to_utf8_substr(pTHX_ register regexp *prog)
5969 {
5970     int i = 1;
5971
5972     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5973
5974     do {
5975         if (prog->substrs->data[i].substr
5976             && !prog->substrs->data[i].utf8_substr) {
5977             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5978             prog->substrs->data[i].utf8_substr = sv;
5979             sv_utf8_upgrade(sv);
5980             if (SvVALID(prog->substrs->data[i].substr)) {
5981                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5982                 if (flags & FBMcf_TAIL) {
5983                     /* Trim the trailing \n that fbm_compile added last
5984                        time.  */
5985                     SvCUR_set(sv, SvCUR(sv) - 1);
5986                     /* Whilst this makes the SV technically "invalid" (as its
5987                        buffer is no longer followed by "\0") when fbm_compile()
5988                        adds the "\n" back, a "\0" is restored.  */
5989                 }
5990                 fbm_compile(sv, flags);
5991             }
5992             if (prog->substrs->data[i].substr == prog->check_substr)
5993                 prog->check_utf8 = sv;
5994         }
5995     } while (i--);
5996 }
5997
5998 STATIC void
5999 S_to_byte_substr(pTHX_ register regexp *prog)
6000 {
6001     dVAR;
6002     int i = 1;
6003
6004     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6005
6006     do {
6007         if (prog->substrs->data[i].utf8_substr
6008             && !prog->substrs->data[i].substr) {
6009             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6010             if (sv_utf8_downgrade(sv, TRUE)) {
6011                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6012                     const U8 flags
6013                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6014                     if (flags & FBMcf_TAIL) {
6015                         /* Trim the trailing \n that fbm_compile added last
6016                            time.  */
6017                         SvCUR_set(sv, SvCUR(sv) - 1);
6018                     }
6019                     fbm_compile(sv, flags);
6020                 }
6021             } else {
6022                 SvREFCNT_dec(sv);
6023                 sv = &PL_sv_undef;
6024             }
6025             prog->substrs->data[i].substr = sv;
6026             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6027                 prog->check_substr = sv;
6028         }
6029     } while (i--);
6030 }
6031
6032 /*
6033  * Local variables:
6034  * c-indentation-style: bsd
6035  * c-basic-offset: 4
6036  * indent-tabs-mode: t
6037  * End:
6038  *
6039  * ex: set ts=8 sts=4 sw=4 noet:
6040  */