src/5011002/orig/regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  *      One Ring to rule them all, One Ring to find them
   6  &
   7  *     [p.v of _The Lord of the Rings_, opening poem]
   8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10  */
  11
  12 /* This file contains functions for executing a regular expression.  See
  13  * also regcomp.c which funnily enough, contains functions for compiling
  14  * a regular expression.
  15  *
  16  * This file is also copied at build time to ext/re/re_exec.c, where
  17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18  * This causes the main functions to be compiled under new names and with
  19  * debugging support added, which makes "use re 'debug'" work.
  20  */
  21
  22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23  * confused with the original package (see point 3 below).  Thanks, Henry!
  24  */
  25
  26 /* Additional note: this code is very heavily munged from Henry's version
  27  * in places.  In some spots I've traded clarity for efficiency, so don't
  28  * blame Henry for some of the lack of readability.
  29  */
  30
  31 /* The names of the functions have been changed from regcomp and
  32  * regexec to  pregcomp and pregexec in order to avoid conflicts
  33  * with the POSIX routines of the same names.
  34 */
  35
  36 #ifdef PERL_EXT_RE_BUILD
  37 #include "re_top.h"
  38 #endif
  39
  40 /*
  41  * pregcomp and pregexec -- regsub and regerror are not used in perl
  42  *
  43  *      Copyright (c) 1986 by University of Toronto.
  44  *      Written by Henry Spencer.  Not derived from licensed software.
  45  *
  46  *      Permission is granted to anyone to use this software for any
  47  *      purpose on any computer system, and to redistribute it freely,
  48  *      subject to the following restrictions:
  49  *
  50  *      1. The author is not responsible for the consequences of use of
  51  *              this software, no matter how awful, even if they arise
  52  *              from defects in it.
  53  *
  54  *      2. The origin of this software must not be misrepresented, either
  55  *              by explicit claim or by omission.
  56  *
  57  *      3. Altered versions must be plainly marked as such, and must not
  58  *              be misrepresented as being the original software.
  59  *
  60  ****    Alterations to Henry's code are...
  61  ****
  62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64  ****    by Larry Wall and others
  65  ****
  66  ****    You may distribute under the terms of either the GNU General Public
  67  ****    License or the Artistic License, as specified in the README file.
  68  *
  69  * Beware that some of this code is subtly aware of the way operator
  70  * precedence is structured in regular expressions.  Serious changes in
  71  * regular-expression syntax might require a total rethink.
  72  */
  73 #include "EXTERN.h"
  74 #define PERL_IN_REGEXEC_C
  75 #include "perl.h"
  76
  77 #ifdef PERL_IN_XSUB_RE
  78 #  include "re_comp.h"
  79 #else
  80 #  include "regcomp.h"
  81 #endif
  82
  83 #define RF_tainted      1               /* tainted information used? */
  84 #define RF_warned       2               /* warned about big count? */
  85
  86 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  87
  88 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  89
  90 #define RS_init         1               /* eval environment created */
  91 #define RS_set          2               /* replsv value is set */
  92
  93 #ifndef STATIC
  94 #define STATIC  static
  95 #endif
  96
  97 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  98
  99 /*
 100  * Forwards.
 101  */
 102
 103 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 104 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 105
 106 #define HOPc(pos,off) \
 107         (char *)(PL_reg_match_utf8 \
 108             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 109             : (U8*)(pos + off))
 110 #define HOPBACKc(pos, off) \
 111         (char*)(PL_reg_match_utf8\
 112             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 113             : (pos - off >= PL_bostr)           \
 114                 ? (U8*)pos - off                \
 115                 : NULL)
 116
 117 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 118 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 119
 120 /* these are unrolled below in the CCC_TRY_XXX defined */
 121 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 122     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 123 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 124 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 125 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 126 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 127
 128
 129 /*
 130    We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
 131    so that it is possible to override the option here without having to
 132    rebuild the entire core. as we are required to do if we change regcomp.h
 133    which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
 134 */
 135 #if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
 136 #define BROKEN_UNICODE_CHARCLASS_MAPPINGS
 137 #endif
 138
 139 #ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
 140 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS_ALNUM()
 141 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS_SPACE()
 142 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
 143 #define RE_utf8_perl_word   PL_utf8_alnum
 144 #define RE_utf8_perl_space  PL_utf8_space
 145 #define RE_utf8_posix_digit PL_utf8_digit
 146 #define perl_word  alnum
 147 #define perl_space space
 148 #define posix_digit digit
 149 #else
 150 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS(perl_word,"a")
 151 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS(perl_space," ")
 152 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
 153 #define RE_utf8_perl_word   PL_utf8_perl_word
 154 #define RE_utf8_perl_space  PL_utf8_perl_space
 155 #define RE_utf8_posix_digit PL_utf8_posix_digit
 156 #endif
 157
 158
 159 #define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                          \
 160         case NAMEL:                                                              \
 161             PL_reg_flags |= RF_tainted;                                                 \
 162             /* FALL THROUGH */                                                          \
 163         case NAME:                                                                     \
 164             if (!nextchr)                                                               \
 165                 sayNO;                                                                  \
 166             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
 167                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
 168                     bool ok;                                                            \
 169                     ENTER;                                                              \
 170                     save_re_context();                                                  \
 171                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
 172                     assert(ok);                                                         \
 173                     LEAVE;                                                              \
 174                 }                                                                       \
 175                 if (!(OP(scan) == NAME                                                  \
 176                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)   \
 177                     : LCFUNC_utf8((U8*)locinput)))                                      \
 178                 {                                                                       \
 179                     sayNO;                                                              \
 180                 }                                                                       \
 181                 locinput += PL_utf8skip[nextchr];                                       \
 182                 nextchr = UCHARAT(locinput);                                            \
 183                 break;                                                                  \
 184             }                                                                           \
 185             if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                  \
 186                 sayNO;                                                                  \
 187             nextchr = UCHARAT(++locinput);                                              \
 188             break
 189
 190 #define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                        \
 191         case NAMEL:                                                              \
 192             PL_reg_flags |= RF_tainted;                                                 \
 193             /* FALL THROUGH */                                                          \
 194         case NAME :                                                                     \
 195             if (!nextchr && locinput >= PL_regeol)                                      \
 196                 sayNO;                                                                  \
 197             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
 198                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
 199                     bool ok;                                                            \
 200                     ENTER;                                                              \
 201                     save_re_context();                                                  \
 202                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
 203                     assert(ok);                                                         \
 204                     LEAVE;                                                              \
 205                 }                                                                       \
 206                 if ((OP(scan) == NAME                                                  \
 207                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)    \
 208                     : LCFUNC_utf8((U8*)locinput)))                                      \
 209                 {                                                                       \
 210                     sayNO;                                                              \
 211                 }                                                                       \
 212                 locinput += PL_utf8skip[nextchr];                                       \
 213                 nextchr = UCHARAT(locinput);                                            \
 214                 break;                                                                  \
 215             }                                                                           \
 216             if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                   \
 217                 sayNO;                                                                  \
 218             nextchr = UCHARAT(++locinput);                                              \
 219             break
 220
 221
 222
 223
 224
 225 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 226
 227 /* for use after a quantifier and before an EXACT-like node -- japhy */
 228 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 229 #define JUMPABLE(rn) (      \
 230     OP(rn) == OPEN ||       \
 231     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 232     OP(rn) == EVAL ||   \
 233     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 234     OP(rn) == PLUS || OP(rn) == MINMOD || \
 235     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 236     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 237 )
 238 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 239
 240 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 241
 242 #if 0
 243 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 244    we don't need this definition. */
 245 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 246 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 247 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 248
 249 #else
 250 /* ... so we use this as its faster. */
 251 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 252 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 253 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 254
 255 #endif
 256
 257 /*
 258   Search for mandatory following text node; for lookahead, the text must
 259   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 260 */
 261 #define FIND_NEXT_IMPT(rn) STMT_START { \
 262     while (JUMPABLE(rn)) { \
 263         const OPCODE type = OP(rn); \
 264         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 265             rn = NEXTOPER(NEXTOPER(rn)); \
 266         else if (type == PLUS) \
 267             rn = NEXTOPER(rn); \
 268         else if (type == IFMATCH) \
 269             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 270         else rn += NEXT_OFF(rn); \
 271     } \
 272 } STMT_END
 273
 274
 275 static void restore_pos(pTHX_ void *arg);
 276
 277 STATIC CHECKPOINT
 278 S_regcppush(pTHX_ I32 parenfloor)
 279 {
 280     dVAR;
 281     const int retval = PL_savestack_ix;
 282 #define REGCP_PAREN_ELEMS 4
 283     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 284     int p;
 285     GET_RE_DEBUG_FLAGS_DECL;
 286
 287     if (paren_elems_to_push < 0)
 288         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 289
 290 #define REGCP_OTHER_ELEMS 7
 291     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 292
 293     for (p = PL_regsize; p > parenfloor; p--) {
 294 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 295         SSPUSHINT(PL_regoffs[p].end);
 296         SSPUSHINT(PL_regoffs[p].start);
 297         SSPUSHPTR(PL_reg_start_tmp[p]);
 298         SSPUSHINT(p);
 299         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 300           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 301                       (UV)p, (IV)PL_regoffs[p].start,
 302                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 303                       (IV)PL_regoffs[p].end
 304         ));
 305     }
 306 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 307     SSPUSHPTR(PL_regoffs);
 308     SSPUSHINT(PL_regsize);
 309     SSPUSHINT(*PL_reglastparen);
 310     SSPUSHINT(*PL_reglastcloseparen);
 311     SSPUSHPTR(PL_reginput);
 312 #define REGCP_FRAME_ELEMS 2
 313 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 314  * are needed for the regexp context stack bookkeeping. */
 315     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 316     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 317
 318     return retval;
 319 }
 320
 321 /* These are needed since we do not localize EVAL nodes: */
 322 #define REGCP_SET(cp)                                           \
 323     DEBUG_STATE_r(                                              \
 324             PerlIO_printf(Perl_debug_log,                       \
 325                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 326                 (IV)PL_savestack_ix));                          \
 327     cp = PL_savestack_ix
 328
 329 #define REGCP_UNWIND(cp)                                        \
 330     DEBUG_STATE_r(                                              \
 331         if (cp != PL_savestack_ix)                              \
 332             PerlIO_printf(Perl_debug_log,                       \
 333                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 334                 (IV)(cp), (IV)PL_savestack_ix));                \
 335     regcpblow(cp)
 336
 337 STATIC char *
 338 S_regcppop(pTHX_ const regexp *rex)
 339 {
 340     dVAR;
 341     U32 i;
 342     char *input;
 343     GET_RE_DEBUG_FLAGS_DECL;
 344
 345     PERL_ARGS_ASSERT_REGCPPOP;
 346
 347     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 348     i = SSPOPINT;
 349     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 350     i = SSPOPINT; /* Parentheses elements to pop. */
 351     input = (char *) SSPOPPTR;
 352     *PL_reglastcloseparen = SSPOPINT;
 353     *PL_reglastparen = SSPOPINT;
 354     PL_regsize = SSPOPINT;
 355     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 356
 357
 358     /* Now restore the parentheses context. */
 359     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 360          i > 0; i -= REGCP_PAREN_ELEMS) {
 361         I32 tmps;
 362         U32 paren = (U32)SSPOPINT;
 363         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 364         PL_regoffs[paren].start = SSPOPINT;
 365         tmps = SSPOPINT;
 366         if (paren <= *PL_reglastparen)
 367             PL_regoffs[paren].end = tmps;
 368         DEBUG_BUFFERS_r(
 369             PerlIO_printf(Perl_debug_log,
 370                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 371                           (UV)paren, (IV)PL_regoffs[paren].start,
 372                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 373                           (IV)PL_regoffs[paren].end,
 374                           (paren > *PL_reglastparen ? "(no)" : ""));
 375         );
 376     }
 377     DEBUG_BUFFERS_r(
 378         if (*PL_reglastparen + 1 <= rex->nparens) {
 379             PerlIO_printf(Perl_debug_log,
 380                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 381                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 382         }
 383     );
 384 #if 1
 385     /* It would seem that the similar code in regtry()
 386      * already takes care of this, and in fact it is in
 387      * a better location to since this code can #if 0-ed out
 388      * but the code in regtry() is needed or otherwise tests
 389      * requiring null fields (pat.t#187 and split.t#{13,14}
 390      * (as of patchlevel 7877)  will fail.  Then again,
 391      * this code seems to be necessary or otherwise
 392      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 393      * --jhi updated by dapm */
 394     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 395         if (i > PL_regsize)
 396             PL_regoffs[i].start = -1;
 397         PL_regoffs[i].end = -1;
 398     }
 399 #endif
 400     return input;
 401 }
 402
 403 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 404
 405 /*
 406  * pregexec and friends
 407  */
 408
 409 #ifndef PERL_IN_XSUB_RE
 410 /*
 411  - pregexec - match a regexp against a string
 412  */
 413 I32
 414 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 415          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 416 /* strend: pointer to null at end of string */
 417 /* strbeg: real beginning of string */
 418 /* minend: end of match must be >=minend after stringarg. */
 419 /* nosave: For optimizations. */
 420 {
 421     PERL_ARGS_ASSERT_PREGEXEC;
 422
 423     return
 424         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 425                       nosave ? 0 : REXEC_COPY_STR);
 426 }
 427 #endif
 428
 429 /*
 430  * Need to implement the following flags for reg_anch:
 431  *
 432  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 433  * USE_INTUIT_ML
 434  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 435  * INTUIT_AUTORITATIVE_ML
 436  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 437  * INTUIT_ONCE_ML
 438  *
 439  * Another flag for this function: SECOND_TIME (so that float substrs
 440  * with giant delta may be not rechecked).
 441  */
 442
 443 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 444
 445 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 446    Otherwise, only SvCUR(sv) is used to get strbeg. */
 447
 448 /* XXXX We assume that strpos is strbeg unless sv. */
 449
 450 /* XXXX Some places assume that there is a fixed substring.
 451         An update may be needed if optimizer marks as "INTUITable"
 452         RExen without fixed substrings.  Similarly, it is assumed that
 453         lengths of all the strings are no more than minlen, thus they
 454         cannot come from lookahead.
 455         (Or minlen should take into account lookahead.)
 456   NOTE: Some of this comment is not correct. minlen does now take account
 457   of lookahead/behind. Further research is required. -- demerphq
 458
 459 */
 460
 461 /* A failure to find a constant substring means that there is no need to make
 462    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 463    finding a substring too deep into the string means that less calls to
 464    regtry() should be needed.
 465
 466    REx compiler's optimizer found 4 possible hints:
 467         a) Anchored substring;
 468         b) Fixed substring;
 469         c) Whether we are anchored (beginning-of-line or \G);
 470         d) First node (of those at offset 0) which may distingush positions;
 471    We use a)b)d) and multiline-part of c), and try to find a position in the
 472    string which does not contradict any of them.
 473  */
 474
 475 /* Most of decisions we do here should have been done at compile time.
 476    The nodes of the REx which we used for the search should have been
 477    deleted from the finite automaton. */
 478
 479 char *
 480 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 481                      char *strend, const U32 flags, re_scream_pos_data *data)
 482 {
 483     dVAR;
 484     struct regexp *const prog = (struct regexp *)SvANY(rx);
 485     register I32 start_shift = 0;
 486     /* Should be nonnegative! */
 487     register I32 end_shift   = 0;
 488     register char *s;
 489     register SV *check;
 490     char *strbeg;
 491     char *t;
 492     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 493     I32 ml_anch;
 494     register char *other_last = NULL;   /* other substr checked before this */
 495     char *check_at = NULL;              /* check substr found at this pos */
 496     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 497     RXi_GET_DECL(prog,progi);
 498 #ifdef DEBUGGING
 499     const char * const i_strpos = strpos;
 500 #endif
 501     GET_RE_DEBUG_FLAGS_DECL;
 502
 503     PERL_ARGS_ASSERT_RE_INTUIT_START;
 504
 505     RX_MATCH_UTF8_set(rx,do_utf8);
 506
 507     if (RX_UTF8(rx)) {
 508         PL_reg_flags |= RF_utf8;
 509     }
 510     DEBUG_EXECUTE_r(
 511         debug_start_match(rx, do_utf8, strpos, strend,
 512             sv ? "Guessing start of match in sv for"
 513                : "Guessing start of match in string for");
 514               );
 515
 516     /* CHR_DIST() would be more correct here but it makes things slow. */
 517     if (prog->minlen > strend - strpos) {
 518         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 519                               "String too short... [re_intuit_start]\n"));
 520         goto fail;
 521     }
 522
 523     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 524     PL_regeol = strend;
 525     if (do_utf8) {
 526         if (!prog->check_utf8 && prog->check_substr)
 527             to_utf8_substr(prog);
 528         check = prog->check_utf8;
 529     } else {
 530         if (!prog->check_substr && prog->check_utf8)
 531             to_byte_substr(prog);
 532         check = prog->check_substr;
 533     }
 534     if (check == &PL_sv_undef) {
 535         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 536                 "Non-utf8 string cannot match utf8 check string\n"));
 537         goto fail;
 538     }
 539     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 540         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 541                      || ( (prog->extflags & RXf_ANCH_BOL)
 542                           && !multiline ) );    /* Check after \n? */
 543
 544         if (!ml_anch) {
 545           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 546                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 547                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 548                && sv && !SvROK(sv)
 549                && (strpos != strbeg)) {
 550               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 551               goto fail;
 552           }
 553           if (prog->check_offset_min == prog->check_offset_max &&
 554               !(prog->extflags & RXf_CANY_SEEN)) {
 555             /* Substring at constant offset from beg-of-str... */
 556             I32 slen;
 557
 558             s = HOP3c(strpos, prog->check_offset_min, strend);
 559
 560             if (SvTAIL(check)) {
 561                 slen = SvCUR(check);    /* >= 1 */
 562
 563                 if ( strend - s > slen || strend - s < slen - 1
 564                      || (strend - s == slen && strend[-1] != '\n')) {
 565                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 566                     goto fail_finish;
 567                 }
 568                 /* Now should match s[0..slen-2] */
 569                 slen--;
 570                 if (slen && (*SvPVX_const(check) != *s
 571                              || (slen > 1
 572                                  && memNE(SvPVX_const(check), s, slen)))) {
 573                   report_neq:
 574                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 575                     goto fail_finish;
 576                 }
 577             }
 578             else if (*SvPVX_const(check) != *s
 579                      || ((slen = SvCUR(check)) > 1
 580                          && memNE(SvPVX_const(check), s, slen)))
 581                 goto report_neq;
 582             check_at = s;
 583             goto success_at_start;
 584           }
 585         }
 586         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 587         s = strpos;
 588         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 589         end_shift = prog->check_end_shift;
 590
 591         if (!ml_anch) {
 592             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 593                                          - (SvTAIL(check) != 0);
 594             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 595
 596             if (end_shift < eshift)
 597                 end_shift = eshift;
 598         }
 599     }
 600     else {                              /* Can match at random position */
 601         ml_anch = 0;
 602         s = strpos;
 603         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 604         end_shift = prog->check_end_shift;
 605
 606         /* end shift should be non negative here */
 607     }
 608
 609 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 610     if (end_shift < 0)
 611         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 612                    (IV)end_shift, RX_PRECOMP(prog));
 613 #endif
 614
 615   restart:
 616     /* Find a possible match in the region s..strend by looking for
 617        the "check" substring in the region corrected by start/end_shift. */
 618
 619     {
 620         I32 srch_start_shift = start_shift;
 621         I32 srch_end_shift = end_shift;
 622         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 623             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 624             srch_start_shift = strbeg - s;
 625         }
 626     DEBUG_OPTIMISE_MORE_r({
 627         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 628             (IV)prog->check_offset_min,
 629             (IV)srch_start_shift,
 630             (IV)srch_end_shift,
 631             (IV)prog->check_end_shift);
 632     });
 633
 634     if (flags & REXEC_SCREAM) {
 635         I32 p = -1;                     /* Internal iterator of scream. */
 636         I32 * const pp = data ? data->scream_pos : &p;
 637
 638         if (PL_screamfirst[BmRARE(check)] >= 0
 639             || ( BmRARE(check) == '\n'
 640                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 641                  && SvTAIL(check) ))
 642             s = screaminstr(sv, check,
 643                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 644         else
 645             goto fail_finish;
 646         /* we may be pointing at the wrong string */
 647         if (s && RXp_MATCH_COPIED(prog))
 648             s = strbeg + (s - SvPVX_const(sv));
 649         if (data)
 650             *data->scream_olds = s;
 651     }
 652     else {
 653         U8* start_point;
 654         U8* end_point;
 655         if (prog->extflags & RXf_CANY_SEEN) {
 656             start_point= (U8*)(s + srch_start_shift);
 657             end_point= (U8*)(strend - srch_end_shift);
 658         } else {
 659             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 660             end_point= HOP3(strend, -srch_end_shift, strbeg);
 661         }
 662         DEBUG_OPTIMISE_MORE_r({
 663             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 664                 (int)(end_point - start_point),
 665                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 666                 start_point);
 667         });
 668
 669         s = fbm_instr( start_point, end_point,
 670                       check, multiline ? FBMrf_MULTILINE : 0);
 671     }
 672     }
 673     /* Update the count-of-usability, remove useless subpatterns,
 674         unshift s.  */
 675
 676     DEBUG_EXECUTE_r({
 677         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 678             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 679         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 680                           (s ? "Found" : "Did not find"),
 681             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 682                 ? "anchored" : "floating"),
 683             quoted,
 684             RE_SV_TAIL(check),
 685             (s ? " at offset " : "...\n") );
 686     });
 687
 688     if (!s)
 689         goto fail_finish;
 690     /* Finish the diagnostic message */
 691     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 692
 693     /* XXX dmq: first branch is for positive lookbehind...
 694        Our check string is offset from the beginning of the pattern.
 695        So we need to do any stclass tests offset forward from that
 696        point. I think. :-(
 697      */
 698
 699
 700
 701     check_at=s;
 702
 703
 704     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 705        Start with the other substr.
 706        XXXX no SCREAM optimization yet - and a very coarse implementation
 707        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 708                 *always* match.  Probably should be marked during compile...
 709        Probably it is right to do no SCREAM here...
 710      */
 711
 712     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 713                 : (prog->float_substr && prog->anchored_substr))
 714     {
 715         /* Take into account the "other" substring. */
 716         /* XXXX May be hopelessly wrong for UTF... */
 717         if (!other_last)
 718             other_last = strpos;
 719         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 720           do_other_anchored:
 721             {
 722                 char * const last = HOP3c(s, -start_shift, strbeg);
 723                 char *last1, *last2;
 724                 char * const saved_s = s;
 725                 SV* must;
 726
 727                 t = s - prog->check_offset_max;
 728                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 729                     && (!do_utf8
 730                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 731                             && t > strpos)))
 732                     NOOP;
 733                 else
 734                     t = strpos;
 735                 t = HOP3c(t, prog->anchored_offset, strend);
 736                 if (t < other_last)     /* These positions already checked */
 737                     t = other_last;
 738                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 739                 if (last < last1)
 740                     last1 = last;
 741                 /* XXXX It is not documented what units *_offsets are in.
 742                    We assume bytes, but this is clearly wrong.
 743                    Meaning this code needs to be carefully reviewed for errors.
 744                    dmq.
 745                   */
 746
 747                 /* On end-of-str: see comment below. */
 748                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 749                 if (must == &PL_sv_undef) {
 750                     s = (char*)NULL;
 751                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 752                 }
 753                 else
 754                     s = fbm_instr(
 755                         (unsigned char*)t,
 756                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 757                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 758                         must,
 759                         multiline ? FBMrf_MULTILINE : 0
 760                     );
 761                 DEBUG_EXECUTE_r({
 762                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 763                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 764                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 765                         (s ? "Found" : "Contradicts"),
 766                         quoted, RE_SV_TAIL(must));
 767                 });
 768
 769
 770                 if (!s) {
 771                     if (last1 >= last2) {
 772                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 773                                                 ", giving up...\n"));
 774                         goto fail_finish;
 775                     }
 776                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 777                         ", trying floating at offset %ld...\n",
 778                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 779                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 780                     s = HOP3c(last, 1, strend);
 781                     goto restart;
 782                 }
 783                 else {
 784                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 785                           (long)(s - i_strpos)));
 786                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 787                     other_last = HOP3c(s, 1, strend);
 788                     s = saved_s;
 789                     if (t == strpos)
 790                         goto try_at_start;
 791                     goto try_at_offset;
 792                 }
 793             }
 794         }
 795         else {          /* Take into account the floating substring. */
 796             char *last, *last1;
 797             char * const saved_s = s;
 798             SV* must;
 799
 800             t = HOP3c(s, -start_shift, strbeg);
 801             last1 = last =
 802                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 803             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 804                 last = HOP3c(t, prog->float_max_offset, strend);
 805             s = HOP3c(t, prog->float_min_offset, strend);
 806             if (s < other_last)
 807                 s = other_last;
 808  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 809             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 810             /* fbm_instr() takes into account exact value of end-of-str
 811                if the check is SvTAIL(ed).  Since false positives are OK,
 812                and end-of-str is not later than strend we are OK. */
 813             if (must == &PL_sv_undef) {
 814                 s = (char*)NULL;
 815                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 816             }
 817             else
 818                 s = fbm_instr((unsigned char*)s,
 819                               (unsigned char*)last + SvCUR(must)
 820                                   - (SvTAIL(must)!=0),
 821                               must, multiline ? FBMrf_MULTILINE : 0);
 822             DEBUG_EXECUTE_r({
 823                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 824                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 825                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 826                     (s ? "Found" : "Contradicts"),
 827                     quoted, RE_SV_TAIL(must));
 828             });
 829             if (!s) {
 830                 if (last1 == last) {
 831                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 832                                             ", giving up...\n"));
 833                     goto fail_finish;
 834                 }
 835                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 836                     ", trying anchored starting at offset %ld...\n",
 837                     (long)(saved_s + 1 - i_strpos)));
 838                 other_last = last;
 839                 s = HOP3c(t, 1, strend);
 840                 goto restart;
 841             }
 842             else {
 843                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 844                       (long)(s - i_strpos)));
 845                 other_last = s; /* Fix this later. --Hugo */
 846                 s = saved_s;
 847                 if (t == strpos)
 848                     goto try_at_start;
 849                 goto try_at_offset;
 850             }
 851         }
 852     }
 853
 854
 855     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 856
 857     DEBUG_OPTIMISE_MORE_r(
 858         PerlIO_printf(Perl_debug_log,
 859             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 860             (IV)prog->check_offset_min,
 861             (IV)prog->check_offset_max,
 862             (IV)(s-strpos),
 863             (IV)(t-strpos),
 864             (IV)(t-s),
 865             (IV)(strend-strpos)
 866         )
 867     );
 868
 869     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 870         && (!do_utf8
 871             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 872                  && t > strpos)))
 873     {
 874         /* Fixed substring is found far enough so that the match
 875            cannot start at strpos. */
 876       try_at_offset:
 877         if (ml_anch && t[-1] != '\n') {
 878             /* Eventually fbm_*() should handle this, but often
 879                anchored_offset is not 0, so this check will not be wasted. */
 880             /* XXXX In the code below we prefer to look for "^" even in
 881                presence of anchored substrings.  And we search even
 882                beyond the found float position.  These pessimizations
 883                are historical artefacts only.  */
 884           find_anchor:
 885             while (t < strend - prog->minlen) {
 886                 if (*t == '\n') {
 887                     if (t < check_at - prog->check_offset_min) {
 888                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 889                             /* Since we moved from the found position,
 890                                we definitely contradict the found anchored
 891                                substr.  Due to the above check we do not
 892                                contradict "check" substr.
 893                                Thus we can arrive here only if check substr
 894                                is float.  Redo checking for "other"=="fixed".
 895                              */
 896                             strpos = t + 1;
 897                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 898                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 899                             goto do_other_anchored;
 900                         }
 901                         /* We don't contradict the found floating substring. */
 902                         /* XXXX Why not check for STCLASS? */
 903                         s = t + 1;
 904                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 905                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 906                         goto set_useful;
 907                     }
 908                     /* Position contradicts check-string */
 909                     /* XXXX probably better to look for check-string
 910                        than for "\n", so one should lower the limit for t? */
 911                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 912                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 913                     other_last = strpos = s = t + 1;
 914                     goto restart;
 915                 }
 916                 t++;
 917             }
 918             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 919                         PL_colors[0], PL_colors[1]));
 920             goto fail_finish;
 921         }
 922         else {
 923             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 924                         PL_colors[0], PL_colors[1]));
 925         }
 926         s = t;
 927       set_useful:
 928         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 929     }
 930     else {
 931         /* The found string does not prohibit matching at strpos,
 932            - no optimization of calling REx engine can be performed,
 933            unless it was an MBOL and we are not after MBOL,
 934            or a future STCLASS check will fail this. */
 935       try_at_start:
 936         /* Even in this situation we may use MBOL flag if strpos is offset
 937            wrt the start of the string. */
 938         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 939             && (strpos != strbeg) && strpos[-1] != '\n'
 940             /* May be due to an implicit anchor of m{.*foo}  */
 941             && !(prog->intflags & PREGf_IMPLICIT))
 942         {
 943             t = strpos;
 944             goto find_anchor;
 945         }
 946         DEBUG_EXECUTE_r( if (ml_anch)
 947             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 948                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 949         );
 950       success_at_start:
 951         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 952             && (do_utf8 ? (
 953                 prog->check_utf8                /* Could be deleted already */
 954                 && --BmUSEFUL(prog->check_utf8) < 0
 955                 && (prog->check_utf8 == prog->float_utf8)
 956             ) : (
 957                 prog->check_substr              /* Could be deleted already */
 958                 && --BmUSEFUL(prog->check_substr) < 0
 959                 && (prog->check_substr == prog->float_substr)
 960             )))
 961         {
 962             /* If flags & SOMETHING - do not do it many times on the same match */
 963             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 964             /* XXX Does the destruction order has to change with do_utf8? */
 965             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 966             SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 967             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 968             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 969             check = NULL;                       /* abort */
 970             s = strpos;
 971             /* XXXX This is a remnant of the old implementation.  It
 972                     looks wasteful, since now INTUIT can use many
 973                     other heuristics. */
 974             prog->extflags &= ~RXf_USE_INTUIT;
 975         }
 976         else
 977             s = strpos;
 978     }
 979
 980     /* Last resort... */
 981     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 982     /* trie stclasses are too expensive to use here, we are better off to
 983        leave it to regmatch itself */
 984     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 985         /* minlen == 0 is possible if regstclass is \b or \B,
 986            and the fixed substr is ''$.
 987            Since minlen is already taken into account, s+1 is before strend;
 988            accidentally, minlen >= 1 guaranties no false positives at s + 1
 989            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 990            regstclass does not come from lookahead...  */
 991         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 992            This leaves EXACTF only, which is dealt with in find_byclass().  */
 993         const U8* const str = (U8*)STRING(progi->regstclass);
 994         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 995                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 996                     : 1);
 997         char * endpos;
 998         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 999             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
1000         else if (prog->float_substr || prog->float_utf8)
1001             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
1002         else
1003             endpos= strend;
1004
1005         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
1006                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1007
1008         t = s;
1009         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
1010         if (!s) {
1011 #ifdef DEBUGGING
1012             const char *what = NULL;
1013 #endif
1014             if (endpos == strend) {
1015                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1016                                 "Could not match STCLASS...\n") );
1017                 goto fail;
1018             }
1019             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1020                                    "This position contradicts STCLASS...\n") );
1021             if ((prog->extflags & RXf_ANCH) && !ml_anch)
1022                 goto fail;
1023             /* Contradict one of substrings */
1024             if (prog->anchored_substr || prog->anchored_utf8) {
1025                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
1026                     DEBUG_EXECUTE_r( what = "anchored" );
1027                   hop_and_restart:
1028                     s = HOP3c(t, 1, strend);
1029                     if (s + start_shift + end_shift > strend) {
1030                         /* XXXX Should be taken into account earlier? */
1031                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1032                                                "Could not match STCLASS...\n") );
1033                         goto fail;
1034                     }
1035                     if (!check)
1036                         goto giveup;
1037                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1038                                 "Looking for %s substr starting at offset %ld...\n",
1039                                  what, (long)(s + start_shift - i_strpos)) );
1040                     goto restart;
1041                 }
1042                 /* Have both, check_string is floating */
1043                 if (t + start_shift >= check_at) /* Contradicts floating=check */
1044                     goto retry_floating_check;
1045                 /* Recheck anchored substring, but not floating... */
1046                 s = check_at;
1047                 if (!check)
1048                     goto giveup;
1049                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1050                           "Looking for anchored substr starting at offset %ld...\n",
1051                           (long)(other_last - i_strpos)) );
1052                 goto do_other_anchored;
1053             }
1054             /* Another way we could have checked stclass at the
1055                current position only: */
1056             if (ml_anch) {
1057                 s = t = t + 1;
1058                 if (!check)
1059                     goto giveup;
1060                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1061                           "Looking for /%s^%s/m starting at offset %ld...\n",
1062                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
1063                 goto try_at_offset;
1064             }
1065             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
1066                 goto fail;
1067             /* Check is floating subtring. */
1068           retry_floating_check:
1069             t = check_at - start_shift;
1070             DEBUG_EXECUTE_r( what = "floating" );
1071             goto hop_and_restart;
1072         }
1073         if (t != s) {
1074             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1075                         "By STCLASS: moving %ld --> %ld\n",
1076                                   (long)(t - i_strpos), (long)(s - i_strpos))
1077                    );
1078         }
1079         else {
1080             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1081                                   "Does not contradict STCLASS...\n");
1082                    );
1083         }
1084     }
1085   giveup:
1086     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
1087                           PL_colors[4], (check ? "Guessed" : "Giving up"),
1088                           PL_colors[5], (long)(s - i_strpos)) );
1089     return s;
1090
1091   fail_finish:                          /* Substring not found */
1092     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
1093         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
1094   fail:
1095     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
1096                           PL_colors[4], PL_colors[5]));
1097     return NULL;
1098 }
1099
1100 #define DECL_TRIE_TYPE(scan) \
1101     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1102                     trie_type = (scan->flags != EXACT) \
1103                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1104                               : (do_utf8 ? trie_utf8 : trie_plain)
1105
1106 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1107 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1108     switch (trie_type) {                                                    \
1109     case trie_utf8_fold:                                                    \
1110         if ( foldlen>0 ) {                                                  \
1111             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1112             foldlen -= len;                                                 \
1113             uscan += len;                                                   \
1114             len=0;                                                          \
1115         } else {                                                            \
1116             uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1117             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1118             foldlen -= UNISKIP( uvc );                                      \
1119             uscan = foldbuf + UNISKIP( uvc );                               \
1120         }                                                                   \
1121         break;                                                              \
1122     case trie_latin_utf8_fold:                                              \
1123         if ( foldlen>0 ) {                                                  \
1124             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1125             foldlen -= len;                                                 \
1126             uscan += len;                                                   \
1127             len=0;                                                          \
1128         } else {                                                            \
1129             len = 1;                                                        \
1130             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1131             foldlen -= UNISKIP( uvc );                                      \
1132             uscan = foldbuf + UNISKIP( uvc );                               \
1133         }                                                                   \
1134         break;                                                              \
1135     case trie_utf8:                                                         \
1136         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1137         break;                                                              \
1138     case trie_plain:                                                        \
1139         uvc = (UV)*uc;                                                      \
1140         len = 1;                                                            \
1141     }                                                                       \
1142     if (uvc < 256) {                                                        \
1143         charid = trie->charmap[ uvc ];                                      \
1144     }                                                                       \
1145     else {                                                                  \
1146         charid = 0;                                                         \
1147         if (widecharmap) {                                                  \
1148             SV** const svpp = hv_fetch(widecharmap,                         \
1149                         (char*)&uvc, sizeof(UV), 0);                        \
1150             if (svpp)                                                       \
1151                 charid = (U16)SvIV(*svpp);                                  \
1152         }                                                                   \
1153     }                                                                       \
1154 } STMT_END
1155
1156 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1157 {                                                      \
1158     char *my_strend= (char *)strend;                   \
1159     if ( (CoNd)                                        \
1160          && (ln == len ||                              \
1161              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1162                         m, NULL, ln, (bool)UTF))       \
1163          && (!reginfo || regtry(reginfo, &s)) )        \
1164         goto got_it;                                   \
1165     else {                                             \
1166          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1167          uvchr_to_utf8(tmpbuf, c);                     \
1168          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1169          if ( f != c                                   \
1170               && (f == c1 || f == c2)                  \
1171               && (ln == len ||                         \
1172                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1173                               m, NULL, ln, (bool)UTF)) \
1174               && (!reginfo || regtry(reginfo, &s)) )   \
1175               goto got_it;                             \
1176     }                                                  \
1177 }                                                      \
1178 s += len
1179
1180 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1181 STMT_START {                                              \
1182     while (s <= e) {                                      \
1183         if ( (CoNd)                                       \
1184              && (ln == 1 || !(OP(c) == EXACTF             \
1185                               ? ibcmp(s, m, ln)           \
1186                               : ibcmp_locale(s, m, ln)))  \
1187              && (!reginfo || regtry(reginfo, &s)) )        \
1188             goto got_it;                                  \
1189         s++;                                              \
1190     }                                                     \
1191 } STMT_END
1192
1193 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1194 STMT_START {                                          \
1195     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1196         CoDe                                          \
1197         s += uskip;                                   \
1198     }                                                 \
1199 } STMT_END
1200
1201 #define REXEC_FBC_SCAN(CoDe)                          \
1202 STMT_START {                                          \
1203     while (s < strend) {                              \
1204         CoDe                                          \
1205         s++;                                          \
1206     }                                                 \
1207 } STMT_END
1208
1209 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1210 REXEC_FBC_UTF8_SCAN(                                  \
1211     if (CoNd) {                                       \
1212         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1213             goto got_it;                              \
1214         else                                          \
1215             tmp = doevery;                            \
1216     }                                                 \
1217     else                                              \
1218         tmp = 1;                                      \
1219 )
1220
1221 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1222 REXEC_FBC_SCAN(                                       \
1223     if (CoNd) {                                       \
1224         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1225             goto got_it;                              \
1226         else                                          \
1227             tmp = doevery;                            \
1228     }                                                 \
1229     else                                              \
1230         tmp = 1;                                      \
1231 )
1232
1233 #define REXEC_FBC_TRYIT               \
1234 if ((!reginfo || regtry(reginfo, &s))) \
1235     goto got_it
1236
1237 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1238     if (do_utf8) {                                             \
1239         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1240     }                                                          \
1241     else {                                                     \
1242         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1243     }                                                          \
1244     break
1245
1246 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1247     if (do_utf8) {                                             \
1248         UtFpReLoAd;                                            \
1249         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1250     }                                                          \
1251     else {                                                     \
1252         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1253     }                                                          \
1254     break
1255
1256 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1257     PL_reg_flags |= RF_tainted;                                \
1258     if (do_utf8) {                                             \
1259         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1260     }                                                          \
1261     else {                                                     \
1262         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1263     }                                                          \
1264     break
1265
1266 #define DUMP_EXEC_POS(li,s,doutf8) \
1267     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1268
1269 /* We know what class REx starts with.  Try to find this position... */
1270 /* if reginfo is NULL, its a dryrun */
1271 /* annoyingly all the vars in this routine have different names from their counterparts
1272    in regmatch. /grrr */
1273
1274 STATIC char *
1275 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1276     const char *strend, regmatch_info *reginfo)
1277 {
1278         dVAR;
1279         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1280         char *m;
1281         STRLEN ln;
1282         STRLEN lnc;
1283         register STRLEN uskip;
1284         unsigned int c1;
1285         unsigned int c2;
1286         char *e;
1287         register I32 tmp = 1;   /* Scratch variable? */
1288         register const bool do_utf8 = PL_reg_match_utf8;
1289         RXi_GET_DECL(prog,progi);
1290
1291         PERL_ARGS_ASSERT_FIND_BYCLASS;
1292
1293         /* We know what class it must start with. */
1294         switch (OP(c)) {
1295         case ANYOF:
1296             if (do_utf8) {
1297                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1298                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1299                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1300                           REGINCLASS(prog, c, (U8*)s));
1301             }
1302             else {
1303                  while (s < strend) {
1304                       STRLEN skip = 1;
1305
1306                       if (REGINCLASS(prog, c, (U8*)s) ||
1307                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1308                            /* The assignment of 2 is intentional:
1309                             * for the folded sharp s, the skip is 2. */
1310                            (skip = SHARP_S_SKIP))) {
1311                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1312                                 goto got_it;
1313                            else
1314                                 tmp = doevery;
1315                       }
1316                       else
1317                            tmp = 1;
1318                       s += skip;
1319                  }
1320             }
1321             break;
1322         case CANY:
1323             REXEC_FBC_SCAN(
1324                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1325                     goto got_it;
1326                 else
1327                     tmp = doevery;
1328             );
1329             break;
1330         case EXACTF:
1331             m   = STRING(c);
1332             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1333             lnc = (I32) ln;     /* length to match in characters */
1334             if (UTF) {
1335                 STRLEN ulen1, ulen2;
1336                 U8 *sm = (U8 *) m;
1337                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1338                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1339                 /* used by commented-out code below */
1340                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1341
1342                 /* XXX: Since the node will be case folded at compile
1343                    time this logic is a little odd, although im not
1344                    sure that its actually wrong. --dmq */
1345
1346                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1347                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1348
1349                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1350                    codepoint of the first character in the converted
1351                    form, yet originally we did the extra step.
1352                    No tests fail by commenting this code out however
1353                    so Ive left it out. -- dmq.
1354
1355                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1356                                     0, uniflags);
1357                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1358                                     0, uniflags);
1359                 */
1360
1361                 lnc = 0;
1362                 while (sm < ((U8 *) m + ln)) {
1363                     lnc++;
1364                     sm += UTF8SKIP(sm);
1365                 }
1366             }
1367             else {
1368                 c1 = *(U8*)m;
1369                 c2 = PL_fold[c1];
1370             }
1371             goto do_exactf;
1372         case EXACTFL:
1373             m   = STRING(c);
1374             ln  = STR_LEN(c);
1375             lnc = (I32) ln;
1376             c1 = *(U8*)m;
1377             c2 = PL_fold_locale[c1];
1378           do_exactf:
1379             e = HOP3c(strend, -((I32)lnc), s);
1380
1381             if (!reginfo && e < s)
1382                 e = s;                  /* Due to minlen logic of intuit() */
1383
1384             /* The idea in the EXACTF* cases is to first find the
1385              * first character of the EXACTF* node and then, if
1386              * necessary, case-insensitively compare the full
1387              * text of the node.  The c1 and c2 are the first
1388              * characters (though in Unicode it gets a bit
1389              * more complicated because there are more cases
1390              * than just upper and lower: one needs to use
1391              * the so-called folding case for case-insensitive
1392              * matching (called "loose matching" in Unicode).
1393              * ibcmp_utf8() will do just that. */
1394
1395             if (do_utf8 || UTF) {
1396                 UV c, f;
1397                 U8 tmpbuf [UTF8_MAXBYTES+1];
1398                 STRLEN len = 1;
1399                 STRLEN foldlen;
1400                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1401                 if (c1 == c2) {
1402                     /* Upper and lower of 1st char are equal -
1403                      * probably not a "letter". */
1404                     while (s <= e) {
1405                         if (do_utf8) {
1406                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1407                                            uniflags);
1408                         } else {
1409                             c = *((U8*)s);
1410                         }
1411                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1412                     }
1413                 }
1414                 else {
1415                     while (s <= e) {
1416                         if (do_utf8) {
1417                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1418                                            uniflags);
1419                         } else {
1420                             c = *((U8*)s);
1421                         }
1422
1423                         /* Handle some of the three Greek sigmas cases.
1424                          * Note that not all the possible combinations
1425                          * are handled here: some of them are handled
1426                          * by the standard folding rules, and some of
1427                          * them (the character class or ANYOF cases)
1428                          * are handled during compiletime in
1429                          * regexec.c:S_regclass(). */
1430                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1431                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1432                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1433
1434                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1435                     }
1436                 }
1437             }
1438             else {
1439                 /* Neither pattern nor string are UTF8 */
1440                 if (c1 == c2)
1441                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1442                 else
1443                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1444             }
1445             break;
1446         case BOUNDL:
1447             PL_reg_flags |= RF_tainted;
1448             /* FALL THROUGH */
1449         case BOUND:
1450             if (do_utf8) {
1451                 if (s == PL_bostr)
1452                     tmp = '\n';
1453                 else {
1454                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1455                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1456                 }
1457                 tmp = ((OP(c) == BOUND ?
1458                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1459                 LOAD_UTF8_CHARCLASS_ALNUM();
1460                 REXEC_FBC_UTF8_SCAN(
1461                     if (tmp == !(OP(c) == BOUND ?
1462                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1463                                  isALNUM_LC_utf8((U8*)s)))
1464                     {
1465                         tmp = !tmp;
1466                         REXEC_FBC_TRYIT;
1467                 }
1468                 );
1469             }
1470             else {
1471                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1472                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1473                 REXEC_FBC_SCAN(
1474                     if (tmp ==
1475                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1476                         tmp = !tmp;
1477                         REXEC_FBC_TRYIT;
1478                 }
1479                 );
1480             }
1481             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1482                 goto got_it;
1483             break;
1484         case NBOUNDL:
1485             PL_reg_flags |= RF_tainted;
1486             /* FALL THROUGH */
1487         case NBOUND:
1488             if (do_utf8) {
1489                 if (s == PL_bostr)
1490                     tmp = '\n';
1491                 else {
1492                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1493                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1494                 }
1495                 tmp = ((OP(c) == NBOUND ?
1496                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1497                 LOAD_UTF8_CHARCLASS_ALNUM();
1498                 REXEC_FBC_UTF8_SCAN(
1499                     if (tmp == !(OP(c) == NBOUND ?
1500                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1501                                  isALNUM_LC_utf8((U8*)s)))
1502                         tmp = !tmp;
1503                     else REXEC_FBC_TRYIT;
1504                 );
1505             }
1506             else {
1507                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1508                 tmp = ((OP(c) == NBOUND ?
1509                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1510                 REXEC_FBC_SCAN(
1511                     if (tmp ==
1512                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1513                         tmp = !tmp;
1514                     else REXEC_FBC_TRYIT;
1515                 );
1516             }
1517             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1518                 goto got_it;
1519             break;
1520         case ALNUM:
1521             REXEC_FBC_CSCAN_PRELOAD(
1522                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
1523                 swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1524                 isALNUM(*s)
1525             );
1526         case ALNUML:
1527             REXEC_FBC_CSCAN_TAINT(
1528                 isALNUM_LC_utf8((U8*)s),
1529                 isALNUM_LC(*s)
1530             );
1531         case NALNUM:
1532             REXEC_FBC_CSCAN_PRELOAD(
1533                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
1534                 !swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1535                 !isALNUM(*s)
1536             );
1537         case NALNUML:
1538             REXEC_FBC_CSCAN_TAINT(
1539                 !isALNUM_LC_utf8((U8*)s),
1540                 !isALNUM_LC(*s)
1541             );
1542         case SPACE:
1543             REXEC_FBC_CSCAN_PRELOAD(
1544                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1545                 *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
1546                 isSPACE(*s)
1547             );
1548         case SPACEL:
1549             REXEC_FBC_CSCAN_TAINT(
1550                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1551                 isSPACE_LC(*s)
1552             );
1553         case NSPACE:
1554             REXEC_FBC_CSCAN_PRELOAD(
1555                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1556                 !(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
1557                 !isSPACE(*s)
1558             );
1559         case NSPACEL:
1560             REXEC_FBC_CSCAN_TAINT(
1561                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1562                 !isSPACE_LC(*s)
1563             );
1564         case DIGIT:
1565             REXEC_FBC_CSCAN_PRELOAD(
1566                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1567                 swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1568                 isDIGIT(*s)
1569             );
1570         case DIGITL:
1571             REXEC_FBC_CSCAN_TAINT(
1572                 isDIGIT_LC_utf8((U8*)s),
1573                 isDIGIT_LC(*s)
1574             );
1575         case NDIGIT:
1576             REXEC_FBC_CSCAN_PRELOAD(
1577                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1578                 !swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1579                 !isDIGIT(*s)
1580             );
1581         case NDIGITL:
1582             REXEC_FBC_CSCAN_TAINT(
1583                 !isDIGIT_LC_utf8((U8*)s),
1584                 !isDIGIT_LC(*s)
1585             );
1586         case LNBREAK:
1587             REXEC_FBC_CSCAN(
1588                 is_LNBREAK_utf8(s),
1589                 is_LNBREAK_latin1(s)
1590             );
1591         case VERTWS:
1592             REXEC_FBC_CSCAN(
1593                 is_VERTWS_utf8(s),
1594                 is_VERTWS_latin1(s)
1595             );
1596         case NVERTWS:
1597             REXEC_FBC_CSCAN(
1598                 !is_VERTWS_utf8(s),
1599                 !is_VERTWS_latin1(s)
1600             );
1601         case HORIZWS:
1602             REXEC_FBC_CSCAN(
1603                 is_HORIZWS_utf8(s),
1604                 is_HORIZWS_latin1(s)
1605             );
1606         case NHORIZWS:
1607             REXEC_FBC_CSCAN(
1608                 !is_HORIZWS_utf8(s),
1609                 !is_HORIZWS_latin1(s)
1610             );
1611         case AHOCORASICKC:
1612         case AHOCORASICK:
1613             {
1614                 DECL_TRIE_TYPE(c);
1615                 /* what trie are we using right now */
1616                 reg_ac_data *aho
1617                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1618                 reg_trie_data *trie
1619                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1620                 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1621
1622                 const char *last_start = strend - trie->minlen;
1623 #ifdef DEBUGGING
1624                 const char *real_start = s;
1625 #endif
1626                 STRLEN maxlen = trie->maxlen;
1627                 SV *sv_points;
1628                 U8 **points; /* map of where we were in the input string
1629                                 when reading a given char. For ASCII this
1630                                 is unnecessary overhead as the relationship
1631                                 is always 1:1, but for Unicode, especially
1632                                 case folded Unicode this is not true. */
1633                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1634                 U8 *bitmap=NULL;
1635
1636
1637                 GET_RE_DEBUG_FLAGS_DECL;
1638
1639                 /* We can't just allocate points here. We need to wrap it in
1640                  * an SV so it gets freed properly if there is a croak while
1641                  * running the match */
1642                 ENTER;
1643                 SAVETMPS;
1644                 sv_points=newSV(maxlen * sizeof(U8 *));
1645                 SvCUR_set(sv_points,
1646                     maxlen * sizeof(U8 *));
1647                 SvPOK_on(sv_points);
1648                 sv_2mortal(sv_points);
1649                 points=(U8**)SvPV_nolen(sv_points );
1650                 if ( trie_type != trie_utf8_fold
1651                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1652                 {
1653                     if (trie->bitmap)
1654                         bitmap=(U8*)trie->bitmap;
1655                     else
1656                         bitmap=(U8*)ANYOF_BITMAP(c);
1657                 }
1658                 /* this is the Aho-Corasick algorithm modified a touch
1659                    to include special handling for long "unknown char"
1660                    sequences. The basic idea being that we use AC as long
1661                    as we are dealing with a possible matching char, when
1662                    we encounter an unknown char (and we have not encountered
1663                    an accepting state) we scan forward until we find a legal
1664                    starting char.
1665                    AC matching is basically that of trie matching, except
1666                    that when we encounter a failing transition, we fall back
1667                    to the current states "fail state", and try the current char
1668                    again, a process we repeat until we reach the root state,
1669                    state 1, or a legal transition. If we fail on the root state
1670                    then we can either terminate if we have reached an accepting
1671                    state previously, or restart the entire process from the beginning
1672                    if we have not.
1673
1674                  */
1675                 while (s <= last_start) {
1676                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1677                     U8 *uc = (U8*)s;
1678                     U16 charid = 0;
1679                     U32 base = 1;
1680                     U32 state = 1;
1681                     UV uvc = 0;
1682                     STRLEN len = 0;
1683                     STRLEN foldlen = 0;
1684                     U8 *uscan = (U8*)NULL;
1685                     U8 *leftmost = NULL;
1686 #ifdef DEBUGGING
1687                     U32 accepted_word= 0;
1688 #endif
1689                     U32 pointpos = 0;
1690
1691                     while ( state && uc <= (U8*)strend ) {
1692                         int failed=0;
1693                         U32 word = aho->states[ state ].wordnum;
1694
1695                         if( state==1 ) {
1696                             if ( bitmap ) {
1697                                 DEBUG_TRIE_EXECUTE_r(
1698                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1699                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1700                                             (char *)uc, do_utf8 );
1701                                         PerlIO_printf( Perl_debug_log,
1702                                             " Scanning for legal start char...\n");
1703                                     }
1704                                 );
1705                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1706                                     uc++;
1707                                 }
1708                                 s= (char *)uc;
1709                             }
1710                             if (uc >(U8*)last_start) break;
1711                         }
1712
1713                         if ( word ) {
1714                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1715                             if (!leftmost || lpos < leftmost) {
1716                                 DEBUG_r(accepted_word=word);
1717                                 leftmost= lpos;
1718                             }
1719                             if (base==0) break;
1720
1721                         }
1722                         points[pointpos++ % maxlen]= uc;
1723                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1724                                              uscan, len, uvc, charid, foldlen,
1725                                              foldbuf, uniflags);
1726                         DEBUG_TRIE_EXECUTE_r({
1727                             dump_exec_pos( (char *)uc, c, strend, real_start,
1728                                 s,   do_utf8 );
1729                             PerlIO_printf(Perl_debug_log,
1730                                 " Charid:%3u CP:%4"UVxf" ",
1731                                  charid, uvc);
1732                         });
1733
1734                         do {
1735 #ifdef DEBUGGING
1736                             word = aho->states[ state ].wordnum;
1737 #endif
1738                             base = aho->states[ state ].trans.base;
1739
1740                             DEBUG_TRIE_EXECUTE_r({
1741                                 if (failed)
1742                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1743                                         s,   do_utf8 );
1744                                 PerlIO_printf( Perl_debug_log,
1745                                     "%sState: %4"UVxf", word=%"UVxf,
1746                                     failed ? " Fail transition to " : "",
1747                                     (UV)state, (UV)word);
1748                             });
1749                             if ( base ) {
1750                                 U32 tmp;
1751                                 if (charid &&
1752                                      (base + charid > trie->uniquecharcount )
1753                                      && (base + charid - 1 - trie->uniquecharcount
1754                                             < trie->lasttrans)
1755                                      && trie->trans[base + charid - 1 -
1756                                             trie->uniquecharcount].check == state
1757                                      && (tmp=trie->trans[base + charid - 1 -
1758                                         trie->uniquecharcount ].next))
1759                                 {
1760                                     DEBUG_TRIE_EXECUTE_r(
1761                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1762                                     state = tmp;
1763                                     break;
1764                                 }
1765                                 else {
1766                                     DEBUG_TRIE_EXECUTE_r(
1767                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1768                                     failed = 1;
1769                                     state = aho->fail[state];
1770                                 }
1771                             }
1772                             else {
1773                                 /* we must be accepting here */
1774                                 DEBUG_TRIE_EXECUTE_r(
1775                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1776                                 failed = 1;
1777                                 break;
1778                             }
1779                         } while(state);
1780                         uc += len;
1781                         if (failed) {
1782                             if (leftmost)
1783                                 break;
1784                             if (!state) state = 1;
1785                         }
1786                     }
1787                     if ( aho->states[ state ].wordnum ) {
1788                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1789                         if (!leftmost || lpos < leftmost) {
1790                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1791                             leftmost = lpos;
1792                         }
1793                     }
1794                     if (leftmost) {
1795                         s = (char*)leftmost;
1796                         DEBUG_TRIE_EXECUTE_r({
1797                             PerlIO_printf(
1798                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1799                                 (UV)accepted_word, (IV)(s - real_start)
1800                             );
1801                         });
1802                         if (!reginfo || regtry(reginfo, &s)) {
1803                             FREETMPS;
1804                             LEAVE;
1805                             goto got_it;
1806                         }
1807                         s = HOPc(s,1);
1808                         DEBUG_TRIE_EXECUTE_r({
1809                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1810                         });
1811                     } else {
1812                         DEBUG_TRIE_EXECUTE_r(
1813                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1814                         break;
1815                     }
1816                 }
1817                 FREETMPS;
1818                 LEAVE;
1819             }
1820             break;
1821         default:
1822             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1823             break;
1824         }
1825         return 0;
1826       got_it:
1827         return s;
1828 }
1829
1830
1831 /*
1832  - regexec_flags - match a regexp against a string
1833  */
1834 I32
1835 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1836               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1837 /* strend: pointer to null at end of string */
1838 /* strbeg: real beginning of string */
1839 /* minend: end of match must be >=minend after stringarg. */
1840 /* data: May be used for some additional optimizations.
1841          Currently its only used, with a U32 cast, for transmitting
1842          the ganch offset when doing a /g match. This will change */
1843 /* nosave: For optimizations. */
1844 {
1845     dVAR;
1846     struct regexp *const prog = (struct regexp *)SvANY(rx);
1847     /*register*/ char *s;
1848     register regnode *c;
1849     /*register*/ char *startpos = stringarg;
1850     I32 minlen;         /* must match at least this many chars */
1851     I32 dontbother = 0; /* how many characters not to try at end */
1852     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1853     I32 scream_pos = -1;                /* Internal iterator of scream. */
1854     char *scream_olds = NULL;
1855     const bool do_utf8 = (bool)DO_UTF8(sv);
1856     I32 multiline;
1857     RXi_GET_DECL(prog,progi);
1858     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1859     regexp_paren_pair *swap = NULL;
1860     GET_RE_DEBUG_FLAGS_DECL;
1861
1862     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1863     PERL_UNUSED_ARG(data);
1864
1865     /* Be paranoid... */
1866     if (prog == NULL || startpos == NULL) {
1867         Perl_croak(aTHX_ "NULL regexp parameter");
1868         return 0;
1869     }
1870
1871     multiline = prog->extflags & RXf_PMf_MULTILINE;
1872     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1873
1874     RX_MATCH_UTF8_set(rx, do_utf8);
1875     DEBUG_EXECUTE_r(
1876         debug_start_match(rx, do_utf8, startpos, strend,
1877         "Matching");
1878     );
1879
1880     minlen = prog->minlen;
1881
1882     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1883         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1884                               "String too short [regexec_flags]...\n"));
1885         goto phooey;
1886     }
1887
1888
1889     /* Check validity of program. */
1890     if (UCHARAT(progi->program) != REG_MAGIC) {
1891         Perl_croak(aTHX_ "corrupted regexp program");
1892     }
1893
1894     PL_reg_flags = 0;
1895     PL_reg_eval_set = 0;
1896     PL_reg_maxiter = 0;
1897
1898     if (RX_UTF8(rx))
1899         PL_reg_flags |= RF_utf8;
1900
1901     /* Mark beginning of line for ^ and lookbehind. */
1902     reginfo.bol = startpos; /* XXX not used ??? */
1903     PL_bostr  = strbeg;
1904     reginfo.sv = sv;
1905
1906     /* Mark end of line for $ (and such) */
1907     PL_regeol = strend;
1908
1909     /* see how far we have to get to not match where we matched before */
1910     reginfo.till = startpos+minend;
1911
1912     /* If there is a "must appear" string, look for it. */
1913     s = startpos;
1914
1915     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1916         MAGIC *mg;
1917         if (flags & REXEC_IGNOREPOS){   /* Means: check only at start */
1918             reginfo.ganch = startpos + prog->gofs;
1919             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1920               "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
1921         } else if (sv && SvTYPE(sv) >= SVt_PVMG
1922                   && SvMAGIC(sv)
1923                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1924                   && mg->mg_len >= 0) {
1925             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1926             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1927                 "GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
1928
1929             if (prog->extflags & RXf_ANCH_GPOS) {
1930                 if (s > reginfo.ganch)
1931                     goto phooey;
1932                 s = reginfo.ganch - prog->gofs;
1933                 DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1934                      "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
1935                 if (s < strbeg)
1936                     goto phooey;
1937             }
1938         }
1939         else if (data) {
1940             reginfo.ganch = strbeg + PTR2UV(data);
1941             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1942                  "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
1943
1944         } else {                                /* pos() not defined */
1945             reginfo.ganch = strbeg;
1946             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1947                  "GPOS: reginfo.ganch = strbeg\n"));
1948         }
1949     }
1950     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1951         /* We have to be careful. If the previous successful match
1952            was from this regex we don't want a subsequent partially
1953            successful match to clobber the old results.
1954            So when we detect this possibility we add a swap buffer
1955            to the re, and switch the buffer each match. If we fail
1956            we switch it back, otherwise we leave it swapped.
1957         */
1958         swap = prog->offs;
1959         /* do we need a save destructor here for eval dies? */
1960         Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1961     }
1962     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1963         re_scream_pos_data d;
1964
1965         d.scream_olds = &scream_olds;
1966         d.scream_pos = &scream_pos;
1967         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1968         if (!s) {
1969             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1970             goto phooey;        /* not present */
1971         }
1972     }
1973
1974
1975
1976     /* Simplest case:  anchored match need be tried only once. */
1977     /*  [unless only anchor is BOL and multiline is set] */
1978     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1979         if (s == startpos && regtry(&reginfo, &startpos))
1980             goto got_it;
1981         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1982                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1983         {
1984             char *end;
1985
1986             if (minlen)
1987                 dontbother = minlen - 1;
1988             end = HOP3c(strend, -dontbother, strbeg) - 1;
1989             /* for multiline we only have to try after newlines */
1990             if (prog->check_substr || prog->check_utf8) {
1991                 if (s == startpos)
1992                     goto after_try;
1993                 while (1) {
1994                     if (regtry(&reginfo, &s))
1995                         goto got_it;
1996                   after_try:
1997                     if (s > end)
1998                         goto phooey;
1999                     if (prog->extflags & RXf_USE_INTUIT) {
2000                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
2001                         if (!s)
2002                             goto phooey;
2003                     }
2004                     else
2005                         s++;
2006                 }
2007             } else {
2008                 if (s > startpos)
2009                     s--;
2010                 while (s < end) {
2011                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
2012                         if (regtry(&reginfo, &s))
2013                             goto got_it;
2014                     }
2015                 }
2016             }
2017         }
2018         goto phooey;
2019     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
2020     {
2021         /* the warning about reginfo.ganch being used without intialization
2022            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
2023            and we only enter this block when the same bit is set. */
2024         char *tmp_s = reginfo.ganch - prog->gofs;
2025
2026         if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
2027             goto got_it;
2028         goto phooey;
2029     }
2030
2031     /* Messy cases:  unanchored match. */
2032     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
2033         /* we have /x+whatever/ */
2034         /* it must be a one character string (XXXX Except UTF?) */
2035         char ch;
2036 #ifdef DEBUGGING
2037         int did_match = 0;
2038 #endif
2039         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2040             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2041         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
2042
2043         if (do_utf8) {
2044             REXEC_FBC_SCAN(
2045                 if (*s == ch) {
2046                     DEBUG_EXECUTE_r( did_match = 1 );
2047                     if (regtry(&reginfo, &s)) goto got_it;
2048                     s += UTF8SKIP(s);
2049                     while (s < strend && *s == ch)
2050                         s += UTF8SKIP(s);
2051                 }
2052             );
2053         }
2054         else {
2055             REXEC_FBC_SCAN(
2056                 if (*s == ch) {
2057                     DEBUG_EXECUTE_r( did_match = 1 );
2058                     if (regtry(&reginfo, &s)) goto got_it;
2059                     s++;
2060                     while (s < strend && *s == ch)
2061                         s++;
2062                 }
2063             );
2064         }
2065         DEBUG_EXECUTE_r(if (!did_match)
2066                 PerlIO_printf(Perl_debug_log,
2067                                   "Did not find anchored character...\n")
2068                );
2069     }
2070     else if (prog->anchored_substr != NULL
2071               || prog->anchored_utf8 != NULL
2072               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
2073                   && prog->float_max_offset < strend - s)) {
2074         SV *must;
2075         I32 back_max;
2076         I32 back_min;
2077         char *last;
2078         char *last1;            /* Last position checked before */
2079 #ifdef DEBUGGING
2080         int did_match = 0;
2081 #endif
2082         if (prog->anchored_substr || prog->anchored_utf8) {
2083             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2084                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2085             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
2086             back_max = back_min = prog->anchored_offset;
2087         } else {
2088             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2089                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2090             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
2091             back_max = prog->float_max_offset;
2092             back_min = prog->float_min_offset;
2093         }
2094
2095
2096         if (must == &PL_sv_undef)
2097             /* could not downgrade utf8 check substring, so must fail */
2098             goto phooey;
2099
2100         if (back_min<0) {
2101             last = strend;
2102         } else {
2103             last = HOP3c(strend,        /* Cannot start after this */
2104                   -(I32)(CHR_SVLEN(must)
2105                          - (SvTAIL(must) != 0) + back_min), strbeg);
2106         }
2107         if (s > PL_bostr)
2108             last1 = HOPc(s, -1);
2109         else
2110             last1 = s - 1;      /* bogus */
2111
2112         /* XXXX check_substr already used to find "s", can optimize if
2113            check_substr==must. */
2114         scream_pos = -1;
2115         dontbother = end_shift;
2116         strend = HOPc(strend, -dontbother);
2117         while ( (s <= last) &&
2118                 ((flags & REXEC_SCREAM)
2119                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2120                                     end_shift, &scream_pos, 0))
2121                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2122                                   (unsigned char*)strend, must,
2123                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2124             /* we may be pointing at the wrong string */
2125             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2126                 s = strbeg + (s - SvPVX_const(sv));
2127             DEBUG_EXECUTE_r( did_match = 1 );
2128             if (HOPc(s, -back_max) > last1) {
2129                 last1 = HOPc(s, -back_min);
2130                 s = HOPc(s, -back_max);
2131             }
2132             else {
2133                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2134
2135                 last1 = HOPc(s, -back_min);
2136                 s = t;
2137             }
2138             if (do_utf8) {
2139                 while (s <= last1) {
2140                     if (regtry(&reginfo, &s))
2141                         goto got_it;
2142                     s += UTF8SKIP(s);
2143                 }
2144             }
2145             else {
2146                 while (s <= last1) {
2147                     if (regtry(&reginfo, &s))
2148                         goto got_it;
2149                     s++;
2150                 }
2151             }
2152         }
2153         DEBUG_EXECUTE_r(if (!did_match) {
2154             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2155                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2156             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2157                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2158                                ? "anchored" : "floating"),
2159                 quoted, RE_SV_TAIL(must));
2160         });
2161         goto phooey;
2162     }
2163     else if ( (c = progi->regstclass) ) {
2164         if (minlen) {
2165             const OPCODE op = OP(progi->regstclass);
2166             /* don't bother with what can't match */
2167             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2168                 strend = HOPc(strend, -(minlen - 1));
2169         }
2170         DEBUG_EXECUTE_r({
2171             SV * const prop = sv_newmortal();
2172             regprop(prog, prop, c);
2173             {
2174                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2175                     s,strend-s,60);
2176                 PerlIO_printf(Perl_debug_log,
2177                     "Matching stclass %.*s against %s (%d chars)\n",
2178                     (int)SvCUR(prop), SvPVX_const(prop),
2179                      quoted, (int)(strend - s));
2180             }
2181         });
2182         if (find_byclass(prog, c, s, strend, &reginfo))
2183             goto got_it;
2184         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2185     }
2186     else {
2187         dontbother = 0;
2188         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2189             /* Trim the end. */
2190             char *last;
2191             SV* float_real;
2192
2193             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2194                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2195             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2196
2197             if (flags & REXEC_SCREAM) {
2198                 last = screaminstr(sv, float_real, s - strbeg,
2199                                    end_shift, &scream_pos, 1); /* last one */
2200                 if (!last)
2201                     last = scream_olds; /* Only one occurrence. */
2202                 /* we may be pointing at the wrong string */
2203                 else if (RXp_MATCH_COPIED(prog))
2204                     s = strbeg + (s - SvPVX_const(sv));
2205             }
2206             else {
2207                 STRLEN len;
2208                 const char * const little = SvPV_const(float_real, len);
2209
2210                 if (SvTAIL(float_real)) {
2211                     if (memEQ(strend - len + 1, little, len - 1))
2212                         last = strend - len + 1;
2213                     else if (!multiline)
2214                         last = memEQ(strend - len, little, len)
2215                             ? strend - len : NULL;
2216                     else
2217                         goto find_last;
2218                 } else {
2219                   find_last:
2220                     if (len)
2221                         last = rninstr(s, strend, little, little + len);
2222                     else
2223                         last = strend;  /* matching "$" */
2224                 }
2225             }
2226             if (last == NULL) {
2227                 DEBUG_EXECUTE_r(
2228                     PerlIO_printf(Perl_debug_log,
2229                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2230                         PL_colors[4], PL_colors[5]));
2231                 goto phooey; /* Should not happen! */
2232             }
2233             dontbother = strend - last + prog->float_min_offset;
2234         }
2235         if (minlen && (dontbother < minlen))
2236             dontbother = minlen - 1;
2237         strend -= dontbother;              /* this one's always in bytes! */
2238         /* We don't know much -- general case. */
2239         if (do_utf8) {
2240             for (;;) {
2241                 if (regtry(&reginfo, &s))
2242                     goto got_it;
2243                 if (s >= strend)
2244                     break;
2245                 s += UTF8SKIP(s);
2246             };
2247         }
2248         else {
2249             do {
2250                 if (regtry(&reginfo, &s))
2251                     goto got_it;
2252             } while (s++ < strend);
2253         }
2254     }
2255
2256     /* Failure. */
2257     goto phooey;
2258
2259 got_it:
2260     Safefree(swap);
2261     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2262
2263     if (PL_reg_eval_set)
2264         restore_pos(aTHX_ prog);
2265     if (RXp_PAREN_NAMES(prog))
2266         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2267
2268     /* make sure $`, $&, $', and $digit will work later */
2269     if ( !(flags & REXEC_NOT_FIRST) ) {
2270         RX_MATCH_COPY_FREE(rx);
2271         if (flags & REXEC_COPY_STR) {
2272             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2273 #ifdef PERL_OLD_COPY_ON_WRITE
2274             if ((SvIsCOW(sv)
2275                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2276                 if (DEBUG_C_TEST) {
2277                     PerlIO_printf(Perl_debug_log,
2278                                   "Copy on write: regexp capture, type %d\n",
2279                                   (int) SvTYPE(sv));
2280                 }
2281                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2282                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2283                 assert (SvPOKp(prog->saved_copy));
2284             } else
2285 #endif
2286             {
2287                 RX_MATCH_COPIED_on(rx);
2288                 s = savepvn(strbeg, i);
2289                 prog->subbeg = s;
2290             }
2291             prog->sublen = i;
2292         }
2293         else {
2294             prog->subbeg = strbeg;
2295             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2296         }
2297     }
2298
2299     return 1;
2300
2301 phooey:
2302     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2303                           PL_colors[4], PL_colors[5]));
2304     if (PL_reg_eval_set)
2305         restore_pos(aTHX_ prog);
2306     if (swap) {
2307         /* we failed :-( roll it back */
2308         Safefree(prog->offs);
2309         prog->offs = swap;
2310     }
2311
2312     return 0;
2313 }
2314
2315
2316 /*
2317  - regtry - try match at specific point
2318  */
2319 STATIC I32                      /* 0 failure, 1 success */
2320 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2321 {
2322     dVAR;
2323     CHECKPOINT lastcp;
2324     REGEXP *const rx = reginfo->prog;
2325     regexp *const prog = (struct regexp *)SvANY(rx);
2326     RXi_GET_DECL(prog,progi);
2327     GET_RE_DEBUG_FLAGS_DECL;
2328
2329     PERL_ARGS_ASSERT_REGTRY;
2330
2331     reginfo->cutpoint=NULL;
2332
2333     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2334         MAGIC *mg;
2335
2336         PL_reg_eval_set = RS_init;
2337         DEBUG_EXECUTE_r(DEBUG_s(
2338             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2339                           (IV)(PL_stack_sp - PL_stack_base));
2340             ));
2341         SAVESTACK_CXPOS();
2342         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2343         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2344         SAVETMPS;
2345         /* Apparently this is not needed, judging by wantarray. */
2346         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2347            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2348
2349         if (reginfo->sv) {
2350             /* Make $_ available to executed code. */
2351             if (reginfo->sv != DEFSV) {
2352                 SAVE_DEFSV;
2353                 DEFSV_set(reginfo->sv);
2354             }
2355
2356             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2357                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2358                 /* prepare for quick setting of pos */
2359 #ifdef PERL_OLD_COPY_ON_WRITE
2360                 if (SvIsCOW(reginfo->sv))
2361                     sv_force_normal_flags(reginfo->sv, 0);
2362 #endif
2363                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2364                                  &PL_vtbl_mglob, NULL, 0);
2365                 mg->mg_len = -1;
2366             }
2367             PL_reg_magic    = mg;
2368             PL_reg_oldpos   = mg->mg_len;
2369             SAVEDESTRUCTOR_X(restore_pos, prog);
2370         }
2371         if (!PL_reg_curpm) {
2372             Newxz(PL_reg_curpm, 1, PMOP);
2373 #ifdef USE_ITHREADS
2374             {
2375                 SV* const repointer = &PL_sv_undef;
2376                 /* this regexp is also owned by the new PL_reg_curpm, which
2377                    will try to free it.  */
2378                 av_push(PL_regex_padav, repointer);
2379                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2380                 PL_regex_pad = AvARRAY(PL_regex_padav);
2381             }
2382 #endif
2383         }
2384 #ifdef USE_ITHREADS
2385         /* It seems that non-ithreads works both with and without this code.
2386            So for efficiency reasons it seems best not to have the code
2387            compiled when it is not needed.  */
2388         /* This is safe against NULLs: */
2389         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2390         /* PM_reg_curpm owns a reference to this regexp.  */
2391         ReREFCNT_inc(rx);
2392 #endif
2393         PM_SETRE(PL_reg_curpm, rx);
2394         PL_reg_oldcurpm = PL_curpm;
2395         PL_curpm = PL_reg_curpm;
2396         if (RXp_MATCH_COPIED(prog)) {
2397             /*  Here is a serious problem: we cannot rewrite subbeg,
2398                 since it may be needed if this match fails.  Thus
2399                 $` inside (?{}) could fail... */
2400             PL_reg_oldsaved = prog->subbeg;
2401             PL_reg_oldsavedlen = prog->sublen;
2402 #ifdef PERL_OLD_COPY_ON_WRITE
2403             PL_nrs = prog->saved_copy;
2404 #endif
2405             RXp_MATCH_COPIED_off(prog);
2406         }
2407         else
2408             PL_reg_oldsaved = NULL;
2409         prog->subbeg = PL_bostr;
2410         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2411     }
2412     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2413     prog->offs[0].start = *startpos - PL_bostr;
2414     PL_reginput = *startpos;
2415     PL_reglastparen = &prog->lastparen;
2416     PL_reglastcloseparen = &prog->lastcloseparen;
2417     prog->lastparen = 0;
2418     prog->lastcloseparen = 0;
2419     PL_regsize = 0;
2420     PL_regoffs = prog->offs;
2421     if (PL_reg_start_tmpl <= prog->nparens) {
2422         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2423         if(PL_reg_start_tmp)
2424             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2425         else
2426             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2427     }
2428
2429     /* XXXX What this code is doing here?!!!  There should be no need
2430        to do this again and again, PL_reglastparen should take care of
2431        this!  --ilya*/
2432
2433     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2434      * Actually, the code in regcppop() (which Ilya may be meaning by
2435      * PL_reglastparen), is not needed at all by the test suite
2436      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2437      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2438      * Meanwhile, this code *is* needed for the
2439      * above-mentioned test suite tests to succeed.  The common theme
2440      * on those tests seems to be returning null fields from matches.
2441      * --jhi updated by dapm */
2442 #if 1
2443     if (prog->nparens) {
2444         regexp_paren_pair *pp = PL_regoffs;
2445         register I32 i;
2446         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2447             ++pp;
2448             pp->start = -1;
2449             pp->end = -1;
2450         }
2451     }
2452 #endif
2453     REGCP_SET(lastcp);
2454     if (regmatch(reginfo, progi->program + 1)) {
2455         PL_regoffs[0].end = PL_reginput - PL_bostr;
2456         return 1;
2457     }
2458     if (reginfo->cutpoint)
2459         *startpos= reginfo->cutpoint;
2460     REGCP_UNWIND(lastcp);
2461     return 0;
2462 }
2463
2464
2465 #define sayYES goto yes
2466 #define sayNO goto no
2467 #define sayNO_SILENT goto no_silent
2468
2469 /* we dont use STMT_START/END here because it leads to
2470    "unreachable code" warnings, which are bogus, but distracting. */
2471 #define CACHEsayNO \
2472     if (ST.cache_mask) \
2473        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2474     sayNO
2475
2476 /* this is used to determine how far from the left messages like
2477    'failed...' are printed. It should be set such that messages
2478    are inline with the regop output that created them.
2479 */
2480 #define REPORT_CODE_OFF 32
2481
2482
2483 /* Make sure there is a test for this +1 options in re_tests */
2484 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2485
2486 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2487 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2488
2489 #define SLAB_FIRST(s) (&(s)->states[0])
2490 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2491
2492 /* grab a new slab and return the first slot in it */
2493
2494 STATIC regmatch_state *
2495 S_push_slab(pTHX)
2496 {
2497 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2498     dMY_CXT;
2499 #endif
2500     regmatch_slab *s = PL_regmatch_slab->next;
2501     if (!s) {
2502         Newx(s, 1, regmatch_slab);
2503         s->prev = PL_regmatch_slab;
2504         s->next = NULL;
2505         PL_regmatch_slab->next = s;
2506     }
2507     PL_regmatch_slab = s;
2508     return SLAB_FIRST(s);
2509 }
2510
2511
2512 /* push a new state then goto it */
2513
2514 #define PUSH_STATE_GOTO(state, node) \
2515     scan = node; \
2516     st->resume_state = state; \
2517     goto push_state;
2518
2519 /* push a new state with success backtracking, then goto it */
2520
2521 #define PUSH_YES_STATE_GOTO(state, node) \
2522     scan = node; \
2523     st->resume_state = state; \
2524     goto push_yes_state;
2525
2526
2527
2528 /*
2529
2530 regmatch() - main matching routine
2531
2532 This is basically one big switch statement in a loop. We execute an op,
2533 set 'next' to point the next op, and continue. If we come to a point which
2534 we may need to backtrack to on failure such as (A|B|C), we push a
2535 backtrack state onto the backtrack stack. On failure, we pop the top
2536 state, and re-enter the loop at the state indicated. If there are no more
2537 states to pop, we return failure.
2538
2539 Sometimes we also need to backtrack on success; for example /A+/, where
2540 after successfully matching one A, we need to go back and try to
2541 match another one; similarly for lookahead assertions: if the assertion
2542 completes successfully, we backtrack to the state just before the assertion
2543 and then carry on.  In these cases, the pushed state is marked as
2544 'backtrack on success too'. This marking is in fact done by a chain of
2545 pointers, each pointing to the previous 'yes' state. On success, we pop to
2546 the nearest yes state, discarding any intermediate failure-only states.
2547 Sometimes a yes state is pushed just to force some cleanup code to be
2548 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2549 it to free the inner regex.
2550
2551 Note that failure backtracking rewinds the cursor position, while
2552 success backtracking leaves it alone.
2553
2554 A pattern is complete when the END op is executed, while a subpattern
2555 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2556 ops trigger the "pop to last yes state if any, otherwise return true"
2557 behaviour.
2558
2559 A common convention in this function is to use A and B to refer to the two
2560 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2561 the subpattern to be matched possibly multiple times, while B is the entire
2562 rest of the pattern. Variable and state names reflect this convention.
2563
2564 The states in the main switch are the union of ops and failure/success of
2565 substates associated with with that op.  For example, IFMATCH is the op
2566 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2567 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2568 successfully matched A and IFMATCH_A_fail is a state saying that we have
2569 just failed to match A. Resume states always come in pairs. The backtrack
2570 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2571 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2572 on success or failure.
2573
2574 The struct that holds a backtracking state is actually a big union, with
2575 one variant for each major type of op. The variable st points to the
2576 top-most backtrack struct. To make the code clearer, within each
2577 block of code we #define ST to alias the relevant union.
2578
2579 Here's a concrete example of a (vastly oversimplified) IFMATCH
2580 implementation:
2581
2582     switch (state) {
2583     ....
2584
2585 #define ST st->u.ifmatch
2586
2587     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2588         ST.foo = ...; // some state we wish to save
2589         ...
2590         // push a yes backtrack state with a resume value of
2591         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2592         // first node of A:
2593         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2594         // NOTREACHED
2595
2596     case IFMATCH_A: // we have successfully executed A; now continue with B
2597         next = B;
2598         bar = ST.foo; // do something with the preserved value
2599         break;
2600
2601     case IFMATCH_A_fail: // A failed, so the assertion failed
2602         ...;   // do some housekeeping, then ...
2603         sayNO; // propagate the failure
2604
2605 #undef ST
2606
2607     ...
2608     }
2609
2610 For any old-timers reading this who are familiar with the old recursive
2611 approach, the code above is equivalent to:
2612
2613     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2614     {
2615         int foo = ...
2616         ...
2617         if (regmatch(A)) {
2618             next = B;
2619             bar = foo;
2620             break;
2621         }
2622         ...;   // do some housekeeping, then ...
2623         sayNO; // propagate the failure
2624     }
2625
2626 The topmost backtrack state, pointed to by st, is usually free. If you
2627 want to claim it, populate any ST.foo fields in it with values you wish to
2628 save, then do one of
2629
2630         PUSH_STATE_GOTO(resume_state, node);
2631         PUSH_YES_STATE_GOTO(resume_state, node);
2632
2633 which sets that backtrack state's resume value to 'resume_state', pushes a
2634 new free entry to the top of the backtrack stack, then goes to 'node'.
2635 On backtracking, the free slot is popped, and the saved state becomes the
2636 new free state. An ST.foo field in this new top state can be temporarily
2637 accessed to retrieve values, but once the main loop is re-entered, it
2638 becomes available for reuse.
2639
2640 Note that the depth of the backtrack stack constantly increases during the
2641 left-to-right execution of the pattern, rather than going up and down with
2642 the pattern nesting. For example the stack is at its maximum at Z at the
2643 end of the pattern, rather than at X in the following:
2644
2645     /(((X)+)+)+....(Y)+....Z/
2646
2647 The only exceptions to this are lookahead/behind assertions and the cut,
2648 (?>A), which pop all the backtrack states associated with A before
2649 continuing.
2650
2651 Bascktrack state structs are allocated in slabs of about 4K in size.
2652 PL_regmatch_state and st always point to the currently active state,
2653 and PL_regmatch_slab points to the slab currently containing
2654 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2655 allocated, and is never freed until interpreter destruction. When the slab
2656 is full, a new one is allocated and chained to the end. At exit from
2657 regmatch(), slabs allocated since entry are freed.
2658
2659 */
2660
2661
2662 #define DEBUG_STATE_pp(pp)                                  \
2663     DEBUG_STATE_r({                                         \
2664         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2665         PerlIO_printf(Perl_debug_log,                       \
2666             "    %*s"pp" %s%s%s%s%s\n",                     \
2667             depth*2, "",                                    \
2668             PL_reg_name[st->resume_state],                     \
2669             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2670             ((st==yes_state) ? "Y" : ""),                   \
2671             ((st==mark_state) ? "M" : ""),                  \
2672             ((st==yes_state||st==mark_state) ? "]" : "")    \
2673         );                                                  \
2674     });
2675
2676
2677 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2678
2679 #ifdef DEBUGGING
2680
2681 STATIC void
2682 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2683     const char *start, const char *end, const char *blurb)
2684 {
2685     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2686
2687     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2688
2689     if (!PL_colorset)
2690             reginitcolors();
2691     {
2692         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2693             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2694
2695         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2696             start, end - start, 60);
2697
2698         PerlIO_printf(Perl_debug_log,
2699             "%s%s REx%s %s against %s\n",
2700                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2701
2702         if (do_utf8||utf8_pat)
2703             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2704                 utf8_pat ? "pattern" : "",
2705                 utf8_pat && do_utf8 ? " and " : "",
2706                 do_utf8 ? "string" : ""
2707             );
2708     }
2709 }
2710
2711 STATIC void
2712 S_dump_exec_pos(pTHX_ const char *locinput,
2713                       const regnode *scan,
2714                       const char *loc_regeol,
2715                       const char *loc_bostr,
2716                       const char *loc_reg_starttry,
2717                       const bool do_utf8)
2718 {
2719     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2720     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2721     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2722     /* The part of the string before starttry has one color
2723        (pref0_len chars), between starttry and current
2724        position another one (pref_len - pref0_len chars),
2725        after the current position the third one.
2726        We assume that pref0_len <= pref_len, otherwise we
2727        decrease pref0_len.  */
2728     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2729         ? (5 + taill) - l : locinput - loc_bostr;
2730     int pref0_len;
2731
2732     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2733
2734     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2735         pref_len++;
2736     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2737     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2738         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2739               ? (5 + taill) - pref_len : loc_regeol - locinput);
2740     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2741         l--;
2742     if (pref0_len < 0)
2743         pref0_len = 0;
2744     if (pref0_len > pref_len)
2745         pref0_len = pref_len;
2746     {
2747         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2748
2749         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2750             (locinput - pref_len),pref0_len, 60, 4, 5);
2751
2752         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2753                     (locinput - pref_len + pref0_len),
2754                     pref_len - pref0_len, 60, 2, 3);
2755
2756         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2757                     locinput, loc_regeol - locinput, 10, 0, 1);
2758
2759         const STRLEN tlen=len0+len1+len2;
2760         PerlIO_printf(Perl_debug_log,
2761                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2762                     (IV)(locinput - loc_bostr),
2763                     len0, s0,
2764                     len1, s1,
2765                     (docolor ? "" : "> <"),
2766                     len2, s2,
2767                     (int)(tlen > 19 ? 0 :  19 - tlen),
2768                     "");
2769     }
2770 }
2771
2772 #endif
2773
2774 /* reg_check_named_buff_matched()
2775  * Checks to see if a named buffer has matched. The data array of
2776  * buffer numbers corresponding to the buffer is expected to reside
2777  * in the regexp->data->data array in the slot stored in the ARG() of
2778  * node involved. Note that this routine doesn't actually care about the
2779  * name, that information is not preserved from compilation to execution.
2780  * Returns the index of the leftmost defined buffer with the given name
2781  * or 0 if non of the buffers matched.
2782  */
2783 STATIC I32
2784 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2785 {
2786     I32 n;
2787     RXi_GET_DECL(rex,rexi);
2788     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2789     I32 *nums=(I32*)SvPVX(sv_dat);
2790
2791     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2792
2793     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2794         if ((I32)*PL_reglastparen >= nums[n] &&
2795             PL_regoffs[nums[n]].end != -1)
2796         {
2797             return nums[n];
2798         }
2799     }
2800     return 0;
2801 }
2802
2803
2804 /* free all slabs above current one  - called during LEAVE_SCOPE */
2805
2806 STATIC void
2807 S_clear_backtrack_stack(pTHX_ void *p)
2808 {
2809     regmatch_slab *s = PL_regmatch_slab->next;
2810     PERL_UNUSED_ARG(p);
2811
2812     if (!s)
2813         return;
2814     PL_regmatch_slab->next = NULL;
2815     while (s) {
2816         regmatch_slab * const osl = s;
2817         s = s->next;
2818         Safefree(osl);
2819     }
2820 }
2821
2822
2823 #define SETREX(Re1,Re2) \
2824     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2825     Re1 = (Re2)
2826
2827 STATIC I32                      /* 0 failure, 1 success */
2828 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2829 {
2830 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2831     dMY_CXT;
2832 #endif
2833     dVAR;
2834     register const bool do_utf8 = PL_reg_match_utf8;
2835     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2836     REGEXP *rex_sv = reginfo->prog;
2837     regexp *rex = (struct regexp *)SvANY(rex_sv);
2838     RXi_GET_DECL(rex,rexi);
2839     I32 oldsave;
2840     /* the current state. This is a cached copy of PL_regmatch_state */
2841     register regmatch_state *st;
2842     /* cache heavy used fields of st in registers */
2843     register regnode *scan;
2844     register regnode *next;
2845     register U32 n = 0; /* general value; init to avoid compiler warning */
2846     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2847     register char *locinput = PL_reginput;
2848     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2849
2850     bool result = 0;        /* return value of S_regmatch */
2851     int depth = 0;          /* depth of backtrack stack */
2852     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2853     const U32 max_nochange_depth =
2854         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2855         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2856     regmatch_state *yes_state = NULL; /* state to pop to on success of
2857                                                             subpattern */
2858     /* mark_state piggy backs on the yes_state logic so that when we unwind
2859        the stack on success we can update the mark_state as we go */
2860     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2861     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2862     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2863     U32 state_num;
2864     bool no_final = 0;      /* prevent failure from backtracking? */
2865     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2866     char *startpoint = PL_reginput;
2867     SV *popmark = NULL;     /* are we looking for a mark? */
2868     SV *sv_commit = NULL;   /* last mark name seen in failure */
2869     SV *sv_yes_mark = NULL; /* last mark name we have seen
2870                                during a successfull match */
2871     U32 lastopen = 0;       /* last open we saw */
2872     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2873     SV* const oreplsv = GvSV(PL_replgv);
2874     /* these three flags are set by various ops to signal information to
2875      * the very next op. They have a useful lifetime of exactly one loop
2876      * iteration, and are not preserved or restored by state pushes/pops
2877      */
2878     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2879     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2880     int logical = 0;        /* the following EVAL is:
2881                                 0: (?{...})
2882                                 1: (?(?{...})X|Y)
2883                                 2: (??{...})
2884                                or the following IFMATCH/UNLESSM is:
2885                                 false: plain (?=foo)
2886                                 true:  used as a condition: (?(?=foo))
2887                             */
2888 #ifdef DEBUGGING
2889     GET_RE_DEBUG_FLAGS_DECL;
2890 #endif
2891
2892     PERL_ARGS_ASSERT_REGMATCH;
2893
2894     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2895             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2896     }));
2897     /* on first ever call to regmatch, allocate first slab */
2898     if (!PL_regmatch_slab) {
2899         Newx(PL_regmatch_slab, 1, regmatch_slab);
2900         PL_regmatch_slab->prev = NULL;
2901         PL_regmatch_slab->next = NULL;
2902         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2903     }
2904
2905     oldsave = PL_savestack_ix;
2906     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2907     SAVEVPTR(PL_regmatch_slab);
2908     SAVEVPTR(PL_regmatch_state);
2909
2910     /* grab next free state slot */
2911     st = ++PL_regmatch_state;
2912     if (st >  SLAB_LAST(PL_regmatch_slab))
2913         st = PL_regmatch_state = S_push_slab(aTHX);
2914
2915     /* Note that nextchr is a byte even in UTF */
2916     nextchr = UCHARAT(locinput);
2917     scan = prog;
2918     while (scan != NULL) {
2919
2920         DEBUG_EXECUTE_r( {
2921             SV * const prop = sv_newmortal();
2922             regnode *rnext=regnext(scan);
2923             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2924             regprop(rex, prop, scan);
2925
2926             PerlIO_printf(Perl_debug_log,
2927                     "%3"IVdf":%*s%s(%"IVdf")\n",
2928                     (IV)(scan - rexi->program), depth*2, "",
2929                     SvPVX_const(prop),
2930                     (PL_regkind[OP(scan)] == END || !rnext) ?
2931                         0 : (IV)(rnext - rexi->program));
2932         });
2933
2934         next = scan + NEXT_OFF(scan);
2935         if (next == scan)
2936             next = NULL;
2937         state_num = OP(scan);
2938
2939       reenter_switch:
2940
2941         assert(PL_reglastparen == &rex->lastparen);
2942         assert(PL_reglastcloseparen == &rex->lastcloseparen);
2943         assert(PL_regoffs == rex->offs);
2944
2945         switch (state_num) {
2946         case BOL:
2947             if (locinput == PL_bostr)
2948             {
2949                 /* reginfo->till = reginfo->bol; */
2950                 break;
2951             }
2952             sayNO;
2953         case MBOL:
2954             if (locinput == PL_bostr ||
2955                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2956             {
2957                 break;
2958             }
2959             sayNO;
2960         case SBOL:
2961             if (locinput == PL_bostr)
2962                 break;
2963             sayNO;
2964         case GPOS:
2965             if (locinput == reginfo->ganch)
2966                 break;
2967             sayNO;
2968
2969         case KEEPS:
2970             /* update the startpoint */
2971             st->u.keeper.val = PL_regoffs[0].start;
2972             PL_reginput = locinput;
2973             PL_regoffs[0].start = locinput - PL_bostr;
2974             PUSH_STATE_GOTO(KEEPS_next, next);
2975             /*NOT-REACHED*/
2976         case KEEPS_next_fail:
2977             /* rollback the start point change */
2978             PL_regoffs[0].start = st->u.keeper.val;
2979             sayNO_SILENT;
2980             /*NOT-REACHED*/
2981         case EOL:
2982                 goto seol;
2983         case MEOL:
2984             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2985                 sayNO;
2986             break;
2987         case SEOL:
2988           seol:
2989             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2990                 sayNO;
2991             if (PL_regeol - locinput > 1)
2992                 sayNO;
2993             break;
2994         case EOS:
2995             if (PL_regeol != locinput)
2996                 sayNO;
2997             break;
2998         case SANY:
2999             if (!nextchr && locinput >= PL_regeol)
3000                 sayNO;
3001             if (do_utf8) {
3002                 locinput += PL_utf8skip[nextchr];
3003                 if (locinput > PL_regeol)
3004                     sayNO;
3005                 nextchr = UCHARAT(locinput);
3006             }
3007             else
3008                 nextchr = UCHARAT(++locinput);
3009             break;
3010         case CANY:
3011             if (!nextchr && locinput >= PL_regeol)
3012                 sayNO;
3013             nextchr = UCHARAT(++locinput);
3014             break;
3015         case REG_ANY:
3016             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
3017                 sayNO;
3018             if (do_utf8) {
3019                 locinput += PL_utf8skip[nextchr];
3020                 if (locinput > PL_regeol)
3021                     sayNO;
3022                 nextchr = UCHARAT(locinput);
3023             }
3024             else
3025                 nextchr = UCHARAT(++locinput);
3026             break;
3027
3028 #undef  ST
3029 #define ST st->u.trie
3030         case TRIEC:
3031             /* In this case the charclass data is available inline so
3032                we can fail fast without a lot of extra overhead.
3033              */
3034             if (scan->flags == EXACT || !do_utf8) {
3035                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
3036                     DEBUG_EXECUTE_r(
3037                         PerlIO_printf(Perl_debug_log,
3038                                   "%*s  %sfailed to match trie start class...%s\n",
3039                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3040                     );
3041                     sayNO_SILENT;
3042                     /* NOTREACHED */
3043                 }
3044             }
3045             /* FALL THROUGH */
3046         case TRIE:
3047             {
3048                 /* what type of TRIE am I? (utf8 makes this contextual) */
3049                 DECL_TRIE_TYPE(scan);
3050
3051                 /* what trie are we using right now */
3052                 reg_trie_data * const trie
3053                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3054                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
3055                 U32 state = trie->startstate;
3056
3057                 if (trie->bitmap && trie_type != trie_utf8_fold &&
3058                     !TRIE_BITMAP_TEST(trie,*locinput)
3059                 ) {
3060                     if (trie->states[ state ].wordnum) {
3061                          DEBUG_EXECUTE_r(
3062                             PerlIO_printf(Perl_debug_log,
3063                                           "%*s  %smatched empty string...%s\n",
3064                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3065                         );
3066                         break;
3067                     } else {
3068                         DEBUG_EXECUTE_r(
3069                             PerlIO_printf(Perl_debug_log,
3070                                           "%*s  %sfailed to match trie start class...%s\n",
3071                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3072                         );
3073                         sayNO_SILENT;
3074                    }
3075                 }
3076
3077             {
3078                 U8 *uc = ( U8* )locinput;
3079
3080                 STRLEN len = 0;
3081                 STRLEN foldlen = 0;
3082                 U8 *uscan = (U8*)NULL;
3083                 STRLEN bufflen=0;
3084                 SV *sv_accept_buff = NULL;
3085                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
3086
3087                 ST.accepted = 0; /* how many accepting states we have seen */
3088                 ST.B = next;
3089                 ST.jump = trie->jump;
3090                 ST.me = scan;
3091                 /*
3092                    traverse the TRIE keeping track of all accepting states
3093                    we transition through until we get to a failing node.
3094                 */
3095
3096                 while ( state && uc <= (U8*)PL_regeol ) {
3097                     U32 base = trie->states[ state ].trans.base;
3098                     UV uvc = 0;
3099                     U16 charid;
3100                     /* We use charid to hold the wordnum as we don't use it
3101                        for charid until after we have done the wordnum logic.
3102                        We define an alias just so that the wordnum logic reads
3103                        more naturally. */
3104
3105 #define got_wordnum charid
3106                     got_wordnum = trie->states[ state ].wordnum;
3107
3108                     if ( got_wordnum ) {
3109                         if ( ! ST.accepted ) {
3110                             ENTER;
3111                             SAVETMPS; /* XXX is this necessary? dmq */
3112                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3113                             sv_accept_buff=newSV(bufflen *
3114                                             sizeof(reg_trie_accepted) - 1);
3115                             SvCUR_set(sv_accept_buff, 0);
3116                             SvPOK_on(sv_accept_buff);
3117                             sv_2mortal(sv_accept_buff);
3118                             SAVETMPS;
3119                             ST.accept_buff =
3120                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3121                         }
3122                         do {
3123                             if (ST.accepted >= bufflen) {
3124                                 bufflen *= 2;
3125                                 ST.accept_buff =(reg_trie_accepted*)
3126                                     SvGROW(sv_accept_buff,
3127                                         bufflen * sizeof(reg_trie_accepted));
3128                             }
3129                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3130                                 + sizeof(reg_trie_accepted));
3131
3132
3133                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3134                             ST.accept_buff[ST.accepted].endpos = uc;
3135                             ++ST.accepted;
3136                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3137                     }
3138 #undef got_wordnum
3139
3140                     DEBUG_TRIE_EXECUTE_r({
3141                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3142                                 PerlIO_printf( Perl_debug_log,
3143                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3144                                     2+depth * 2, "", PL_colors[4],
3145                                     (UV)state, (UV)ST.accepted );
3146                     });
3147
3148                     if ( base ) {
3149                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3150                                              uscan, len, uvc, charid, foldlen,
3151                                              foldbuf, uniflags);
3152
3153                         if (charid &&
3154                              (base + charid > trie->uniquecharcount )
3155                              && (base + charid - 1 - trie->uniquecharcount
3156                                     < trie->lasttrans)
3157                              && trie->trans[base + charid - 1 -
3158                                     trie->uniquecharcount].check == state)
3159                         {
3160                             state = trie->trans[base + charid - 1 -
3161                                 trie->uniquecharcount ].next;
3162                         }
3163                         else {
3164                             state = 0;
3165                         }
3166                         uc += len;
3167
3168                     }
3169                     else {
3170                         state = 0;
3171                     }
3172                     DEBUG_TRIE_EXECUTE_r(
3173                         PerlIO_printf( Perl_debug_log,
3174                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3175                             charid, uvc, (UV)state, PL_colors[5] );
3176                     );
3177                 }
3178                 if (!ST.accepted )
3179                    sayNO;
3180
3181                 DEBUG_EXECUTE_r(
3182                     PerlIO_printf( Perl_debug_log,
3183                         "%*s  %sgot %"IVdf" possible matches%s\n",
3184                         REPORT_CODE_OFF + depth * 2, "",
3185                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3186                 );
3187             }}
3188             goto trie_first_try; /* jump into the fail handler */
3189             /* NOTREACHED */
3190         case TRIE_next_fail: /* we failed - try next alterative */
3191             if ( ST.jump) {
3192                 REGCP_UNWIND(ST.cp);
3193                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3194                     PL_regoffs[n].end = -1;
3195                 *PL_reglastparen = n;
3196             }
3197           trie_first_try:
3198             if (do_cutgroup) {
3199                 do_cutgroup = 0;
3200                 no_final = 0;
3201             }
3202
3203             if ( ST.jump) {
3204                 ST.lastparen = *PL_reglastparen;
3205                 REGCP_SET(ST.cp);
3206             }
3207             if ( ST.accepted == 1 ) {
3208                 /* only one choice left - just continue */
3209                 DEBUG_EXECUTE_r({
3210                     AV *const trie_words
3211                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3212                     SV ** const tmp = av_fetch( trie_words,
3213                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3214                     SV *sv= tmp ? sv_newmortal() : NULL;
3215
3216                     PerlIO_printf( Perl_debug_log,
3217                         "%*s  %sonly one match left: #%d <%s>%s\n",
3218                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3219                         ST.accept_buff[ 0 ].wordnum,
3220                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3221                                 PL_colors[0], PL_colors[1],
3222                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3223                             )
3224                         : "not compiled under -Dr",
3225                         PL_colors[5] );
3226                 });
3227                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3228                 /* in this case we free tmps/leave before we call regmatch
3229                    as we wont be using accept_buff again. */
3230
3231                 locinput = PL_reginput;
3232                 nextchr = UCHARAT(locinput);
3233                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3234                     scan = ST.B;
3235                 else
3236                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3237                 if (!has_cutgroup) {
3238                     FREETMPS;
3239                     LEAVE;
3240                 } else {
3241                     ST.accepted--;
3242                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3243                 }
3244
3245                 continue; /* execute rest of RE */
3246             }
3247
3248             if ( !ST.accepted-- ) {
3249                 DEBUG_EXECUTE_r({
3250                     PerlIO_printf( Perl_debug_log,
3251                         "%*s  %sTRIE failed...%s\n",
3252                         REPORT_CODE_OFF+depth*2, "",
3253                         PL_colors[4],
3254                         PL_colors[5] );
3255                 });
3256                 FREETMPS;
3257                 LEAVE;
3258                 sayNO_SILENT;
3259                 /*NOTREACHED*/
3260             }
3261
3262             /*
3263                There are at least two accepting states left.  Presumably
3264                the number of accepting states is going to be low,
3265                typically two. So we simply scan through to find the one
3266                with lowest wordnum.  Once we find it, we swap the last
3267                state into its place and decrement the size. We then try to
3268                match the rest of the pattern at the point where the word
3269                ends. If we succeed, control just continues along the
3270                regex; if we fail we return here to try the next accepting
3271                state
3272              */
3273
3274             {
3275                 U32 best = 0;
3276                 U32 cur;
3277                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3278                     DEBUG_TRIE_EXECUTE_r(
3279                         PerlIO_printf( Perl_debug_log,
3280                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3281                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3282                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3283                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3284                     );
3285
3286                     if (ST.accept_buff[cur].wordnum <
3287                             ST.accept_buff[best].wordnum)
3288                         best = cur;
3289                 }
3290
3291                 DEBUG_EXECUTE_r({
3292                     AV *const trie_words
3293                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3294                     SV ** const tmp = av_fetch( trie_words,
3295                         ST.accept_buff[ best ].wordnum - 1, 0 );
3296                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3297                                     ST.B :
3298                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3299                     SV *sv= tmp ? sv_newmortal() : NULL;
3300
3301                     PerlIO_printf( Perl_debug_log,
3302                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3303                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3304                         ST.accept_buff[best].wordnum,
3305                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3306                                 PL_colors[0], PL_colors[1],
3307                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3308                             ) : "not compiled under -Dr",
3309                             REG_NODE_NUM(nextop),
3310                         PL_colors[5] );
3311                 });
3312
3313                 if ( best<ST.accepted ) {
3314                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3315                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3316                     ST.accept_buff[ ST.accepted ] = tmp;
3317                     best = ST.accepted;
3318                 }
3319                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3320                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3321                     scan = ST.B;
3322                 } else {
3323                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3324                 }
3325                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3326                 /* NOTREACHED */
3327             }
3328             /* NOTREACHED */
3329         case TRIE_next:
3330             /* we dont want to throw this away, see bug 57042*/
3331             if (oreplsv != GvSV(PL_replgv))
3332                 sv_setsv(oreplsv, GvSV(PL_replgv));
3333             FREETMPS;
3334             LEAVE;
3335             sayYES;
3336 #undef  ST
3337
3338         case EXACT: {
3339             char *s = STRING(scan);
3340             ln = STR_LEN(scan);
3341             if (do_utf8 != UTF) {
3342                 /* The target and the pattern have differing utf8ness. */
3343                 char *l = locinput;
3344                 const char * const e = s + ln;
3345
3346                 if (do_utf8) {
3347                     /* The target is utf8, the pattern is not utf8. */
3348                     while (s < e) {
3349                         STRLEN ulen;
3350                         if (l >= PL_regeol)
3351                              sayNO;
3352                         if (NATIVE_TO_UNI(*(U8*)s) !=
3353                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3354                                             uniflags))
3355                              sayNO;
3356                         l += ulen;
3357                         s ++;
3358                     }
3359                 }
3360                 else {
3361                     /* The target is not utf8, the pattern is utf8. */
3362                     while (s < e) {
3363                         STRLEN ulen;
3364                         if (l >= PL_regeol)
3365                             sayNO;
3366                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3367                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3368                                            uniflags))
3369                             sayNO;
3370                         s += ulen;
3371                         l ++;
3372                     }
3373                 }
3374                 locinput = l;
3375                 nextchr = UCHARAT(locinput);
3376                 break;
3377             }
3378             /* The target and the pattern have the same utf8ness. */
3379             /* Inline the first character, for speed. */
3380             if (UCHARAT(s) != nextchr)
3381                 sayNO;
3382             if (PL_regeol - locinput < ln)
3383                 sayNO;
3384             if (ln > 1 && memNE(s, locinput, ln))
3385                 sayNO;
3386             locinput += ln;
3387             nextchr = UCHARAT(locinput);
3388             break;
3389             }
3390         case EXACTFL:
3391             PL_reg_flags |= RF_tainted;
3392             /* FALL THROUGH */
3393         case EXACTF: {
3394             char * const s = STRING(scan);
3395             ln = STR_LEN(scan);
3396
3397             if (do_utf8 || UTF) {
3398               /* Either target or the pattern are utf8. */
3399                 const char * const l = locinput;
3400                 char *e = PL_regeol;
3401
3402                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3403                                l, &e, 0,  do_utf8)) {
3404                      /* One more case for the sharp s:
3405                       * pack("U0U*", 0xDF) =~ /ss/i,
3406                       * the 0xC3 0x9F are the UTF-8
3407                       * byte sequence for the U+00DF. */
3408
3409                      if (!(do_utf8 &&
3410                            toLOWER(s[0]) == 's' &&
3411                            ln >= 2 &&
3412                            toLOWER(s[1]) == 's' &&
3413                            (U8)l[0] == 0xC3 &&
3414                            e - l >= 2 &&
3415                            (U8)l[1] == 0x9F))
3416                           sayNO;
3417                 }
3418                 locinput = e;
3419                 nextchr = UCHARAT(locinput);
3420                 break;
3421             }
3422
3423             /* Neither the target and the pattern are utf8. */
3424
3425             /* Inline the first character, for speed. */
3426             if (UCHARAT(s) != nextchr &&
3427                 UCHARAT(s) != ((OP(scan) == EXACTF)
3428                                ? PL_fold : PL_fold_locale)[nextchr])
3429                 sayNO;
3430             if (PL_regeol - locinput < ln)
3431                 sayNO;
3432             if (ln > 1 && (OP(scan) == EXACTF
3433                            ? ibcmp(s, locinput, ln)
3434                            : ibcmp_locale(s, locinput, ln)))
3435                 sayNO;
3436             locinput += ln;
3437             nextchr = UCHARAT(locinput);
3438             break;
3439             }
3440         case BOUNDL:
3441         case NBOUNDL:
3442             PL_reg_flags |= RF_tainted;
3443             /* FALL THROUGH */
3444         case BOUND:
3445         case NBOUND:
3446             /* was last char in word? */
3447             if (do_utf8) {
3448                 if (locinput == PL_bostr)
3449                     ln = '\n';
3450                 else {
3451                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3452
3453                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3454                 }
3455                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3456                     ln = isALNUM_uni(ln);
3457                     LOAD_UTF8_CHARCLASS_ALNUM();
3458                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3459                 }
3460                 else {
3461                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3462                     n = isALNUM_LC_utf8((U8*)locinput);
3463                 }
3464             }
3465             else {
3466                 ln = (locinput != PL_bostr) ?
3467                     UCHARAT(locinput - 1) : '\n';
3468                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3469                     ln = isALNUM(ln);
3470                     n = isALNUM(nextchr);
3471                 }
3472                 else {
3473                     ln = isALNUM_LC(ln);
3474                     n = isALNUM_LC(nextchr);
3475                 }
3476             }
3477             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3478                                     OP(scan) == BOUNDL))
3479                     sayNO;
3480             break;
3481         case ANYOF:
3482             if (do_utf8) {
3483                 STRLEN inclasslen = PL_regeol - locinput;
3484
3485                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3486                     goto anyof_fail;
3487                 if (locinput >= PL_regeol)
3488                     sayNO;
3489                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3490                 nextchr = UCHARAT(locinput);
3491                 break;
3492             }
3493             else {
3494                 if (nextchr < 0)
3495                     nextchr = UCHARAT(locinput);
3496                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3497                     goto anyof_fail;
3498                 if (!nextchr && locinput >= PL_regeol)
3499                     sayNO;
3500                 nextchr = UCHARAT(++locinput);
3501                 break;
3502             }
3503         anyof_fail:
3504             /* If we might have the case of the German sharp s
3505              * in a casefolding Unicode character class. */
3506
3507             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3508                  locinput += SHARP_S_SKIP;
3509                  nextchr = UCHARAT(locinput);
3510             }
3511             else
3512                  sayNO;
3513             break;
3514         /* Special char classes - The defines start on line 129 or so */
3515         CCC_TRY_AFF( ALNUM,  ALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3516         CCC_TRY_NEG(NALNUM, NALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3517
3518         CCC_TRY_AFF( SPACE,  SPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3519         CCC_TRY_NEG(NSPACE, NSPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3520
3521         CCC_TRY_AFF( DIGIT,  DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3522         CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3523
3524         case CLUMP:
3525             if (locinput >= PL_regeol)
3526                 sayNO;
3527             if  (do_utf8) {
3528                 LOAD_UTF8_CHARCLASS_MARK();
3529                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3530                     sayNO;
3531                 locinput += PL_utf8skip[nextchr];
3532                 while (locinput < PL_regeol &&
3533                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3534                     locinput += UTF8SKIP(locinput);
3535                 if (locinput > PL_regeol)
3536                     sayNO;
3537             }
3538             else
3539                locinput++;
3540             nextchr = UCHARAT(locinput);
3541             break;
3542
3543         case NREFFL:
3544         {
3545             char *s;
3546             char type;
3547             PL_reg_flags |= RF_tainted;
3548             /* FALL THROUGH */
3549         case NREF:
3550         case NREFF:
3551             type = OP(scan);
3552             n = reg_check_named_buff_matched(rex,scan);
3553
3554             if ( n ) {
3555                 type = REF + ( type - NREF );
3556                 goto do_ref;
3557             } else {
3558                 sayNO;
3559             }
3560             /* unreached */
3561         case REFFL:
3562             PL_reg_flags |= RF_tainted;
3563             /* FALL THROUGH */
3564         case REF:
3565         case REFF:
3566             n = ARG(scan);  /* which paren pair */
3567             type = OP(scan);
3568           do_ref:
3569             ln = PL_regoffs[n].start;
3570             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3571             if (*PL_reglastparen < n || ln == -1)
3572                 sayNO;                  /* Do not match unless seen CLOSEn. */
3573             if (ln == PL_regoffs[n].end)
3574                 break;
3575
3576             s = PL_bostr + ln;
3577             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3578                 char *l = locinput;
3579                 const char *e = PL_bostr + PL_regoffs[n].end;
3580                 /*
3581                  * Note that we can't do the "other character" lookup trick as
3582                  * in the 8-bit case (no pun intended) because in Unicode we
3583                  * have to map both upper and title case to lower case.
3584                  */
3585                 if (type == REFF) {
3586                     while (s < e) {
3587                         STRLEN ulen1, ulen2;
3588                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3589                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3590
3591                         if (l >= PL_regeol)
3592                             sayNO;
3593                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3594                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3595                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3596                             sayNO;
3597                         s += ulen1;
3598                         l += ulen2;
3599                     }
3600                 }
3601                 locinput = l;
3602                 nextchr = UCHARAT(locinput);
3603                 break;
3604             }
3605
3606             /* Inline the first character, for speed. */
3607             if (UCHARAT(s) != nextchr &&
3608                 (type == REF ||
3609                  (UCHARAT(s) != (type == REFF
3610                                   ? PL_fold : PL_fold_locale)[nextchr])))
3611                 sayNO;
3612             ln = PL_regoffs[n].end - ln;
3613             if (locinput + ln > PL_regeol)
3614                 sayNO;
3615             if (ln > 1 && (type == REF
3616                            ? memNE(s, locinput, ln)
3617                            : (type == REFF
3618                               ? ibcmp(s, locinput, ln)
3619                               : ibcmp_locale(s, locinput, ln))))
3620                 sayNO;
3621             locinput += ln;
3622             nextchr = UCHARAT(locinput);
3623             break;
3624         }
3625         case NOTHING:
3626         case TAIL:
3627             break;
3628         case BACK:
3629             break;
3630
3631 #undef  ST
3632 #define ST st->u.eval
3633         {
3634             SV *ret;
3635             REGEXP *re_sv;
3636             regexp *re;
3637             regexp_internal *rei;
3638             regnode *startpoint;
3639
3640         case GOSTART:
3641         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3642             if (cur_eval && cur_eval->locinput==locinput) {
3643                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3644                     Perl_croak(aTHX_ "Infinite recursion in regex");
3645                 if ( ++nochange_depth > max_nochange_depth )
3646                     Perl_croak(aTHX_
3647                         "Pattern subroutine nesting without pos change"
3648                         " exceeded limit in regex");
3649             } else {
3650                 nochange_depth = 0;
3651             }
3652             re_sv = rex_sv;
3653             re = rex;
3654             rei = rexi;
3655             (void)ReREFCNT_inc(rex_sv);
3656             if (OP(scan)==GOSUB) {
3657                 startpoint = scan + ARG2L(scan);
3658                 ST.close_paren = ARG(scan);
3659             } else {
3660                 startpoint = rei->program+1;
3661                 ST.close_paren = 0;
3662             }
3663             goto eval_recurse_doit;
3664             /* NOTREACHED */
3665         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3666             if (cur_eval && cur_eval->locinput==locinput) {
3667                 if ( ++nochange_depth > max_nochange_depth )
3668                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3669             } else {
3670                 nochange_depth = 0;
3671             }
3672             {
3673                 /* execute the code in the {...} */
3674                 dSP;
3675                 SV ** const before = SP;
3676                 OP_4tree * const oop = PL_op;
3677                 COP * const ocurcop = PL_curcop;
3678                 PAD *old_comppad;
3679                 char *saved_regeol = PL_regeol;
3680
3681                 n = ARG(scan);
3682                 PL_op = (OP_4tree*)rexi->data->data[n];
3683                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3684                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3685                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3686                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3687
3688                 if (sv_yes_mark) {
3689                     SV *sv_mrk = get_sv("REGMARK", 1);
3690                     sv_setsv(sv_mrk, sv_yes_mark);
3691                 }
3692
3693                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3694                 SPAGAIN;
3695                 if (SP == before)
3696                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3697                 else {
3698                     ret = POPs;
3699                     PUTBACK;
3700                 }
3701
3702                 PL_op = oop;
3703                 PAD_RESTORE_LOCAL(old_comppad);
3704                 PL_curcop = ocurcop;
3705                 PL_regeol = saved_regeol;
3706                 if (!logical) {
3707                     /* /(?{...})/ */
3708                     sv_setsv(save_scalar(PL_replgv), ret);
3709                     break;
3710                 }
3711             }
3712             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3713                 logical = 0;
3714                 {
3715                     /* extract RE object from returned value; compiling if
3716                      * necessary */
3717                     MAGIC *mg = NULL;
3718                     REGEXP *rx = NULL;
3719
3720                     if (SvROK(ret)) {
3721                         SV *const sv = SvRV(ret);
3722
3723                         if (SvTYPE(sv) == SVt_REGEXP) {
3724                             rx = (REGEXP*) sv;
3725                         } else if (SvSMAGICAL(sv)) {
3726                             mg = mg_find(sv, PERL_MAGIC_qr);
3727                             assert(mg);
3728                         }
3729                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3730                         rx = (REGEXP*) ret;
3731                     } else if (SvSMAGICAL(ret)) {
3732                         if (SvGMAGICAL(ret)) {
3733                             /* I don't believe that there is ever qr magic
3734                                here.  */
3735                             assert(!mg_find(ret, PERL_MAGIC_qr));
3736                             sv_unmagic(ret, PERL_MAGIC_qr);
3737                         }
3738                         else {
3739                             mg = mg_find(ret, PERL_MAGIC_qr);
3740                             /* testing suggests mg only ends up non-NULL for
3741                                scalars who were upgraded and compiled in the
3742                                else block below. In turn, this is only
3743                                triggered in the "postponed utf8 string" tests
3744                                in t/op/pat.t  */
3745                         }
3746                     }
3747
3748                     if (mg) {
3749                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3750                         assert(rx);
3751                     }
3752                     if (rx) {
3753                         rx = reg_temp_copy(NULL, rx);
3754                     }
3755                     else {
3756                         U32 pm_flags = 0;
3757                         const I32 osize = PL_regsize;
3758
3759                         if (DO_UTF8(ret)) {
3760                             assert (SvUTF8(ret));
3761                         } else if (SvUTF8(ret)) {
3762                             /* Not doing UTF-8, despite what the SV says. Is
3763                                this only if we're trapped in use 'bytes'?  */
3764                             /* Make a copy of the octet sequence, but without
3765                                the flag on, as the compiler now honours the
3766                                SvUTF8 flag on ret.  */
3767                             STRLEN len;
3768                             const char *const p = SvPV(ret, len);
3769                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3770                         }
3771                         rx = CALLREGCOMP(ret, pm_flags);
3772                         if (!(SvFLAGS(ret)
3773                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3774                                  | SVs_GMG))) {
3775                             /* This isn't a first class regexp. Instead, it's
3776                                caching a regexp onto an existing, Perl visible
3777                                scalar.  */
3778                             sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
3779                         }
3780                         PL_regsize = osize;
3781                     }
3782                     re_sv = rx;
3783                     re = (struct regexp *)SvANY(rx);
3784                 }
3785                 RXp_MATCH_COPIED_off(re);
3786                 re->subbeg = rex->subbeg;
3787                 re->sublen = rex->sublen;
3788                 rei = RXi_GET(re);
3789                 DEBUG_EXECUTE_r(
3790                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3791                         "Matching embedded");
3792                 );
3793                 startpoint = rei->program + 1;
3794                 ST.close_paren = 0; /* only used for GOSUB */
3795                 /* borrowed from regtry */
3796                 if (PL_reg_start_tmpl <= re->nparens) {
3797                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3798                     if(PL_reg_start_tmp)
3799                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3800                     else
3801                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3802                 }
3803
3804         eval_recurse_doit: /* Share code with GOSUB below this line */
3805                 /* run the pattern returned from (??{...}) */
3806                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3807                 REGCP_SET(ST.lastcp);
3808
3809                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3810
3811                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3812                 PL_reglastparen = &re->lastparen;
3813                 PL_reglastcloseparen = &re->lastcloseparen;
3814                 re->lastparen = 0;
3815                 re->lastcloseparen = 0;
3816
3817                 PL_reginput = locinput;
3818                 PL_regsize = 0;
3819
3820                 /* XXXX This is too dramatic a measure... */
3821                 PL_reg_maxiter = 0;
3822
3823                 ST.toggle_reg_flags = PL_reg_flags;
3824                 if (RX_UTF8(re_sv))
3825                     PL_reg_flags |= RF_utf8;
3826                 else
3827                     PL_reg_flags &= ~RF_utf8;
3828                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3829
3830                 ST.prev_rex = rex_sv;
3831                 ST.prev_curlyx = cur_curlyx;
3832                 SETREX(rex_sv,re_sv);
3833                 rex = re;
3834                 rexi = rei;
3835                 cur_curlyx = NULL;
3836                 ST.B = next;
3837                 ST.prev_eval = cur_eval;
3838                 cur_eval = st;
3839                 /* now continue from first node in postoned RE */
3840                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3841                 /* NOTREACHED */
3842             }
3843             /* logical is 1,   /(?(?{...})X|Y)/ */
3844             sw = (bool)SvTRUE(ret);
3845             logical = 0;
3846             break;
3847         }
3848
3849         case EVAL_AB: /* cleanup after a successful (??{A})B */
3850             /* note: this is called twice; first after popping B, then A */
3851             PL_reg_flags ^= ST.toggle_reg_flags;
3852             ReREFCNT_dec(rex_sv);
3853             SETREX(rex_sv,ST.prev_rex);
3854             rex = (struct regexp *)SvANY(rex_sv);
3855             rexi = RXi_GET(rex);
3856             regcpblow(ST.cp);
3857             cur_eval = ST.prev_eval;
3858             cur_curlyx = ST.prev_curlyx;
3859
3860             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3861             PL_reglastparen = &rex->lastparen;
3862             PL_reglastcloseparen = &rex->lastcloseparen;
3863             /* also update PL_regoffs */
3864             PL_regoffs = rex->offs;
3865
3866             /* XXXX This is too dramatic a measure... */
3867             PL_reg_maxiter = 0;
3868             if ( nochange_depth )
3869                 nochange_depth--;
3870             sayYES;
3871
3872
3873         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3874             /* note: this is called twice; first after popping B, then A */
3875             PL_reg_flags ^= ST.toggle_reg_flags;
3876             ReREFCNT_dec(rex_sv);
3877             SETREX(rex_sv,ST.prev_rex);
3878             rex = (struct regexp *)SvANY(rex_sv);
3879             rexi = RXi_GET(rex);
3880             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3881             PL_reglastparen = &rex->lastparen;
3882             PL_reglastcloseparen = &rex->lastcloseparen;
3883
3884             PL_reginput = locinput;
3885             REGCP_UNWIND(ST.lastcp);
3886             regcppop(rex);
3887             cur_eval = ST.prev_eval;
3888             cur_curlyx = ST.prev_curlyx;
3889             /* XXXX This is too dramatic a measure... */
3890             PL_reg_maxiter = 0;
3891             if ( nochange_depth )
3892                 nochange_depth--;
3893             sayNO_SILENT;
3894 #undef ST
3895
3896         case OPEN:
3897             n = ARG(scan);  /* which paren pair */
3898             PL_reg_start_tmp[n] = locinput;
3899             if (n > PL_regsize)
3900                 PL_regsize = n;
3901             lastopen = n;
3902             break;
3903         case CLOSE:
3904             n = ARG(scan);  /* which paren pair */
3905             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3906             PL_regoffs[n].end = locinput - PL_bostr;
3907             /*if (n > PL_regsize)
3908                 PL_regsize = n;*/
3909             if (n > *PL_reglastparen)
3910                 *PL_reglastparen = n;
3911             *PL_reglastcloseparen = n;
3912             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3913                 goto fake_end;
3914             }
3915             break;
3916         case ACCEPT:
3917             if (ARG(scan)){
3918                 regnode *cursor;
3919                 for (cursor=scan;
3920                      cursor && OP(cursor)!=END;
3921                      cursor=regnext(cursor))
3922                 {
3923                     if ( OP(cursor)==CLOSE ){
3924                         n = ARG(cursor);
3925                         if ( n <= lastopen ) {
3926                             PL_regoffs[n].start
3927                                 = PL_reg_start_tmp[n] - PL_bostr;
3928                             PL_regoffs[n].end = locinput - PL_bostr;
3929                             /*if (n > PL_regsize)
3930                             PL_regsize = n;*/
3931                             if (n > *PL_reglastparen)
3932                                 *PL_reglastparen = n;
3933                             *PL_reglastcloseparen = n;
3934                             if ( n == ARG(scan) || (cur_eval &&
3935                                 cur_eval->u.eval.close_paren == n))
3936                                 break;
3937                         }
3938                     }
3939                 }
3940             }
3941             goto fake_end;
3942             /*NOTREACHED*/
3943         case GROUPP:
3944             n = ARG(scan);  /* which paren pair */
3945             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3946             break;
3947         case NGROUPP:
3948             /* reg_check_named_buff_matched returns 0 for no match */
3949             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3950             break;
3951         case INSUBP:
3952             n = ARG(scan);
3953             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3954             break;
3955         case DEFINEP:
3956             sw = 0;
3957             break;
3958         case IFTHEN:
3959             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3960             if (sw)
3961                 next = NEXTOPER(NEXTOPER(scan));
3962             else {
3963                 next = scan + ARG(scan);
3964                 if (OP(next) == IFTHEN) /* Fake one. */
3965                     next = NEXTOPER(NEXTOPER(next));
3966             }
3967             break;
3968         case LOGICAL:
3969             logical = scan->flags;
3970             break;
3971
3972 /*******************************************************************
3973
3974 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3975 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3976 STAR/PLUS/CURLY/CURLYN are used instead.)
3977
3978 A*B is compiled as <CURLYX><A><WHILEM><B>
3979
3980 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3981 state, which contains the current count, initialised to -1. It also sets
3982 cur_curlyx to point to this state, with any previous value saved in the
3983 state block.
3984
3985 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3986 since the pattern may possibly match zero times (i.e. it's a while {} loop
3987 rather than a do {} while loop).
3988
3989 Each entry to WHILEM represents a successful match of A. The count in the
3990 CURLYX block is incremented, another WHILEM state is pushed, and execution
3991 passes to A or B depending on greediness and the current count.
3992
3993 For example, if matching against the string a1a2a3b (where the aN are
3994 substrings that match /A/), then the match progresses as follows: (the
3995 pushed states are interspersed with the bits of strings matched so far):
3996
3997     <CURLYX cnt=-1>
3998     <CURLYX cnt=0><WHILEM>
3999     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4000     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4001     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4002     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4003
4004 (Contrast this with something like CURLYM, which maintains only a single
4005 backtrack state:
4006
4007     <CURLYM cnt=0> a1
4008     a1 <CURLYM cnt=1> a2
4009     a1 a2 <CURLYM cnt=2> a3
4010     a1 a2 a3 <CURLYM cnt=3> b
4011 )
4012
4013 Each WHILEM state block marks a point to backtrack to upon partial failure
4014 of A or B, and also contains some minor state data related to that
4015 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4016 overall state, such as the count, and pointers to the A and B ops.
4017
4018 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4019 must always point to the *current* CURLYX block, the rules are:
4020
4021 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4022 and set cur_curlyx to point the new block.
4023
4024 When popping the CURLYX block after a successful or unsuccessful match,
4025 restore the previous cur_curlyx.
4026
4027 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4028 to the outer one saved in the CURLYX block.
4029
4030 When popping the WHILEM block after a successful or unsuccessful B match,
4031 restore the previous cur_curlyx.
4032
4033 Here's an example for the pattern (AI* BI)*BO
4034 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4035
4036 cur_
4037 curlyx backtrack stack
4038 ------ ---------------
4039 NULL
4040 CO     <CO prev=NULL> <WO>
4041 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4042 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4043 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4044
4045 At this point the pattern succeeds, and we work back down the stack to
4046 clean up, restoring as we go:
4047
4048 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4049 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4050 CO     <CO prev=NULL> <WO>
4051 NULL
4052
4053 *******************************************************************/
4054
4055 #define ST st->u.curlyx
4056
4057         case CURLYX:    /* start of /A*B/  (for complex A) */
4058         {
4059             /* No need to save/restore up to this paren */
4060             I32 parenfloor = scan->flags;
4061
4062             assert(next); /* keep Coverity happy */
4063             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4064                 next += ARG(next);
4065
4066             /* XXXX Probably it is better to teach regpush to support
4067                parenfloor > PL_regsize... */
4068             if (parenfloor > (I32)*PL_reglastparen)
4069                 parenfloor = *PL_reglastparen; /* Pessimization... */
4070
4071             ST.prev_curlyx= cur_curlyx;
4072             cur_curlyx = st;
4073             ST.cp = PL_savestack_ix;
4074
4075             /* these fields contain the state of the current curly.
4076              * they are accessed by subsequent WHILEMs */
4077             ST.parenfloor = parenfloor;
4078             ST.min = ARG1(scan);
4079             ST.max = ARG2(scan);
4080             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4081             ST.B = next;
4082             ST.minmod = minmod;
4083             minmod = 0;
4084             ST.count = -1;      /* this will be updated by WHILEM */
4085             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4086
4087             PL_reginput = locinput;
4088             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4089             /* NOTREACHED */
4090         }
4091
4092         case CURLYX_end: /* just finished matching all of A*B */
4093             cur_curlyx = ST.prev_curlyx;
4094             sayYES;
4095             /* NOTREACHED */
4096
4097         case CURLYX_end_fail: /* just failed to match all of A*B */
4098             regcpblow(ST.cp);
4099             cur_curlyx = ST.prev_curlyx;
4100             sayNO;
4101             /* NOTREACHED */
4102
4103
4104 #undef ST
4105 #define ST st->u.whilem
4106
4107         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4108         {
4109             /* see the discussion above about CURLYX/WHILEM */
4110             I32 n;
4111             assert(cur_curlyx); /* keep Coverity happy */
4112             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4113             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4114             ST.cache_offset = 0;
4115             ST.cache_mask = 0;
4116
4117             PL_reginput = locinput;
4118
4119             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4120                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4121                   REPORT_CODE_OFF+depth*2, "", (long)n,
4122                   (long)cur_curlyx->u.curlyx.min,
4123                   (long)cur_curlyx->u.curlyx.max)
4124             );
4125
4126             /* First just match a string of min A's. */
4127
4128             if (n < cur_curlyx->u.curlyx.min) {
4129                 cur_curlyx->u.curlyx.lastloc = locinput;
4130                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4131                 /* NOTREACHED */
4132             }
4133
4134             /* If degenerate A matches "", assume A done. */
4135
4136             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4137                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4138                    "%*s  whilem: empty match detected, trying continuation...\n",
4139                    REPORT_CODE_OFF+depth*2, "")
4140                 );
4141                 goto do_whilem_B_max;
4142             }
4143
4144             /* super-linear cache processing */
4145
4146             if (scan->flags) {
4147
4148                 if (!PL_reg_maxiter) {
4149                     /* start the countdown: Postpone detection until we
4150                      * know the match is not *that* much linear. */
4151                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4152                     /* possible overflow for long strings and many CURLYX's */
4153                     if (PL_reg_maxiter < 0)
4154                         PL_reg_maxiter = I32_MAX;
4155                     PL_reg_leftiter = PL_reg_maxiter;
4156                 }
4157
4158                 if (PL_reg_leftiter-- == 0) {
4159                     /* initialise cache */
4160                     const I32 size = (PL_reg_maxiter + 7)/8;
4161                     if (PL_reg_poscache) {
4162                         if ((I32)PL_reg_poscache_size < size) {
4163                             Renew(PL_reg_poscache, size, char);
4164                             PL_reg_poscache_size = size;
4165                         }
4166                         Zero(PL_reg_poscache, size, char);
4167                     }
4168                     else {
4169                         PL_reg_poscache_size = size;
4170                         Newxz(PL_reg_poscache, size, char);
4171                     }
4172                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4173       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4174                               PL_colors[4], PL_colors[5])
4175                     );
4176                 }
4177
4178                 if (PL_reg_leftiter < 0) {
4179                     /* have we already failed at this position? */
4180                     I32 offset, mask;
4181                     offset  = (scan->flags & 0xf) - 1
4182                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4183                     mask    = 1 << (offset % 8);
4184                     offset /= 8;
4185                     if (PL_reg_poscache[offset] & mask) {
4186                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4187                             "%*s  whilem: (cache) already tried at this position...\n",
4188                             REPORT_CODE_OFF+depth*2, "")
4189                         );
4190                         sayNO; /* cache records failure */
4191                     }
4192                     ST.cache_offset = offset;
4193                     ST.cache_mask   = mask;
4194                 }
4195             }
4196
4197             /* Prefer B over A for minimal matching. */
4198
4199             if (cur_curlyx->u.curlyx.minmod) {
4200                 ST.save_curlyx = cur_curlyx;
4201                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4202                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4203                 REGCP_SET(ST.lastcp);
4204                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4205                 /* NOTREACHED */
4206             }
4207
4208             /* Prefer A over B for maximal matching. */
4209
4210             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4211                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4212                 cur_curlyx->u.curlyx.lastloc = locinput;
4213                 REGCP_SET(ST.lastcp);
4214                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4215                 /* NOTREACHED */
4216             }
4217             goto do_whilem_B_max;
4218         }
4219         /* NOTREACHED */
4220
4221         case WHILEM_B_min: /* just matched B in a minimal match */
4222         case WHILEM_B_max: /* just matched B in a maximal match */
4223             cur_curlyx = ST.save_curlyx;
4224             sayYES;
4225             /* NOTREACHED */
4226
4227         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4228             cur_curlyx = ST.save_curlyx;
4229             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4230             cur_curlyx->u.curlyx.count--;
4231             CACHEsayNO;
4232             /* NOTREACHED */
4233
4234         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4235             REGCP_UNWIND(ST.lastcp);
4236             regcppop(rex);
4237             /* FALL THROUGH */
4238         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4239             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4240             cur_curlyx->u.curlyx.count--;
4241             CACHEsayNO;
4242             /* NOTREACHED */
4243
4244         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4245             REGCP_UNWIND(ST.lastcp);
4246             regcppop(rex);      /* Restore some previous $<digit>s? */
4247             PL_reginput = locinput;
4248             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4249                 "%*s  whilem: failed, trying continuation...\n",
4250                 REPORT_CODE_OFF+depth*2, "")
4251             );
4252           do_whilem_B_max:
4253             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4254                 && ckWARN(WARN_REGEXP)
4255                 && !(PL_reg_flags & RF_warned))
4256             {
4257                 PL_reg_flags |= RF_warned;
4258                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4259                      "Complex regular subexpression recursion",
4260                      REG_INFTY - 1);
4261             }
4262
4263             /* now try B */
4264             ST.save_curlyx = cur_curlyx;
4265             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4266             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4267             /* NOTREACHED */
4268
4269         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4270             cur_curlyx = ST.save_curlyx;
4271             REGCP_UNWIND(ST.lastcp);
4272             regcppop(rex);
4273
4274             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4275                 /* Maximum greed exceeded */
4276                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4277                     && ckWARN(WARN_REGEXP)
4278                     && !(PL_reg_flags & RF_warned))
4279                 {
4280                     PL_reg_flags |= RF_warned;
4281                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4282                         "%s limit (%d) exceeded",
4283                         "Complex regular subexpression recursion",
4284                         REG_INFTY - 1);
4285                 }
4286                 cur_curlyx->u.curlyx.count--;
4287                 CACHEsayNO;
4288             }
4289
4290             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4291                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4292             );
4293             /* Try grabbing another A and see if it helps. */
4294             PL_reginput = locinput;
4295             cur_curlyx->u.curlyx.lastloc = locinput;
4296             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4297             REGCP_SET(ST.lastcp);
4298             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4299             /* NOTREACHED */
4300
4301 #undef  ST
4302 #define ST st->u.branch
4303
4304         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4305             next = scan + ARG(scan);
4306             if (next == scan)
4307                 next = NULL;
4308             scan = NEXTOPER(scan);
4309             /* FALL THROUGH */
4310
4311         case BRANCH:        /*  /(...|A|...)/ */
4312             scan = NEXTOPER(scan); /* scan now points to inner node */
4313             ST.lastparen = *PL_reglastparen;
4314             ST.next_branch = next;
4315             REGCP_SET(ST.cp);
4316             PL_reginput = locinput;
4317
4318             /* Now go into the branch */
4319             if (has_cutgroup) {
4320                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4321             } else {
4322                 PUSH_STATE_GOTO(BRANCH_next, scan);
4323             }
4324             /* NOTREACHED */
4325         case CUTGROUP:
4326             PL_reginput = locinput;
4327             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4328                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4329             PUSH_STATE_GOTO(CUTGROUP_next,next);
4330             /* NOTREACHED */
4331         case CUTGROUP_next_fail:
4332             do_cutgroup = 1;
4333             no_final = 1;
4334             if (st->u.mark.mark_name)
4335                 sv_commit = st->u.mark.mark_name;
4336             sayNO;
4337             /* NOTREACHED */
4338         case BRANCH_next:
4339             sayYES;
4340             /* NOTREACHED */
4341         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4342             if (do_cutgroup) {
4343                 do_cutgroup = 0;
4344                 no_final = 0;
4345             }
4346             REGCP_UNWIND(ST.cp);
4347             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4348                 PL_regoffs[n].end = -1;
4349             *PL_reglastparen = n;
4350             /*dmq: *PL_reglastcloseparen = n; */
4351             scan = ST.next_branch;
4352             /* no more branches? */
4353             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4354                 DEBUG_EXECUTE_r({
4355                     PerlIO_printf( Perl_debug_log,
4356                         "%*s  %sBRANCH failed...%s\n",
4357                         REPORT_CODE_OFF+depth*2, "",
4358                         PL_colors[4],
4359                         PL_colors[5] );
4360                 });
4361                 sayNO_SILENT;
4362             }
4363             continue; /* execute next BRANCH[J] op */
4364             /* NOTREACHED */
4365
4366         case MINMOD:
4367             minmod = 1;
4368             break;
4369
4370 #undef  ST
4371 #define ST st->u.curlym
4372
4373         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4374
4375             /* This is an optimisation of CURLYX that enables us to push
4376              * only a single backtracking state, no matter how many matches
4377              * there are in {m,n}. It relies on the pattern being constant
4378              * length, with no parens to influence future backrefs
4379              */
4380
4381             ST.me = scan;
4382             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4383
4384             /* if paren positive, emulate an OPEN/CLOSE around A */
4385             if (ST.me->flags) {
4386                 U32 paren = ST.me->flags;
4387                 if (paren > PL_regsize)
4388                     PL_regsize = paren;
4389                 if (paren > *PL_reglastparen)
4390                     *PL_reglastparen = paren;
4391                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4392             }
4393             ST.A = scan;
4394             ST.B = next;
4395             ST.alen = 0;
4396             ST.count = 0;
4397             ST.minmod = minmod;
4398             minmod = 0;
4399             ST.c1 = CHRTEST_UNINIT;
4400             REGCP_SET(ST.cp);
4401
4402             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4403                 goto curlym_do_B;
4404
4405           curlym_do_A: /* execute the A in /A{m,n}B/  */
4406             PL_reginput = locinput;
4407             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4408             /* NOTREACHED */
4409
4410         case CURLYM_A: /* we've just matched an A */
4411             locinput = st->locinput;
4412             nextchr = UCHARAT(locinput);
4413
4414             ST.count++;
4415             /* after first match, determine A's length: u.curlym.alen */
4416             if (ST.count == 1) {
4417                 if (PL_reg_match_utf8) {
4418                     char *s = locinput;
4419                     while (s < PL_reginput) {
4420                         ST.alen++;
4421                         s += UTF8SKIP(s);
4422                     }
4423                 }
4424                 else {
4425                     ST.alen = PL_reginput - locinput;
4426                 }
4427                 if (ST.alen == 0)
4428                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4429             }
4430             DEBUG_EXECUTE_r(
4431                 PerlIO_printf(Perl_debug_log,
4432                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4433                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4434                           (IV) ST.count, (IV)ST.alen)
4435             );
4436
4437             locinput = PL_reginput;
4438
4439             if (cur_eval && cur_eval->u.eval.close_paren &&
4440                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4441                 goto fake_end;
4442
4443             {
4444                 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4445                 if ( max == REG_INFTY || ST.count < max )
4446                     goto curlym_do_A; /* try to match another A */
4447             }
4448             goto curlym_do_B; /* try to match B */
4449
4450         case CURLYM_A_fail: /* just failed to match an A */
4451             REGCP_UNWIND(ST.cp);
4452
4453             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4454                 || (cur_eval && cur_eval->u.eval.close_paren &&
4455                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4456                 sayNO;
4457
4458           curlym_do_B: /* execute the B in /A{m,n}B/  */
4459             PL_reginput = locinput;
4460             if (ST.c1 == CHRTEST_UNINIT) {
4461                 /* calculate c1 and c2 for possible match of 1st char
4462                  * following curly */
4463                 ST.c1 = ST.c2 = CHRTEST_VOID;
4464                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4465                     regnode *text_node = ST.B;
4466                     if (! HAS_TEXT(text_node))
4467                         FIND_NEXT_IMPT(text_node);
4468                     /* this used to be
4469
4470                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4471
4472                         But the former is redundant in light of the latter.
4473
4474                         if this changes back then the macro for
4475                         IS_TEXT and friends need to change.
4476                      */
4477                     if (PL_regkind[OP(text_node)] == EXACT)
4478                     {
4479
4480                         ST.c1 = (U8)*STRING(text_node);
4481                         ST.c2 =
4482                             (IS_TEXTF(text_node))
4483                             ? PL_fold[ST.c1]
4484                             : (IS_TEXTFL(text_node))
4485                                 ? PL_fold_locale[ST.c1]
4486                                 : ST.c1;
4487                     }
4488                 }
4489             }
4490
4491             DEBUG_EXECUTE_r(
4492                 PerlIO_printf(Perl_debug_log,
4493                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4494                     (int)(REPORT_CODE_OFF+(depth*2)),
4495                     "", (IV)ST.count)
4496                 );
4497             if (ST.c1 != CHRTEST_VOID
4498                     && UCHARAT(PL_reginput) != ST.c1
4499                     && UCHARAT(PL_reginput) != ST.c2)
4500             {
4501                 /* simulate B failing */
4502                 DEBUG_OPTIMISE_r(
4503                     PerlIO_printf(Perl_debug_log,
4504                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4505                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4506                         (IV)ST.c1,(IV)ST.c2
4507                 ));
4508                 state_num = CURLYM_B_fail;
4509                 goto reenter_switch;
4510             }
4511
4512             if (ST.me->flags) {
4513                 /* mark current A as captured */
4514                 I32 paren = ST.me->flags;
4515                 if (ST.count) {
4516                     PL_regoffs[paren].start
4517                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4518                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4519                     /*dmq: *PL_reglastcloseparen = paren; */
4520                 }
4521                 else
4522                     PL_regoffs[paren].end = -1;
4523                 if (cur_eval && cur_eval->u.eval.close_paren &&
4524                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4525                 {
4526                     if (ST.count)
4527                         goto fake_end;
4528                     else
4529                         sayNO;
4530                 }
4531             }
4532
4533             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4534             /* NOTREACHED */
4535
4536         case CURLYM_B_fail: /* just failed to match a B */
4537             REGCP_UNWIND(ST.cp);
4538             if (ST.minmod) {
4539                 I32 max = ARG2(ST.me);
4540                 if (max != REG_INFTY && ST.count == max)
4541                     sayNO;
4542                 goto curlym_do_A; /* try to match a further A */
4543             }
4544             /* backtrack one A */
4545             if (ST.count == ARG1(ST.me) /* min */)
4546                 sayNO;
4547             ST.count--;
4548             locinput = HOPc(locinput, -ST.alen);
4549             goto curlym_do_B; /* try to match B */
4550
4551 #undef ST
4552 #define ST st->u.curly
4553
4554 #define CURLY_SETPAREN(paren, success) \
4555     if (paren) { \
4556         if (success) { \
4557             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4558             PL_regoffs[paren].end = locinput - PL_bostr; \
4559             *PL_reglastcloseparen = paren; \
4560         } \
4561         else \
4562             PL_regoffs[paren].end = -1; \
4563     }
4564
4565         case STAR:              /*  /A*B/ where A is width 1 */
4566             ST.paren = 0;
4567             ST.min = 0;
4568             ST.max = REG_INFTY;
4569             scan = NEXTOPER(scan);
4570             goto repeat;
4571         case PLUS:              /*  /A+B/ where A is width 1 */
4572             ST.paren = 0;
4573             ST.min = 1;
4574             ST.max = REG_INFTY;
4575             scan = NEXTOPER(scan);
4576             goto repeat;
4577         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4578             ST.paren = scan->flags;     /* Which paren to set */
4579             if (ST.paren > PL_regsize)
4580                 PL_regsize = ST.paren;
4581             if (ST.paren > *PL_reglastparen)
4582                 *PL_reglastparen = ST.paren;
4583             ST.min = ARG1(scan);  /* min to match */
4584             ST.max = ARG2(scan);  /* max to match */
4585             if (cur_eval && cur_eval->u.eval.close_paren &&
4586                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4587                 ST.min=1;
4588                 ST.max=1;
4589             }
4590             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4591             goto repeat;
4592         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4593             ST.paren = 0;
4594             ST.min = ARG1(scan);  /* min to match */
4595             ST.max = ARG2(scan);  /* max to match */
4596             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4597           repeat:
4598             /*
4599             * Lookahead to avoid useless match attempts
4600             * when we know what character comes next.
4601             *
4602             * Used to only do .*x and .*?x, but now it allows
4603             * for )'s, ('s and (?{ ... })'s to be in the way
4604             * of the quantifier and the EXACT-like node.  -- japhy
4605             */
4606
4607             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4608                 sayNO;
4609             if (HAS_TEXT(next) || JUMPABLE(next)) {
4610                 U8 *s;
4611                 regnode *text_node = next;
4612
4613                 if (! HAS_TEXT(text_node))
4614                     FIND_NEXT_IMPT(text_node);
4615
4616                 if (! HAS_TEXT(text_node))
4617                     ST.c1 = ST.c2 = CHRTEST_VOID;
4618                 else {
4619                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4620                         ST.c1 = ST.c2 = CHRTEST_VOID;
4621                         goto assume_ok_easy;
4622                     }
4623                     else
4624                         s = (U8*)STRING(text_node);
4625
4626                     /*  Currently we only get here when
4627
4628                         PL_rekind[OP(text_node)] == EXACT
4629
4630                         if this changes back then the macro for IS_TEXT and
4631                         friends need to change. */
4632                     if (!UTF) {
4633                         ST.c2 = ST.c1 = *s;
4634                         if (IS_TEXTF(text_node))
4635                             ST.c2 = PL_fold[ST.c1];
4636                         else if (IS_TEXTFL(text_node))
4637                             ST.c2 = PL_fold_locale[ST.c1];
4638                     }
4639                     else { /* UTF */
4640                         if (IS_TEXTF(text_node)) {
4641                              STRLEN ulen1, ulen2;
4642                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4643                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4644
4645                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4646                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4647 #ifdef EBCDIC
4648                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4649                                                     ckWARN(WARN_UTF8) ?
4650                                                     0 : UTF8_ALLOW_ANY);
4651                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4652                                                     ckWARN(WARN_UTF8) ?
4653                                                     0 : UTF8_ALLOW_ANY);
4654 #else
4655                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4656                                                     uniflags);
4657                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4658                                                     uniflags);
4659 #endif
4660                         }
4661                         else {
4662                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4663                                                      uniflags);
4664                         }
4665                     }
4666                 }
4667             }
4668             else
4669                 ST.c1 = ST.c2 = CHRTEST_VOID;
4670         assume_ok_easy:
4671
4672             ST.A = scan;
4673             ST.B = next;
4674             PL_reginput = locinput;
4675             if (minmod) {
4676                 minmod = 0;
4677                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4678                     sayNO;
4679                 ST.count = ST.min;
4680                 locinput = PL_reginput;
4681                 REGCP_SET(ST.cp);
4682                 if (ST.c1 == CHRTEST_VOID)
4683                     goto curly_try_B_min;
4684
4685                 ST.oldloc = locinput;
4686
4687                 /* set ST.maxpos to the furthest point along the
4688                  * string that could possibly match */
4689                 if  (ST.max == REG_INFTY) {
4690                     ST.maxpos = PL_regeol - 1;
4691                     if (do_utf8)
4692                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4693                             ST.maxpos--;
4694                 }
4695                 else if (do_utf8) {
4696                     int m = ST.max - ST.min;
4697                     for (ST.maxpos = locinput;
4698                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4699                         ST.maxpos += UTF8SKIP(ST.maxpos);
4700                 }
4701                 else {
4702                     ST.maxpos = locinput + ST.max - ST.min;
4703                     if (ST.maxpos >= PL_regeol)
4704                         ST.maxpos = PL_regeol - 1;
4705                 }
4706                 goto curly_try_B_min_known;
4707
4708             }
4709             else {
4710                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4711                 locinput = PL_reginput;
4712                 if (ST.count < ST.min)
4713                     sayNO;
4714                 if ((ST.count > ST.min)
4715                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4716                 {
4717                     /* A{m,n} must come at the end of the string, there's
4718                      * no point in backing off ... */
4719                     ST.min = ST.count;
4720                     /* ...except that $ and \Z can match before *and* after
4721                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4722                        We may back off by one in this case. */
4723                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4724                         ST.min--;
4725                 }
4726                 REGCP_SET(ST.cp);
4727                 goto curly_try_B_max;
4728             }
4729             /* NOTREACHED */
4730
4731
4732         case CURLY_B_min_known_fail:
4733             /* failed to find B in a non-greedy match where c1,c2 valid */
4734             if (ST.paren && ST.count)
4735                 PL_regoffs[ST.paren].end = -1;
4736
4737             PL_reginput = locinput;     /* Could be reset... */
4738             REGCP_UNWIND(ST.cp);
4739             /* Couldn't or didn't -- move forward. */
4740             ST.oldloc = locinput;
4741             if (do_utf8)
4742                 locinput += UTF8SKIP(locinput);
4743             else
4744                 locinput++;
4745             ST.count++;
4746           curly_try_B_min_known:
4747              /* find the next place where 'B' could work, then call B */
4748             {
4749                 int n;
4750                 if (do_utf8) {
4751                     n = (ST.oldloc == locinput) ? 0 : 1;
4752                     if (ST.c1 == ST.c2) {
4753                         STRLEN len;
4754                         /* set n to utf8_distance(oldloc, locinput) */
4755                         while (locinput <= ST.maxpos &&
4756                                utf8n_to_uvchr((U8*)locinput,
4757                                               UTF8_MAXBYTES, &len,
4758                                               uniflags) != (UV)ST.c1) {
4759                             locinput += len;
4760                             n++;
4761                         }
4762                     }
4763                     else {
4764                         /* set n to utf8_distance(oldloc, locinput) */
4765                         while (locinput <= ST.maxpos) {
4766                             STRLEN len;
4767                             const UV c = utf8n_to_uvchr((U8*)locinput,
4768                                                   UTF8_MAXBYTES, &len,
4769                                                   uniflags);
4770                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4771                                 break;
4772                             locinput += len;
4773                             n++;
4774                         }
4775                     }
4776                 }
4777                 else {
4778                     if (ST.c1 == ST.c2) {
4779                         while (locinput <= ST.maxpos &&
4780                                UCHARAT(locinput) != ST.c1)
4781                             locinput++;
4782                     }
4783                     else {
4784                         while (locinput <= ST.maxpos
4785                                && UCHARAT(locinput) != ST.c1
4786                                && UCHARAT(locinput) != ST.c2)
4787                             locinput++;
4788                     }
4789                     n = locinput - ST.oldloc;
4790                 }
4791                 if (locinput > ST.maxpos)
4792                     sayNO;
4793                 /* PL_reginput == oldloc now */
4794                 if (n) {
4795                     ST.count += n;
4796                     if (regrepeat(rex, ST.A, n, depth) < n)
4797                         sayNO;
4798                 }
4799                 PL_reginput = locinput;
4800                 CURLY_SETPAREN(ST.paren, ST.count);
4801                 if (cur_eval && cur_eval->u.eval.close_paren &&
4802                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4803                     goto fake_end;
4804                 }
4805                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4806             }
4807             /* NOTREACHED */
4808
4809
4810         case CURLY_B_min_fail:
4811             /* failed to find B in a non-greedy match where c1,c2 invalid */
4812             if (ST.paren && ST.count)
4813                 PL_regoffs[ST.paren].end = -1;
4814
4815             REGCP_UNWIND(ST.cp);
4816             /* failed -- move forward one */
4817             PL_reginput = locinput;
4818             if (regrepeat(rex, ST.A, 1, depth)) {
4819                 ST.count++;
4820                 locinput = PL_reginput;
4821                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4822                         ST.count > 0)) /* count overflow ? */
4823                 {
4824                   curly_try_B_min:
4825                     CURLY_SETPAREN(ST.paren, ST.count);
4826                     if (cur_eval && cur_eval->u.eval.close_paren &&
4827                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4828                         goto fake_end;
4829                     }
4830                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4831                 }
4832             }
4833             sayNO;
4834             /* NOTREACHED */
4835
4836
4837         curly_try_B_max:
4838             /* a successful greedy match: now try to match B */
4839             if (cur_eval && cur_eval->u.eval.close_paren &&
4840                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4841                 goto fake_end;
4842             }
4843             {
4844                 UV c = 0;
4845                 if (ST.c1 != CHRTEST_VOID)
4846                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4847                                            UTF8_MAXBYTES, 0, uniflags)
4848                                 : (UV) UCHARAT(PL_reginput);
4849                 /* If it could work, try it. */
4850                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4851                     CURLY_SETPAREN(ST.paren, ST.count);
4852                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4853                     /* NOTREACHED */
4854                 }
4855             }
4856             /* FALL THROUGH */
4857         case CURLY_B_max_fail:
4858             /* failed to find B in a greedy match */
4859             if (ST.paren && ST.count)
4860                 PL_regoffs[ST.paren].end = -1;
4861
4862             REGCP_UNWIND(ST.cp);
4863             /*  back up. */
4864             if (--ST.count < ST.min)
4865                 sayNO;
4866             PL_reginput = locinput = HOPc(locinput, -1);
4867             goto curly_try_B_max;
4868
4869 #undef ST
4870
4871         case END:
4872             fake_end:
4873             if (cur_eval) {
4874                 /* we've just finished A in /(??{A})B/; now continue with B */
4875                 I32 tmpix;
4876                 st->u.eval.toggle_reg_flags
4877                             = cur_eval->u.eval.toggle_reg_flags;
4878                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4879
4880                 st->u.eval.prev_rex = rex_sv;           /* inner */
4881                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4882                 rex = (struct regexp *)SvANY(rex_sv);
4883                 rexi = RXi_GET(rex);
4884                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4885                 ReREFCNT_inc(rex_sv);
4886                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4887
4888                 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4889                 PL_reglastparen = &rex->lastparen;
4890                 PL_reglastcloseparen = &rex->lastcloseparen;
4891
4892                 REGCP_SET(st->u.eval.lastcp);
4893                 PL_reginput = locinput;
4894
4895                 /* Restore parens of the outer rex without popping the
4896                  * savestack */
4897                 tmpix = PL_savestack_ix;
4898                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4899                 regcppop(rex);
4900                 PL_savestack_ix = tmpix;
4901
4902                 st->u.eval.prev_eval = cur_eval;
4903                 cur_eval = cur_eval->u.eval.prev_eval;
4904                 DEBUG_EXECUTE_r(
4905                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4906                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4907                 if ( nochange_depth )
4908                     nochange_depth--;
4909
4910                 PUSH_YES_STATE_GOTO(EVAL_AB,
4911                         st->u.eval.prev_eval->u.eval.B); /* match B */
4912             }
4913
4914             if (locinput < reginfo->till) {
4915                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4916                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4917                                       PL_colors[4],
4918                                       (long)(locinput - PL_reg_starttry),
4919                                       (long)(reginfo->till - PL_reg_starttry),
4920                                       PL_colors[5]));
4921
4922                 sayNO_SILENT;           /* Cannot match: too short. */
4923             }
4924             PL_reginput = locinput;     /* put where regtry can find it */
4925             sayYES;                     /* Success! */
4926
4927         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4928             DEBUG_EXECUTE_r(
4929             PerlIO_printf(Perl_debug_log,
4930                 "%*s  %ssubpattern success...%s\n",
4931                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4932             PL_reginput = locinput;     /* put where regtry can find it */
4933             sayYES;                     /* Success! */
4934
4935 #undef  ST
4936 #define ST st->u.ifmatch
4937
4938         case SUSPEND:   /* (?>A) */
4939             ST.wanted = 1;
4940             PL_reginput = locinput;
4941             goto do_ifmatch;
4942
4943         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4944             ST.wanted = 0;
4945             goto ifmatch_trivial_fail_test;
4946
4947         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4948             ST.wanted = 1;
4949           ifmatch_trivial_fail_test:
4950             if (scan->flags) {
4951                 char * const s = HOPBACKc(locinput, scan->flags);
4952                 if (!s) {
4953                     /* trivial fail */
4954                     if (logical) {
4955                         logical = 0;
4956                         sw = 1 - (bool)ST.wanted;
4957                     }
4958                     else if (ST.wanted)
4959                         sayNO;
4960                     next = scan + ARG(scan);
4961                     if (next == scan)
4962                         next = NULL;
4963                     break;
4964                 }
4965                 PL_reginput = s;
4966             }
4967             else
4968                 PL_reginput = locinput;
4969
4970           do_ifmatch:
4971             ST.me = scan;
4972             ST.logical = logical;
4973             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4974
4975             /* execute body of (?...A) */
4976             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4977             /* NOTREACHED */
4978
4979         case IFMATCH_A_fail: /* body of (?...A) failed */
4980             ST.wanted = !ST.wanted;
4981             /* FALL THROUGH */
4982
4983         case IFMATCH_A: /* body of (?...A) succeeded */
4984             if (ST.logical) {
4985                 sw = (bool)ST.wanted;
4986             }
4987             else if (!ST.wanted)
4988                 sayNO;
4989
4990             if (OP(ST.me) == SUSPEND)
4991                 locinput = PL_reginput;
4992             else {
4993                 locinput = PL_reginput = st->locinput;
4994                 nextchr = UCHARAT(locinput);
4995             }
4996             scan = ST.me + ARG(ST.me);
4997             if (scan == ST.me)
4998                 scan = NULL;
4999             continue; /* execute B */
5000
5001 #undef ST
5002
5003         case LONGJMP:
5004             next = scan + ARG(scan);
5005             if (next == scan)
5006                 next = NULL;
5007             break;
5008         case COMMIT:
5009             reginfo->cutpoint = PL_regeol;
5010             /* FALLTHROUGH */
5011         case PRUNE:
5012             PL_reginput = locinput;
5013             if (!scan->flags)
5014                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5015             PUSH_STATE_GOTO(COMMIT_next,next);
5016             /* NOTREACHED */
5017         case COMMIT_next_fail:
5018             no_final = 1;
5019             /* FALLTHROUGH */
5020         case OPFAIL:
5021             sayNO;
5022             /* NOTREACHED */
5023
5024 #define ST st->u.mark
5025         case MARKPOINT:
5026             ST.prev_mark = mark_state;
5027             ST.mark_name = sv_commit = sv_yes_mark
5028                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5029             mark_state = st;
5030             ST.mark_loc = PL_reginput = locinput;
5031             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5032             /* NOTREACHED */
5033         case MARKPOINT_next:
5034             mark_state = ST.prev_mark;
5035             sayYES;
5036             /* NOTREACHED */
5037         case MARKPOINT_next_fail:
5038             if (popmark && sv_eq(ST.mark_name,popmark))
5039             {
5040                 if (ST.mark_loc > startpoint)
5041                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5042                 popmark = NULL; /* we found our mark */
5043                 sv_commit = ST.mark_name;
5044
5045                 DEBUG_EXECUTE_r({
5046                         PerlIO_printf(Perl_debug_log,
5047                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5048                             REPORT_CODE_OFF+depth*2, "",
5049                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5050                 });
5051             }
5052             mark_state = ST.prev_mark;
5053             sv_yes_mark = mark_state ?
5054                 mark_state->u.mark.mark_name : NULL;
5055             sayNO;
5056             /* NOTREACHED */
5057         case SKIP:
5058             PL_reginput = locinput;
5059             if (scan->flags) {
5060                 /* (*SKIP) : if we fail we cut here*/
5061                 ST.mark_name = NULL;
5062                 ST.mark_loc = locinput;
5063                 PUSH_STATE_GOTO(SKIP_next,next);
5064             } else {
5065                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5066                    otherwise do nothing.  Meaning we need to scan
5067                  */
5068                 regmatch_state *cur = mark_state;
5069                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5070
5071                 while (cur) {
5072                     if ( sv_eq( cur->u.mark.mark_name,
5073                                 find ) )
5074                     {
5075                         ST.mark_name = find;
5076                         PUSH_STATE_GOTO( SKIP_next, next );
5077                     }
5078                     cur = cur->u.mark.prev_mark;
5079                 }
5080             }
5081             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5082             break;
5083         case SKIP_next_fail:
5084             if (ST.mark_name) {
5085                 /* (*CUT:NAME) - Set up to search for the name as we
5086                    collapse the stack*/
5087                 popmark = ST.mark_name;
5088             } else {
5089                 /* (*CUT) - No name, we cut here.*/
5090                 if (ST.mark_loc > startpoint)
5091                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5092                 /* but we set sv_commit to latest mark_name if there
5093                    is one so they can test to see how things lead to this
5094                    cut */
5095                 if (mark_state)
5096                     sv_commit=mark_state->u.mark.mark_name;
5097             }
5098             no_final = 1;
5099             sayNO;
5100             /* NOTREACHED */
5101 #undef ST
5102         case FOLDCHAR:
5103             n = ARG(scan);
5104             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5105                 locinput += ln;
5106             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5107                 sayNO;
5108             } else  {
5109                 U8 folded[UTF8_MAXBYTES_CASE+1];
5110                 STRLEN foldlen;
5111                 const char * const l = locinput;
5112                 char *e = PL_regeol;
5113                 to_uni_fold(n, folded, &foldlen);
5114
5115                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5116                                l, &e, 0,  do_utf8)) {
5117                         sayNO;
5118                 }
5119                 locinput = e;
5120             }
5121             nextchr = UCHARAT(locinput);
5122             break;
5123         case LNBREAK:
5124             if ((n=is_LNBREAK(locinput,do_utf8))) {
5125                 locinput += n;
5126                 nextchr = UCHARAT(locinput);
5127             } else
5128                 sayNO;
5129             break;
5130
5131 #define CASE_CLASS(nAmE)                              \
5132         case nAmE:                                    \
5133             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5134                 locinput += n;                        \
5135                 nextchr = UCHARAT(locinput);          \
5136             } else                                    \
5137                 sayNO;                                \
5138             break;                                    \
5139         case N##nAmE:                                 \
5140             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5141                 sayNO;                                \
5142             } else {                                  \
5143                 locinput += UTF8SKIP(locinput);       \
5144                 nextchr = UCHARAT(locinput);          \
5145             }                                         \
5146             break
5147
5148         CASE_CLASS(VERTWS);
5149         CASE_CLASS(HORIZWS);
5150 #undef CASE_CLASS
5151
5152         default:
5153             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5154                           PTR2UV(scan), OP(scan));
5155             Perl_croak(aTHX_ "regexp memory corruption");
5156
5157         } /* end switch */
5158
5159         /* switch break jumps here */
5160         scan = next; /* prepare to execute the next op and ... */
5161         continue;    /* ... jump back to the top, reusing st */
5162         /* NOTREACHED */
5163
5164       push_yes_state:
5165         /* push a state that backtracks on success */
5166         st->u.yes.prev_yes_state = yes_state;
5167         yes_state = st;
5168         /* FALL THROUGH */
5169       push_state:
5170         /* push a new regex state, then continue at scan  */
5171         {
5172             regmatch_state *newst;
5173
5174             DEBUG_STACK_r({
5175                 regmatch_state *cur = st;
5176                 regmatch_state *curyes = yes_state;
5177                 int curd = depth;
5178                 regmatch_slab *slab = PL_regmatch_slab;
5179                 for (;curd > -1;cur--,curd--) {
5180                     if (cur < SLAB_FIRST(slab)) {
5181                         slab = slab->prev;
5182                         cur = SLAB_LAST(slab);
5183                     }
5184                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5185                         REPORT_CODE_OFF + 2 + depth * 2,"",
5186                         curd, PL_reg_name[cur->resume_state],
5187                         (curyes == cur) ? "yes" : ""
5188                     );
5189                     if (curyes == cur)
5190                         curyes = cur->u.yes.prev_yes_state;
5191                 }
5192             } else
5193                 DEBUG_STATE_pp("push")
5194             );
5195             depth++;
5196             st->locinput = locinput;
5197             newst = st+1;
5198             if (newst >  SLAB_LAST(PL_regmatch_slab))
5199                 newst = S_push_slab(aTHX);
5200             PL_regmatch_state = newst;
5201
5202             locinput = PL_reginput;
5203             nextchr = UCHARAT(locinput);
5204             st = newst;
5205             continue;
5206             /* NOTREACHED */
5207         }
5208     }
5209
5210     /*
5211     * We get here only if there's trouble -- normally "case END" is
5212     * the terminating point.
5213     */
5214     Perl_croak(aTHX_ "corrupted regexp pointers");
5215     /*NOTREACHED*/
5216     sayNO;
5217
5218 yes:
5219     if (yes_state) {
5220         /* we have successfully completed a subexpression, but we must now
5221          * pop to the state marked by yes_state and continue from there */
5222         assert(st != yes_state);
5223 #ifdef DEBUGGING
5224         while (st != yes_state) {
5225             st--;
5226             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5227                 PL_regmatch_slab = PL_regmatch_slab->prev;
5228                 st = SLAB_LAST(PL_regmatch_slab);
5229             }
5230             DEBUG_STATE_r({
5231                 if (no_final) {
5232                     DEBUG_STATE_pp("pop (no final)");
5233                 } else {
5234                     DEBUG_STATE_pp("pop (yes)");
5235                 }
5236             });
5237             depth--;
5238         }
5239 #else
5240         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5241             || yes_state > SLAB_LAST(PL_regmatch_slab))
5242         {
5243             /* not in this slab, pop slab */
5244             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5245             PL_regmatch_slab = PL_regmatch_slab->prev;
5246             st = SLAB_LAST(PL_regmatch_slab);
5247         }
5248         depth -= (st - yes_state);
5249 #endif
5250         st = yes_state;
5251         yes_state = st->u.yes.prev_yes_state;
5252         PL_regmatch_state = st;
5253
5254         if (no_final) {
5255             locinput= st->locinput;
5256             nextchr = UCHARAT(locinput);
5257         }
5258         state_num = st->resume_state + no_final;
5259         goto reenter_switch;
5260     }
5261
5262     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5263                           PL_colors[4], PL_colors[5]));
5264
5265     if (PL_reg_eval_set) {
5266         /* each successfully executed (?{...}) block does the equivalent of
5267          *   local $^R = do {...}
5268          * When popping the save stack, all these locals would be undone;
5269          * bypass this by setting the outermost saved $^R to the latest
5270          * value */
5271         if (oreplsv != GvSV(PL_replgv))
5272             sv_setsv(oreplsv, GvSV(PL_replgv));
5273     }
5274     result = 1;
5275     goto final_exit;
5276
5277 no:
5278     DEBUG_EXECUTE_r(
5279         PerlIO_printf(Perl_debug_log,
5280             "%*s  %sfailed...%s\n",
5281             REPORT_CODE_OFF+depth*2, "",
5282             PL_colors[4], PL_colors[5])
5283         );
5284
5285 no_silent:
5286     if (no_final) {
5287         if (yes_state) {
5288             goto yes;
5289         } else {
5290             goto final_exit;
5291         }
5292     }
5293     if (depth) {
5294         /* there's a previous state to backtrack to */
5295         st--;
5296         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5297             PL_regmatch_slab = PL_regmatch_slab->prev;
5298             st = SLAB_LAST(PL_regmatch_slab);
5299         }
5300         PL_regmatch_state = st;
5301         locinput= st->locinput;
5302         nextchr = UCHARAT(locinput);
5303
5304         DEBUG_STATE_pp("pop");
5305         depth--;
5306         if (yes_state == st)
5307             yes_state = st->u.yes.prev_yes_state;
5308
5309         state_num = st->resume_state + 1; /* failure = success + 1 */
5310         goto reenter_switch;
5311     }
5312     result = 0;
5313
5314   final_exit:
5315     if (rex->intflags & PREGf_VERBARG_SEEN) {
5316         SV *sv_err = get_sv("REGERROR", 1);
5317         SV *sv_mrk = get_sv("REGMARK", 1);
5318         if (result) {
5319             sv_commit = &PL_sv_no;
5320             if (!sv_yes_mark)
5321                 sv_yes_mark = &PL_sv_yes;
5322         } else {
5323             if (!sv_commit)
5324                 sv_commit = &PL_sv_yes;
5325             sv_yes_mark = &PL_sv_no;
5326         }
5327         sv_setsv(sv_err, sv_commit);
5328         sv_setsv(sv_mrk, sv_yes_mark);
5329     }
5330
5331     /* clean up; in particular, free all slabs above current one */
5332     LEAVE_SCOPE(oldsave);
5333
5334     return result;
5335 }
5336
5337 /*
5338  - regrepeat - repeatedly match something simple, report how many
5339  */
5340 /*
5341  * [This routine now assumes that it will only match on things of length 1.
5342  * That was true before, but now we assume scan - reginput is the count,
5343  * rather than incrementing count on every character.  [Er, except utf8.]]
5344  */
5345 STATIC I32
5346 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5347 {
5348     dVAR;
5349     register char *scan;
5350     register I32 c;
5351     register char *loceol = PL_regeol;
5352     register I32 hardcount = 0;
5353     register bool do_utf8 = PL_reg_match_utf8;
5354 #ifndef DEBUGGING
5355     PERL_UNUSED_ARG(depth);
5356 #endif
5357
5358     PERL_ARGS_ASSERT_REGREPEAT;
5359
5360     scan = PL_reginput;
5361     if (max == REG_INFTY)
5362         max = I32_MAX;
5363     else if (max < loceol - scan)
5364         loceol = scan + max;
5365     switch (OP(p)) {
5366     case REG_ANY:
5367         if (do_utf8) {
5368             loceol = PL_regeol;
5369             while (scan < loceol && hardcount < max && *scan != '\n') {
5370                 scan += UTF8SKIP(scan);
5371                 hardcount++;
5372             }
5373         } else {
5374             while (scan < loceol && *scan != '\n')
5375                 scan++;
5376         }
5377         break;
5378     case SANY:
5379         if (do_utf8) {
5380             loceol = PL_regeol;
5381             while (scan < loceol && hardcount < max) {
5382                 scan += UTF8SKIP(scan);
5383                 hardcount++;
5384             }
5385         }
5386         else
5387             scan = loceol;
5388         break;
5389     case CANY:
5390         scan = loceol;
5391         break;
5392     case EXACT:         /* length of string is 1 */
5393         c = (U8)*STRING(p);
5394         while (scan < loceol && UCHARAT(scan) == c)
5395             scan++;
5396         break;
5397     case EXACTF:        /* length of string is 1 */
5398         c = (U8)*STRING(p);
5399         while (scan < loceol &&
5400                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5401             scan++;
5402         break;
5403     case EXACTFL:       /* length of string is 1 */
5404         PL_reg_flags |= RF_tainted;
5405         c = (U8)*STRING(p);
5406         while (scan < loceol &&
5407                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5408             scan++;
5409         break;
5410     case ANYOF:
5411         if (do_utf8) {
5412             loceol = PL_regeol;
5413             while (hardcount < max && scan < loceol &&
5414                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5415                 scan += UTF8SKIP(scan);
5416                 hardcount++;
5417             }
5418         } else {
5419             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5420                 scan++;
5421         }
5422         break;
5423     case ALNUM:
5424         if (do_utf8) {
5425             loceol = PL_regeol;
5426             LOAD_UTF8_CHARCLASS_ALNUM();
5427             while (hardcount < max && scan < loceol &&
5428                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5429                 scan += UTF8SKIP(scan);
5430                 hardcount++;
5431             }
5432         } else {
5433             while (scan < loceol && isALNUM(*scan))
5434                 scan++;
5435         }
5436         break;
5437     case ALNUML:
5438         PL_reg_flags |= RF_tainted;
5439         if (do_utf8) {
5440             loceol = PL_regeol;
5441             while (hardcount < max && scan < loceol &&
5442                    isALNUM_LC_utf8((U8*)scan)) {
5443                 scan += UTF8SKIP(scan);
5444                 hardcount++;
5445             }
5446         } else {
5447             while (scan < loceol && isALNUM_LC(*scan))
5448                 scan++;
5449         }
5450         break;
5451     case NALNUM:
5452         if (do_utf8) {
5453             loceol = PL_regeol;
5454             LOAD_UTF8_CHARCLASS_ALNUM();
5455             while (hardcount < max && scan < loceol &&
5456                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5457                 scan += UTF8SKIP(scan);
5458                 hardcount++;
5459             }
5460         } else {
5461             while (scan < loceol && !isALNUM(*scan))
5462                 scan++;
5463         }
5464         break;
5465     case NALNUML:
5466         PL_reg_flags |= RF_tainted;
5467         if (do_utf8) {
5468             loceol = PL_regeol;
5469             while (hardcount < max && scan < loceol &&
5470                    !isALNUM_LC_utf8((U8*)scan)) {
5471                 scan += UTF8SKIP(scan);
5472                 hardcount++;
5473             }
5474         } else {
5475             while (scan < loceol && !isALNUM_LC(*scan))
5476                 scan++;
5477         }
5478         break;
5479     case SPACE:
5480         if (do_utf8) {
5481             loceol = PL_regeol;
5482             LOAD_UTF8_CHARCLASS_SPACE();
5483             while (hardcount < max && scan < loceol &&
5484                    (*scan == ' ' ||
5485                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5486                 scan += UTF8SKIP(scan);
5487                 hardcount++;
5488             }
5489         } else {
5490             while (scan < loceol && isSPACE(*scan))
5491                 scan++;
5492         }
5493         break;
5494     case SPACEL:
5495         PL_reg_flags |= RF_tainted;
5496         if (do_utf8) {
5497             loceol = PL_regeol;
5498             while (hardcount < max && scan < loceol &&
5499                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5500                 scan += UTF8SKIP(scan);
5501                 hardcount++;
5502             }
5503         } else {
5504             while (scan < loceol && isSPACE_LC(*scan))
5505                 scan++;
5506         }
5507         break;
5508     case NSPACE:
5509         if (do_utf8) {
5510             loceol = PL_regeol;
5511             LOAD_UTF8_CHARCLASS_SPACE();
5512             while (hardcount < max && scan < loceol &&
5513                    !(*scan == ' ' ||
5514                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5515                 scan += UTF8SKIP(scan);
5516                 hardcount++;
5517             }
5518         } else {
5519             while (scan < loceol && !isSPACE(*scan))
5520                 scan++;
5521         }
5522         break;
5523     case NSPACEL:
5524         PL_reg_flags |= RF_tainted;
5525         if (do_utf8) {
5526             loceol = PL_regeol;
5527             while (hardcount < max && scan < loceol &&
5528                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5529                 scan += UTF8SKIP(scan);
5530                 hardcount++;
5531             }
5532         } else {
5533             while (scan < loceol && !isSPACE_LC(*scan))
5534                 scan++;
5535         }
5536         break;
5537     case DIGIT:
5538         if (do_utf8) {
5539             loceol = PL_regeol;
5540             LOAD_UTF8_CHARCLASS_DIGIT();
5541             while (hardcount < max && scan < loceol &&
5542                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5543                 scan += UTF8SKIP(scan);
5544                 hardcount++;
5545             }
5546         } else {
5547             while (scan < loceol && isDIGIT(*scan))
5548                 scan++;
5549         }
5550         break;
5551     case NDIGIT:
5552         if (do_utf8) {
5553             loceol = PL_regeol;
5554             LOAD_UTF8_CHARCLASS_DIGIT();
5555             while (hardcount < max && scan < loceol &&
5556                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5557                 scan += UTF8SKIP(scan);
5558                 hardcount++;
5559             }
5560         } else {
5561             while (scan < loceol && !isDIGIT(*scan))
5562                 scan++;
5563         }
5564     case LNBREAK:
5565         if (do_utf8) {
5566             loceol = PL_regeol;
5567             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5568                 scan += c;
5569                 hardcount++;
5570             }
5571         } else {
5572             /*
5573               LNBREAK can match two latin chars, which is ok,
5574               because we have a null terminated string, but we
5575               have to use hardcount in this situation
5576             */
5577             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5578                 scan+=c;
5579                 hardcount++;
5580             }
5581         }
5582         break;
5583     case HORIZWS:
5584         if (do_utf8) {
5585             loceol = PL_regeol;
5586             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5587                 scan += c;
5588                 hardcount++;
5589             }
5590         } else {
5591             while (scan < loceol && is_HORIZWS_latin1(scan))
5592                 scan++;
5593         }
5594         break;
5595     case NHORIZWS:
5596         if (do_utf8) {
5597             loceol = PL_regeol;
5598             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5599                 scan += UTF8SKIP(scan);
5600                 hardcount++;
5601             }
5602         } else {
5603             while (scan < loceol && !is_HORIZWS_latin1(scan))
5604                 scan++;
5605
5606         }
5607         break;
5608     case VERTWS:
5609         if (do_utf8) {
5610             loceol = PL_regeol;
5611             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5612                 scan += c;
5613                 hardcount++;
5614             }
5615         } else {
5616             while (scan < loceol && is_VERTWS_latin1(scan))
5617                 scan++;
5618
5619         }
5620         break;
5621     case NVERTWS:
5622         if (do_utf8) {
5623             loceol = PL_regeol;
5624             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5625                 scan += UTF8SKIP(scan);
5626                 hardcount++;
5627             }
5628         } else {
5629             while (scan < loceol && !is_VERTWS_latin1(scan))
5630                 scan++;
5631
5632         }
5633         break;
5634
5635     default:            /* Called on something of 0 width. */
5636         break;          /* So match right here or not at all. */
5637     }
5638
5639     if (hardcount)
5640         c = hardcount;
5641     else
5642         c = scan - PL_reginput;
5643     PL_reginput = scan;
5644
5645     DEBUG_r({
5646         GET_RE_DEBUG_FLAGS_DECL;
5647         DEBUG_EXECUTE_r({
5648             SV * const prop = sv_newmortal();
5649             regprop(prog, prop, p);
5650             PerlIO_printf(Perl_debug_log,
5651                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5652                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5653         });
5654     });
5655
5656     return(c);
5657 }
5658
5659
5660 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5661 /*
5662 - regclass_swash - prepare the utf8 swash
5663 */
5664
5665 SV *
5666 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5667 {
5668     dVAR;
5669     SV *sw  = NULL;
5670     SV *si  = NULL;
5671     SV *alt = NULL;
5672     RXi_GET_DECL(prog,progi);
5673     const struct reg_data * const data = prog ? progi->data : NULL;
5674
5675     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5676
5677     if (data && data->count) {
5678         const U32 n = ARG(node);
5679
5680         if (data->what[n] == 's') {
5681             SV * const rv = MUTABLE_SV(data->data[n]);
5682             AV * const av = MUTABLE_AV(SvRV(rv));
5683             SV **const ary = AvARRAY(av);
5684             SV **a, **b;
5685
5686             /* See the end of regcomp.c:S_regclass() for
5687              * documentation of these array elements. */
5688
5689             si = *ary;
5690             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5691             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5692
5693             if (a)
5694                 sw = *a;
5695             else if (si && doinit) {
5696                 sw = swash_init("utf8", "", si, 1, 0);
5697                 (void)av_store(av, 1, sw);
5698             }
5699             if (b)
5700                 alt = *b;
5701         }
5702     }
5703
5704     if (listsvp)
5705         *listsvp = si;
5706     if (altsvp)
5707         *altsvp  = alt;
5708
5709     return sw;
5710 }
5711 #endif
5712
5713 /*
5714  - reginclass - determine if a character falls into a character class
5715
5716   The n is the ANYOF regnode, the p is the target string, lenp
5717   is pointer to the maximum length of how far to go in the p
5718   (if the lenp is zero, UTF8SKIP(p) is used),
5719   do_utf8 tells whether the target string is in UTF-8.
5720
5721  */
5722
5723 STATIC bool
5724 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5725 {
5726     dVAR;
5727     const char flags = ANYOF_FLAGS(n);
5728     bool match = FALSE;
5729     UV c = *p;
5730     STRLEN len = 0;
5731     STRLEN plen;
5732
5733     PERL_ARGS_ASSERT_REGINCLASS;
5734
5735     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5736         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5737                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5738                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5739         if (len == (STRLEN)-1)
5740             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5741     }
5742
5743     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5744     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5745         if (lenp)
5746             *lenp = 0;
5747         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5748             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5749                 match = TRUE;
5750         }
5751         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5752             match = TRUE;
5753         if (!match) {
5754             AV *av;
5755             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5756
5757             if (sw) {
5758                 U8 * utf8_p;
5759                 if (do_utf8) {
5760                     utf8_p = (U8 *) p;
5761                 } else {
5762                     STRLEN len = 1;
5763                     utf8_p = bytes_to_utf8(p, &len);
5764                 }
5765                 if (swash_fetch(sw, utf8_p, 1))
5766                     match = TRUE;
5767                 else if (flags & ANYOF_FOLD) {
5768                     if (!match && lenp && av) {
5769                         I32 i;
5770                         for (i = 0; i <= av_len(av); i++) {
5771                             SV* const sv = *av_fetch(av, i, FALSE);
5772                             STRLEN len;
5773                             const char * const s = SvPV_const(sv, len);
5774                             if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5775                                 *lenp = len;
5776                                 match = TRUE;
5777                                 break;
5778                             }
5779                         }
5780                     }
5781                     if (!match) {
5782                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5783
5784                         STRLEN tmplen;
5785                         to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5786                         if (swash_fetch(sw, tmpbuf, 1))
5787                             match = TRUE;
5788                     }
5789                 }
5790
5791                 /* If we allocated a string above, free it */
5792                 if (! do_utf8) Safefree(utf8_p);
5793             }
5794         }
5795         if (match && lenp && *lenp == 0)
5796             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5797     }
5798     if (!match && c < 256) {
5799         if (ANYOF_BITMAP_TEST(n, c))
5800             match = TRUE;
5801         else if (flags & ANYOF_FOLD) {
5802             U8 f;
5803
5804             if (flags & ANYOF_LOCALE) {
5805                 PL_reg_flags |= RF_tainted;
5806                 f = PL_fold_locale[c];
5807             }
5808             else
5809                 f = PL_fold[c];
5810             if (f != c && ANYOF_BITMAP_TEST(n, f))
5811                 match = TRUE;
5812         }
5813
5814         if (!match && (flags & ANYOF_CLASS)) {
5815             PL_reg_flags |= RF_tainted;
5816             if (
5817                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5818                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5819                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5820                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5821                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5822                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5825                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5826                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5827                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5828                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5829                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5830                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5831                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5832                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5833                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5834                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5835                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5836                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5837                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5838                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5839                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5840                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5841                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5842                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5843                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5844                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5845                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5846                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5847                 ) /* How's that for a conditional? */
5848             {
5849                 match = TRUE;
5850             }
5851         }
5852     }
5853
5854     return (flags & ANYOF_INVERT) ? !match : match;
5855 }
5856
5857 STATIC U8 *
5858 S_reghop3(U8 *s, I32 off, const U8* lim)
5859 {
5860     dVAR;
5861
5862     PERL_ARGS_ASSERT_REGHOP3;
5863
5864     if (off >= 0) {
5865         while (off-- && s < lim) {
5866             /* XXX could check well-formedness here */
5867             s += UTF8SKIP(s);
5868         }
5869     }
5870     else {
5871         while (off++ && s > lim) {
5872             s--;
5873             if (UTF8_IS_CONTINUED(*s)) {
5874                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5875                     s--;
5876             }
5877             /* XXX could check well-formedness here */
5878         }
5879     }
5880     return s;
5881 }
5882
5883 #ifdef XXX_dmq
5884 /* there are a bunch of places where we use two reghop3's that should
5885    be replaced with this routine. but since thats not done yet
5886    we ifdef it out - dmq
5887 */
5888 STATIC U8 *
5889 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5890 {
5891     dVAR;
5892
5893     PERL_ARGS_ASSERT_REGHOP4;
5894
5895     if (off >= 0) {
5896         while (off-- && s < rlim) {
5897             /* XXX could check well-formedness here */
5898             s += UTF8SKIP(s);
5899         }
5900     }
5901     else {
5902         while (off++ && s > llim) {
5903             s--;
5904             if (UTF8_IS_CONTINUED(*s)) {
5905                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5906                     s--;
5907             }
5908             /* XXX could check well-formedness here */
5909         }
5910     }
5911     return s;
5912 }
5913 #endif
5914
5915 STATIC U8 *
5916 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5917 {
5918     dVAR;
5919
5920     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5921
5922     if (off >= 0) {
5923         while (off-- && s < lim) {
5924             /* XXX could check well-formedness here */
5925             s += UTF8SKIP(s);
5926         }
5927         if (off >= 0)
5928             return NULL;
5929     }
5930     else {
5931         while (off++ && s > lim) {
5932             s--;
5933             if (UTF8_IS_CONTINUED(*s)) {
5934                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5935                     s--;
5936             }
5937             /* XXX could check well-formedness here */
5938         }
5939         if (off <= 0)
5940             return NULL;
5941     }
5942     return s;
5943 }
5944
5945 static void
5946 restore_pos(pTHX_ void *arg)
5947 {
5948     dVAR;
5949     regexp * const rex = (regexp *)arg;
5950     if (PL_reg_eval_set) {
5951         if (PL_reg_oldsaved) {
5952             rex->subbeg = PL_reg_oldsaved;
5953             rex->sublen = PL_reg_oldsavedlen;
5954 #ifdef PERL_OLD_COPY_ON_WRITE
5955             rex->saved_copy = PL_nrs;
5956 #endif
5957             RXp_MATCH_COPIED_on(rex);
5958         }
5959         PL_reg_magic->mg_len = PL_reg_oldpos;
5960         PL_reg_eval_set = 0;
5961         PL_curpm = PL_reg_oldcurpm;
5962     }
5963 }
5964
5965 STATIC void
5966 S_to_utf8_substr(pTHX_ register regexp *prog)
5967 {
5968     int i = 1;
5969
5970     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5971
5972     do {
5973         if (prog->substrs->data[i].substr
5974             && !prog->substrs->data[i].utf8_substr) {
5975             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5976             prog->substrs->data[i].utf8_substr = sv;
5977             sv_utf8_upgrade(sv);
5978             if (SvVALID(prog->substrs->data[i].substr)) {
5979                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5980                 if (flags & FBMcf_TAIL) {
5981                     /* Trim the trailing \n that fbm_compile added last
5982                        time.  */
5983                     SvCUR_set(sv, SvCUR(sv) - 1);
5984                     /* Whilst this makes the SV technically "invalid" (as its
5985                        buffer is no longer followed by "\0") when fbm_compile()
5986                        adds the "\n" back, a "\0" is restored.  */
5987                 }
5988                 fbm_compile(sv, flags);
5989             }
5990             if (prog->substrs->data[i].substr == prog->check_substr)
5991                 prog->check_utf8 = sv;
5992         }
5993     } while (i--);
5994 }
5995
5996 STATIC void
5997 S_to_byte_substr(pTHX_ register regexp *prog)
5998 {
5999     dVAR;
6000     int i = 1;
6001
6002     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6003
6004     do {
6005         if (prog->substrs->data[i].utf8_substr
6006             && !prog->substrs->data[i].substr) {
6007             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6008             if (sv_utf8_downgrade(sv, TRUE)) {
6009                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6010                     const U8 flags
6011                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6012                     if (flags & FBMcf_TAIL) {
6013                         /* Trim the trailing \n that fbm_compile added last
6014                            time.  */
6015                         SvCUR_set(sv, SvCUR(sv) - 1);
6016                     }
6017                     fbm_compile(sv, flags);
6018                 }
6019             } else {
6020                 SvREFCNT_dec(sv);
6021                 sv = &PL_sv_undef;
6022             }
6023             prog->substrs->data[i].substr = sv;
6024             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6025                 prog->check_substr = sv;
6026         }
6027     } while (i--);
6028 }
6029
6030 /*
6031  * Local variables:
6032  * c-indentation-style: bsd
6033  * c-basic-offset: 4
6034  * indent-tabs-mode: t
6035  * End:
6036  *
6037  * ex: set ts=8 sts=4 sw=4 noet:
6038  */