src/5011001/orig/regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  *      One Ring to rule them all, One Ring to find them
   6  &
   7  *     [p.v of _The Lord of the Rings_, opening poem]
   8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10  */
  11
  12 /* This file contains functions for executing a regular expression.  See
  13  * also regcomp.c which funnily enough, contains functions for compiling
  14  * a regular expression.
  15  *
  16  * This file is also copied at build time to ext/re/re_exec.c, where
  17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18  * This causes the main functions to be compiled under new names and with
  19  * debugging support added, which makes "use re 'debug'" work.
  20  */
  21
  22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23  * confused with the original package (see point 3 below).  Thanks, Henry!
  24  */
  25
  26 /* Additional note: this code is very heavily munged from Henry's version
  27  * in places.  In some spots I've traded clarity for efficiency, so don't
  28  * blame Henry for some of the lack of readability.
  29  */
  30
  31 /* The names of the functions have been changed from regcomp and
  32  * regexec to  pregcomp and pregexec in order to avoid conflicts
  33  * with the POSIX routines of the same names.
  34 */
  35
  36 #ifdef PERL_EXT_RE_BUILD
  37 #include "re_top.h"
  38 #endif
  39
  40 /*
  41  * pregcomp and pregexec -- regsub and regerror are not used in perl
  42  *
  43  *      Copyright (c) 1986 by University of Toronto.
  44  *      Written by Henry Spencer.  Not derived from licensed software.
  45  *
  46  *      Permission is granted to anyone to use this software for any
  47  *      purpose on any computer system, and to redistribute it freely,
  48  *      subject to the following restrictions:
  49  *
  50  *      1. The author is not responsible for the consequences of use of
  51  *              this software, no matter how awful, even if they arise
  52  *              from defects in it.
  53  *
  54  *      2. The origin of this software must not be misrepresented, either
  55  *              by explicit claim or by omission.
  56  *
  57  *      3. Altered versions must be plainly marked as such, and must not
  58  *              be misrepresented as being the original software.
  59  *
  60  ****    Alterations to Henry's code are...
  61  ****
  62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64  ****    by Larry Wall and others
  65  ****
  66  ****    You may distribute under the terms of either the GNU General Public
  67  ****    License or the Artistic License, as specified in the README file.
  68  *
  69  * Beware that some of this code is subtly aware of the way operator
  70  * precedence is structured in regular expressions.  Serious changes in
  71  * regular-expression syntax might require a total rethink.
  72  */
  73 #include "EXTERN.h"
  74 #define PERL_IN_REGEXEC_C
  75 #include "perl.h"
  76
  77 #ifdef PERL_IN_XSUB_RE
  78 #  include "re_comp.h"
  79 #else
  80 #  include "regcomp.h"
  81 #endif
  82
  83 #define RF_tainted      1               /* tainted information used? */
  84 #define RF_warned       2               /* warned about big count? */
  85
  86 #define RF_utf8         8               /* Pattern contains multibyte chars? */
  87
  88 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  89
  90 #define RS_init         1               /* eval environment created */
  91 #define RS_set          2               /* replsv value is set */
  92
  93 #ifndef STATIC
  94 #define STATIC  static
  95 #endif
  96
  97 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  98
  99 /*
 100  * Forwards.
 101  */
 102
 103 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 104 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 105
 106 #define HOPc(pos,off) \
 107         (char *)(PL_reg_match_utf8 \
 108             ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 109             : (U8*)(pos + off))
 110 #define HOPBACKc(pos, off) \
 111         (char*)(PL_reg_match_utf8\
 112             ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 113             : (pos - off >= PL_bostr)           \
 114                 ? (U8*)pos - off                \
 115                 : NULL)
 116
 117 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 118 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 119
 120 /* these are unrolled below in the CCC_TRY_XXX defined */
 121 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 122     if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 123 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 124 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 125 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 126 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 127
 128
 129 /*
 130    We dont use PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS as the direct test
 131    so that it is possible to override the option here without having to
 132    rebuild the entire core. as we are required to do if we change regcomp.h
 133    which is where PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS is defined.
 134 */
 135 #if PERL_LEGACY_UNICODE_CHARCLASS_MAPPINGS
 136 #define BROKEN_UNICODE_CHARCLASS_MAPPINGS
 137 #endif
 138
 139 #ifdef BROKEN_UNICODE_CHARCLASS_MAPPINGS
 140 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS_ALNUM()
 141 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS_SPACE()
 142 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS_DIGIT()
 143 #define RE_utf8_perl_word   PL_utf8_alnum
 144 #define RE_utf8_perl_space  PL_utf8_space
 145 #define RE_utf8_posix_digit PL_utf8_digit
 146 #define perl_word  alnum
 147 #define perl_space space
 148 #define posix_digit digit
 149 #else
 150 #define LOAD_UTF8_CHARCLASS_PERL_WORD()   LOAD_UTF8_CHARCLASS(perl_word,"a")
 151 #define LOAD_UTF8_CHARCLASS_PERL_SPACE()  LOAD_UTF8_CHARCLASS(perl_space," ")
 152 #define LOAD_UTF8_CHARCLASS_POSIX_DIGIT() LOAD_UTF8_CHARCLASS(posix_digit,"0")
 153 #define RE_utf8_perl_word   PL_utf8_perl_word
 154 #define RE_utf8_perl_space  PL_utf8_perl_space
 155 #define RE_utf8_posix_digit PL_utf8_posix_digit
 156 #endif
 157
 158
 159 #define CCC_TRY_AFF(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                          \
 160         case NAMEL:                                                              \
 161             PL_reg_flags |= RF_tainted;                                                 \
 162             /* FALL THROUGH */                                                          \
 163         case NAME:                                                                     \
 164             if (!nextchr)                                                               \
 165                 sayNO;                                                                  \
 166             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
 167                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
 168                     bool ok;                                                            \
 169                     ENTER;                                                              \
 170                     save_re_context();                                                  \
 171                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
 172                     assert(ok);                                                         \
 173                     LEAVE;                                                              \
 174                 }                                                                       \
 175                 if (!(OP(scan) == NAME                                                  \
 176                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)   \
 177                     : LCFUNC_utf8((U8*)locinput)))                                      \
 178                 {                                                                       \
 179                     sayNO;                                                              \
 180                 }                                                                       \
 181                 locinput += PL_utf8skip[nextchr];                                       \
 182                 nextchr = UCHARAT(locinput);                                            \
 183                 break;                                                                  \
 184             }                                                                           \
 185             if (!(OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                  \
 186                 sayNO;                                                                  \
 187             nextchr = UCHARAT(++locinput);                                              \
 188             break
 189
 190 #define CCC_TRY_NEG(NAME,NAMEL,CLASS,STR,LCFUNC_utf8,FUNC,LCFUNC)                        \
 191         case NAMEL:                                                              \
 192             PL_reg_flags |= RF_tainted;                                                 \
 193             /* FALL THROUGH */                                                          \
 194         case NAME :                                                                     \
 195             if (!nextchr && locinput >= PL_regeol)                                      \
 196                 sayNO;                                                                  \
 197             if (do_utf8 && UTF8_IS_CONTINUED(nextchr)) {                                \
 198                 if (!CAT2(PL_utf8_,CLASS)) {                                            \
 199                     bool ok;                                                            \
 200                     ENTER;                                                              \
 201                     save_re_context();                                                  \
 202                     ok=CAT2(is_utf8_,CLASS)((const U8*)STR);                            \
 203                     assert(ok);                                                         \
 204                     LEAVE;                                                              \
 205                 }                                                                       \
 206                 if ((OP(scan) == NAME                                                  \
 207                     ? (bool)swash_fetch(CAT2(PL_utf8_,CLASS), (U8*)locinput, do_utf8)    \
 208                     : LCFUNC_utf8((U8*)locinput)))                                      \
 209                 {                                                                       \
 210                     sayNO;                                                              \
 211                 }                                                                       \
 212                 locinput += PL_utf8skip[nextchr];                                       \
 213                 nextchr = UCHARAT(locinput);                                            \
 214                 break;                                                                  \
 215             }                                                                           \
 216             if ((OP(scan) == NAME ? FUNC(nextchr) : LCFUNC(nextchr)))                   \
 217                 sayNO;                                                                  \
 218             nextchr = UCHARAT(++locinput);                                              \
 219             break
 220
 221
 222
 223
 224
 225 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 226
 227 /* for use after a quantifier and before an EXACT-like node -- japhy */
 228 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 229 #define JUMPABLE(rn) (      \
 230     OP(rn) == OPEN ||       \
 231     (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 232     OP(rn) == EVAL ||   \
 233     OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 234     OP(rn) == PLUS || OP(rn) == MINMOD || \
 235     OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 236     (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 237 )
 238 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 239
 240 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 241
 242 #if 0
 243 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 244    we don't need this definition. */
 245 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 246 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 247 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 248
 249 #else
 250 /* ... so we use this as its faster. */
 251 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 252 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 253 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 254
 255 #endif
 256
 257 /*
 258   Search for mandatory following text node; for lookahead, the text must
 259   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 260 */
 261 #define FIND_NEXT_IMPT(rn) STMT_START { \
 262     while (JUMPABLE(rn)) { \
 263         const OPCODE type = OP(rn); \
 264         if (type == SUSPEND || PL_regkind[type] == CURLY) \
 265             rn = NEXTOPER(NEXTOPER(rn)); \
 266         else if (type == PLUS) \
 267             rn = NEXTOPER(rn); \
 268         else if (type == IFMATCH) \
 269             rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 270         else rn += NEXT_OFF(rn); \
 271     } \
 272 } STMT_END
 273
 274
 275 static void restore_pos(pTHX_ void *arg);
 276
 277 STATIC CHECKPOINT
 278 S_regcppush(pTHX_ I32 parenfloor)
 279 {
 280     dVAR;
 281     const int retval = PL_savestack_ix;
 282 #define REGCP_PAREN_ELEMS 4
 283     const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 284     int p;
 285     GET_RE_DEBUG_FLAGS_DECL;
 286
 287     if (paren_elems_to_push < 0)
 288         Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 289
 290 #define REGCP_OTHER_ELEMS 7
 291     SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 292
 293     for (p = PL_regsize; p > parenfloor; p--) {
 294 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 295         SSPUSHINT(PL_regoffs[p].end);
 296         SSPUSHINT(PL_regoffs[p].start);
 297         SSPUSHPTR(PL_reg_start_tmp[p]);
 298         SSPUSHINT(p);
 299         DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 300           "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 301                       (UV)p, (IV)PL_regoffs[p].start,
 302                       (IV)(PL_reg_start_tmp[p] - PL_bostr),
 303                       (IV)PL_regoffs[p].end
 304         ));
 305     }
 306 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 307     SSPUSHPTR(PL_regoffs);
 308     SSPUSHINT(PL_regsize);
 309     SSPUSHINT(*PL_reglastparen);
 310     SSPUSHINT(*PL_reglastcloseparen);
 311     SSPUSHPTR(PL_reginput);
 312 #define REGCP_FRAME_ELEMS 2
 313 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 314  * are needed for the regexp context stack bookkeeping. */
 315     SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 316     SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 317
 318     return retval;
 319 }
 320
 321 /* These are needed since we do not localize EVAL nodes: */
 322 #define REGCP_SET(cp)                                           \
 323     DEBUG_STATE_r(                                              \
 324             PerlIO_printf(Perl_debug_log,                       \
 325                 "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 326                 (IV)PL_savestack_ix));                          \
 327     cp = PL_savestack_ix
 328
 329 #define REGCP_UNWIND(cp)                                        \
 330     DEBUG_STATE_r(                                              \
 331         if (cp != PL_savestack_ix)                              \
 332             PerlIO_printf(Perl_debug_log,                       \
 333                 "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 334                 (IV)(cp), (IV)PL_savestack_ix));                \
 335     regcpblow(cp)
 336
 337 STATIC char *
 338 S_regcppop(pTHX_ const regexp *rex)
 339 {
 340     dVAR;
 341     U32 i;
 342     char *input;
 343     GET_RE_DEBUG_FLAGS_DECL;
 344
 345     PERL_ARGS_ASSERT_REGCPPOP;
 346
 347     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 348     i = SSPOPINT;
 349     assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 350     i = SSPOPINT; /* Parentheses elements to pop. */
 351     input = (char *) SSPOPPTR;
 352     *PL_reglastcloseparen = SSPOPINT;
 353     *PL_reglastparen = SSPOPINT;
 354     PL_regsize = SSPOPINT;
 355     PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 356
 357
 358     /* Now restore the parentheses context. */
 359     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 360          i > 0; i -= REGCP_PAREN_ELEMS) {
 361         I32 tmps;
 362         U32 paren = (U32)SSPOPINT;
 363         PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 364         PL_regoffs[paren].start = SSPOPINT;
 365         tmps = SSPOPINT;
 366         if (paren <= *PL_reglastparen)
 367             PL_regoffs[paren].end = tmps;
 368         DEBUG_BUFFERS_r(
 369             PerlIO_printf(Perl_debug_log,
 370                           "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 371                           (UV)paren, (IV)PL_regoffs[paren].start,
 372                           (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 373                           (IV)PL_regoffs[paren].end,
 374                           (paren > *PL_reglastparen ? "(no)" : ""));
 375         );
 376     }
 377     DEBUG_BUFFERS_r(
 378         if (*PL_reglastparen + 1 <= rex->nparens) {
 379             PerlIO_printf(Perl_debug_log,
 380                           "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 381                           (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 382         }
 383     );
 384 #if 1
 385     /* It would seem that the similar code in regtry()
 386      * already takes care of this, and in fact it is in
 387      * a better location to since this code can #if 0-ed out
 388      * but the code in regtry() is needed or otherwise tests
 389      * requiring null fields (pat.t#187 and split.t#{13,14}
 390      * (as of patchlevel 7877)  will fail.  Then again,
 391      * this code seems to be necessary or otherwise
 392      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 393      * --jhi updated by dapm */
 394     for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 395         if (i > PL_regsize)
 396             PL_regoffs[i].start = -1;
 397         PL_regoffs[i].end = -1;
 398     }
 399 #endif
 400     return input;
 401 }
 402
 403 #define regcpblow(cp) LEAVE_SCOPE(cp)   /* Ignores regcppush()ed data. */
 404
 405 /*
 406  * pregexec and friends
 407  */
 408
 409 #ifndef PERL_IN_XSUB_RE
 410 /*
 411  - pregexec - match a regexp against a string
 412  */
 413 I32
 414 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 415          char *strbeg, I32 minend, SV *screamer, U32 nosave)
 416 /* strend: pointer to null at end of string */
 417 /* strbeg: real beginning of string */
 418 /* minend: end of match must be >=minend after stringarg. */
 419 /* nosave: For optimizations. */
 420 {
 421     PERL_ARGS_ASSERT_PREGEXEC;
 422
 423     return
 424         regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 425                       nosave ? 0 : REXEC_COPY_STR);
 426 }
 427 #endif
 428
 429 /*
 430  * Need to implement the following flags for reg_anch:
 431  *
 432  * USE_INTUIT_NOML              - Useful to call re_intuit_start() first
 433  * USE_INTUIT_ML
 434  * INTUIT_AUTORITATIVE_NOML     - Can trust a positive answer
 435  * INTUIT_AUTORITATIVE_ML
 436  * INTUIT_ONCE_NOML             - Intuit can match in one location only.
 437  * INTUIT_ONCE_ML
 438  *
 439  * Another flag for this function: SECOND_TIME (so that float substrs
 440  * with giant delta may be not rechecked).
 441  */
 442
 443 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 444
 445 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 446    Otherwise, only SvCUR(sv) is used to get strbeg. */
 447
 448 /* XXXX We assume that strpos is strbeg unless sv. */
 449
 450 /* XXXX Some places assume that there is a fixed substring.
 451         An update may be needed if optimizer marks as "INTUITable"
 452         RExen without fixed substrings.  Similarly, it is assumed that
 453         lengths of all the strings are no more than minlen, thus they
 454         cannot come from lookahead.
 455         (Or minlen should take into account lookahead.)
 456   NOTE: Some of this comment is not correct. minlen does now take account
 457   of lookahead/behind. Further research is required. -- demerphq
 458
 459 */
 460
 461 /* A failure to find a constant substring means that there is no need to make
 462    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 463    finding a substring too deep into the string means that less calls to
 464    regtry() should be needed.
 465
 466    REx compiler's optimizer found 4 possible hints:
 467         a) Anchored substring;
 468         b) Fixed substring;
 469         c) Whether we are anchored (beginning-of-line or \G);
 470         d) First node (of those at offset 0) which may distingush positions;
 471    We use a)b)d) and multiline-part of c), and try to find a position in the
 472    string which does not contradict any of them.
 473  */
 474
 475 /* Most of decisions we do here should have been done at compile time.
 476    The nodes of the REx which we used for the search should have been
 477    deleted from the finite automaton. */
 478
 479 char *
 480 Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, char *strpos,
 481                      char *strend, const U32 flags, re_scream_pos_data *data)
 482 {
 483     dVAR;
 484     struct regexp *const prog = (struct regexp *)SvANY(rx);
 485     register I32 start_shift = 0;
 486     /* Should be nonnegative! */
 487     register I32 end_shift   = 0;
 488     register char *s;
 489     register SV *check;
 490     char *strbeg;
 491     char *t;
 492     const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 493     I32 ml_anch;
 494     register char *other_last = NULL;   /* other substr checked before this */
 495     char *check_at = NULL;              /* check substr found at this pos */
 496     const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 497     RXi_GET_DECL(prog,progi);
 498 #ifdef DEBUGGING
 499     const char * const i_strpos = strpos;
 500 #endif
 501     GET_RE_DEBUG_FLAGS_DECL;
 502
 503     PERL_ARGS_ASSERT_RE_INTUIT_START;
 504
 505     RX_MATCH_UTF8_set(rx,do_utf8);
 506
 507     if (RX_UTF8(rx)) {
 508         PL_reg_flags |= RF_utf8;
 509     }
 510     DEBUG_EXECUTE_r(
 511         debug_start_match(rx, do_utf8, strpos, strend,
 512             sv ? "Guessing start of match in sv for"
 513                : "Guessing start of match in string for");
 514               );
 515
 516     /* CHR_DIST() would be more correct here but it makes things slow. */
 517     if (prog->minlen > strend - strpos) {
 518         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 519                               "String too short... [re_intuit_start]\n"));
 520         goto fail;
 521     }
 522
 523     strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 524     PL_regeol = strend;
 525     if (do_utf8) {
 526         if (!prog->check_utf8 && prog->check_substr)
 527             to_utf8_substr(prog);
 528         check = prog->check_utf8;
 529     } else {
 530         if (!prog->check_substr && prog->check_utf8)
 531             to_byte_substr(prog);
 532         check = prog->check_substr;
 533     }
 534     if (check == &PL_sv_undef) {
 535         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 536                 "Non-utf8 string cannot match utf8 check string\n"));
 537         goto fail;
 538     }
 539     if (prog->extflags & RXf_ANCH) {    /* Match at beg-of-str or after \n */
 540         ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 541                      || ( (prog->extflags & RXf_ANCH_BOL)
 542                           && !multiline ) );    /* Check after \n? */
 543
 544         if (!ml_anch) {
 545           if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 546                 && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 547                /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 548                && sv && !SvROK(sv)
 549                && (strpos != strbeg)) {
 550               DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 551               goto fail;
 552           }
 553           if (prog->check_offset_min == prog->check_offset_max &&
 554               !(prog->extflags & RXf_CANY_SEEN)) {
 555             /* Substring at constant offset from beg-of-str... */
 556             I32 slen;
 557
 558             s = HOP3c(strpos, prog->check_offset_min, strend);
 559
 560             if (SvTAIL(check)) {
 561                 slen = SvCUR(check);    /* >= 1 */
 562
 563                 if ( strend - s > slen || strend - s < slen - 1
 564                      || (strend - s == slen && strend[-1] != '\n')) {
 565                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 566                     goto fail_finish;
 567                 }
 568                 /* Now should match s[0..slen-2] */
 569                 slen--;
 570                 if (slen && (*SvPVX_const(check) != *s
 571                              || (slen > 1
 572                                  && memNE(SvPVX_const(check), s, slen)))) {
 573                   report_neq:
 574                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 575                     goto fail_finish;
 576                 }
 577             }
 578             else if (*SvPVX_const(check) != *s
 579                      || ((slen = SvCUR(check)) > 1
 580                          && memNE(SvPVX_const(check), s, slen)))
 581                 goto report_neq;
 582             check_at = s;
 583             goto success_at_start;
 584           }
 585         }
 586         /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 587         s = strpos;
 588         start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 589         end_shift = prog->check_end_shift;
 590
 591         if (!ml_anch) {
 592             const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 593                                          - (SvTAIL(check) != 0);
 594             const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 595
 596             if (end_shift < eshift)
 597                 end_shift = eshift;
 598         }
 599     }
 600     else {                              /* Can match at random position */
 601         ml_anch = 0;
 602         s = strpos;
 603         start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 604         end_shift = prog->check_end_shift;
 605
 606         /* end shift should be non negative here */
 607     }
 608
 609 #ifdef QDEBUGGING       /* 7/99: reports of failure (with the older version) */
 610     if (end_shift < 0)
 611         Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 612                    (IV)end_shift, RX_PRECOMP(prog));
 613 #endif
 614
 615   restart:
 616     /* Find a possible match in the region s..strend by looking for
 617        the "check" substring in the region corrected by start/end_shift. */
 618
 619     {
 620         I32 srch_start_shift = start_shift;
 621         I32 srch_end_shift = end_shift;
 622         if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 623             srch_end_shift -= ((strbeg - s) - srch_start_shift);
 624             srch_start_shift = strbeg - s;
 625         }
 626     DEBUG_OPTIMISE_MORE_r({
 627         PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 628             (IV)prog->check_offset_min,
 629             (IV)srch_start_shift,
 630             (IV)srch_end_shift,
 631             (IV)prog->check_end_shift);
 632     });
 633
 634     if (flags & REXEC_SCREAM) {
 635         I32 p = -1;                     /* Internal iterator of scream. */
 636         I32 * const pp = data ? data->scream_pos : &p;
 637
 638         if (PL_screamfirst[BmRARE(check)] >= 0
 639             || ( BmRARE(check) == '\n'
 640                  && (BmPREVIOUS(check) == SvCUR(check) - 1)
 641                  && SvTAIL(check) ))
 642             s = screaminstr(sv, check,
 643                             srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 644         else
 645             goto fail_finish;
 646         /* we may be pointing at the wrong string */
 647         if (s && RXp_MATCH_COPIED(prog))
 648             s = strbeg + (s - SvPVX_const(sv));
 649         if (data)
 650             *data->scream_olds = s;
 651     }
 652     else {
 653         U8* start_point;
 654         U8* end_point;
 655         if (prog->extflags & RXf_CANY_SEEN) {
 656             start_point= (U8*)(s + srch_start_shift);
 657             end_point= (U8*)(strend - srch_end_shift);
 658         } else {
 659             start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 660             end_point= HOP3(strend, -srch_end_shift, strbeg);
 661         }
 662         DEBUG_OPTIMISE_MORE_r({
 663             PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 664                 (int)(end_point - start_point),
 665                 (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 666                 start_point);
 667         });
 668
 669         s = fbm_instr( start_point, end_point,
 670                       check, multiline ? FBMrf_MULTILINE : 0);
 671     }
 672     }
 673     /* Update the count-of-usability, remove useless subpatterns,
 674         unshift s.  */
 675
 676     DEBUG_EXECUTE_r({
 677         RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 678             SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 679         PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 680                           (s ? "Found" : "Did not find"),
 681             (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 682                 ? "anchored" : "floating"),
 683             quoted,
 684             RE_SV_TAIL(check),
 685             (s ? " at offset " : "...\n") );
 686     });
 687
 688     if (!s)
 689         goto fail_finish;
 690     /* Finish the diagnostic message */
 691     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 692
 693     /* XXX dmq: first branch is for positive lookbehind...
 694        Our check string is offset from the beginning of the pattern.
 695        So we need to do any stclass tests offset forward from that
 696        point. I think. :-(
 697      */
 698
 699
 700
 701     check_at=s;
 702
 703
 704     /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 705        Start with the other substr.
 706        XXXX no SCREAM optimization yet - and a very coarse implementation
 707        XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 708                 *always* match.  Probably should be marked during compile...
 709        Probably it is right to do no SCREAM here...
 710      */
 711
 712     if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 713                 : (prog->float_substr && prog->anchored_substr))
 714     {
 715         /* Take into account the "other" substring. */
 716         /* XXXX May be hopelessly wrong for UTF... */
 717         if (!other_last)
 718             other_last = strpos;
 719         if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 720           do_other_anchored:
 721             {
 722                 char * const last = HOP3c(s, -start_shift, strbeg);
 723                 char *last1, *last2;
 724                 char * const saved_s = s;
 725                 SV* must;
 726
 727                 t = s - prog->check_offset_max;
 728                 if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 729                     && (!do_utf8
 730                         || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 731                             && t > strpos)))
 732                     NOOP;
 733                 else
 734                     t = strpos;
 735                 t = HOP3c(t, prog->anchored_offset, strend);
 736                 if (t < other_last)     /* These positions already checked */
 737                     t = other_last;
 738                 last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 739                 if (last < last1)
 740                     last1 = last;
 741                 /* XXXX It is not documented what units *_offsets are in.
 742                    We assume bytes, but this is clearly wrong.
 743                    Meaning this code needs to be carefully reviewed for errors.
 744                    dmq.
 745                   */
 746
 747                 /* On end-of-str: see comment below. */
 748                 must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 749                 if (must == &PL_sv_undef) {
 750                     s = (char*)NULL;
 751                     DEBUG_r(must = prog->anchored_utf8);        /* for debug */
 752                 }
 753                 else
 754                     s = fbm_instr(
 755                         (unsigned char*)t,
 756                         HOP3(HOP3(last1, prog->anchored_offset, strend)
 757                                 + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 758                         must,
 759                         multiline ? FBMrf_MULTILINE : 0
 760                     );
 761                 DEBUG_EXECUTE_r({
 762                     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 763                         SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 764                     PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 765                         (s ? "Found" : "Contradicts"),
 766                         quoted, RE_SV_TAIL(must));
 767                 });
 768
 769
 770                 if (!s) {
 771                     if (last1 >= last2) {
 772                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 773                                                 ", giving up...\n"));
 774                         goto fail_finish;
 775                     }
 776                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 777                         ", trying floating at offset %ld...\n",
 778                         (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 779                     other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 780                     s = HOP3c(last, 1, strend);
 781                     goto restart;
 782                 }
 783                 else {
 784                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 785                           (long)(s - i_strpos)));
 786                     t = HOP3c(s, -prog->anchored_offset, strbeg);
 787                     other_last = HOP3c(s, 1, strend);
 788                     s = saved_s;
 789                     if (t == strpos)
 790                         goto try_at_start;
 791                     goto try_at_offset;
 792                 }
 793             }
 794         }
 795         else {          /* Take into account the floating substring. */
 796             char *last, *last1;
 797             char * const saved_s = s;
 798             SV* must;
 799
 800             t = HOP3c(s, -start_shift, strbeg);
 801             last1 = last =
 802                 HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 803             if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 804                 last = HOP3c(t, prog->float_max_offset, strend);
 805             s = HOP3c(t, prog->float_min_offset, strend);
 806             if (s < other_last)
 807                 s = other_last;
 808  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 809             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 810             /* fbm_instr() takes into account exact value of end-of-str
 811                if the check is SvTAIL(ed).  Since false positives are OK,
 812                and end-of-str is not later than strend we are OK. */
 813             if (must == &PL_sv_undef) {
 814                 s = (char*)NULL;
 815                 DEBUG_r(must = prog->float_utf8);       /* for debug message */
 816             }
 817             else
 818                 s = fbm_instr((unsigned char*)s,
 819                               (unsigned char*)last + SvCUR(must)
 820                                   - (SvTAIL(must)!=0),
 821                               must, multiline ? FBMrf_MULTILINE : 0);
 822             DEBUG_EXECUTE_r({
 823                 RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 824                     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 825                 PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 826                     (s ? "Found" : "Contradicts"),
 827                     quoted, RE_SV_TAIL(must));
 828             });
 829             if (!s) {
 830                 if (last1 == last) {
 831                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 832                                             ", giving up...\n"));
 833                     goto fail_finish;
 834                 }
 835                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 836                     ", trying anchored starting at offset %ld...\n",
 837                     (long)(saved_s + 1 - i_strpos)));
 838                 other_last = last;
 839                 s = HOP3c(t, 1, strend);
 840                 goto restart;
 841             }
 842             else {
 843                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 844                       (long)(s - i_strpos)));
 845                 other_last = s; /* Fix this later. --Hugo */
 846                 s = saved_s;
 847                 if (t == strpos)
 848                     goto try_at_start;
 849                 goto try_at_offset;
 850             }
 851         }
 852     }
 853
 854
 855     t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 856
 857     DEBUG_OPTIMISE_MORE_r(
 858         PerlIO_printf(Perl_debug_log,
 859             "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 860             (IV)prog->check_offset_min,
 861             (IV)prog->check_offset_max,
 862             (IV)(s-strpos),
 863             (IV)(t-strpos),
 864             (IV)(t-s),
 865             (IV)(strend-strpos)
 866         )
 867     );
 868
 869     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 870         && (!do_utf8
 871             || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 872                  && t > strpos)))
 873     {
 874         /* Fixed substring is found far enough so that the match
 875            cannot start at strpos. */
 876       try_at_offset:
 877         if (ml_anch && t[-1] != '\n') {
 878             /* Eventually fbm_*() should handle this, but often
 879                anchored_offset is not 0, so this check will not be wasted. */
 880             /* XXXX In the code below we prefer to look for "^" even in
 881                presence of anchored substrings.  And we search even
 882                beyond the found float position.  These pessimizations
 883                are historical artefacts only.  */
 884           find_anchor:
 885             while (t < strend - prog->minlen) {
 886                 if (*t == '\n') {
 887                     if (t < check_at - prog->check_offset_min) {
 888                         if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 889                             /* Since we moved from the found position,
 890                                we definitely contradict the found anchored
 891                                substr.  Due to the above check we do not
 892                                contradict "check" substr.
 893                                Thus we can arrive here only if check substr
 894                                is float.  Redo checking for "other"=="fixed".
 895                              */
 896                             strpos = t + 1;
 897                             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 898                                 PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 899                             goto do_other_anchored;
 900                         }
 901                         /* We don't contradict the found floating substring. */
 902                         /* XXXX Why not check for STCLASS? */
 903                         s = t + 1;
 904                         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 905                             PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 906                         goto set_useful;
 907                     }
 908                     /* Position contradicts check-string */
 909                     /* XXXX probably better to look for check-string
 910                        than for "\n", so one should lower the limit for t? */
 911                     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 912                         PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 913                     other_last = strpos = s = t + 1;
 914                     goto restart;
 915                 }
 916                 t++;
 917             }
 918             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 919                         PL_colors[0], PL_colors[1]));
 920             goto fail_finish;
 921         }
 922         else {
 923             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 924                         PL_colors[0], PL_colors[1]));
 925         }
 926         s = t;
 927       set_useful:
 928         ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr);    /* hooray/5 */
 929     }
 930     else {
 931         /* The found string does not prohibit matching at strpos,
 932            - no optimization of calling REx engine can be performed,
 933            unless it was an MBOL and we are not after MBOL,
 934            or a future STCLASS check will fail this. */
 935       try_at_start:
 936         /* Even in this situation we may use MBOL flag if strpos is offset
 937            wrt the start of the string. */
 938         if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 939             && (strpos != strbeg) && strpos[-1] != '\n'
 940             /* May be due to an implicit anchor of m{.*foo}  */
 941             && !(prog->intflags & PREGf_IMPLICIT))
 942         {
 943             t = strpos;
 944             goto find_anchor;
 945         }
 946         DEBUG_EXECUTE_r( if (ml_anch)
 947             PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 948                           (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 949         );
 950       success_at_start:
 951         if (!(prog->intflags & PREGf_NAUGHTY)   /* XXXX If strpos moved? */
 952             && (do_utf8 ? (
 953                 prog->check_utf8                /* Could be deleted already */
 954                 && --BmUSEFUL(prog->check_utf8) < 0
 955                 && (prog->check_utf8 == prog->float_utf8)
 956             ) : (
 957                 prog->check_substr              /* Could be deleted already */
 958                 && --BmUSEFUL(prog->check_substr) < 0
 959                 && (prog->check_substr == prog->float_substr)
 960             )))
 961         {
 962             /* If flags & SOMETHING - do not do it many times on the same match */
 963             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 964             SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 965             if (do_utf8 ? prog->check_substr : prog->check_utf8)
 966                 SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 967             prog->check_substr = prog->check_utf8 = NULL;       /* disable */
 968             prog->float_substr = prog->float_utf8 = NULL;       /* clear */
 969             check = NULL;                       /* abort */
 970             s = strpos;
 971             /* XXXX This is a remnant of the old implementation.  It
 972                     looks wasteful, since now INTUIT can use many
 973                     other heuristics. */
 974             prog->extflags &= ~RXf_USE_INTUIT;
 975         }
 976         else
 977             s = strpos;
 978     }
 979
 980     /* Last resort... */
 981     /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 982     /* trie stclasses are too expensive to use here, we are better off to
 983        leave it to regmatch itself */
 984     if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 985         /* minlen == 0 is possible if regstclass is \b or \B,
 986            and the fixed substr is ''$.
 987            Since minlen is already taken into account, s+1 is before strend;
 988            accidentally, minlen >= 1 guaranties no false positives at s + 1
 989            even for \b or \B.  But (minlen? 1 : 0) below assumes that
 990            regstclass does not come from lookahead...  */
 991         /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 992            This leaves EXACTF only, which is dealt with in find_byclass().  */
 993         const U8* const str = (U8*)STRING(progi->regstclass);
 994         const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 995                     ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 996                     : 1);
 997         char * endpos;
 998         if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 999             endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
1000         else if (prog->float_substr || prog->float_utf8)
1001             endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
1002         else
1003             endpos= strend;
1004
1005         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
1006                                       (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
1007
1008         t = s;
1009         s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
1010         if (!s) {
1011 #ifdef DEBUGGING
1012             const char *what = NULL;
1013 #endif
1014             if (endpos == strend) {
1015                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1016                                 "Could not match STCLASS...\n") );
1017                 goto fail;
1018             }
1019             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1020                                    "This position contradicts STCLASS...\n") );
1021             if ((prog->extflags & RXf_ANCH) && !ml_anch)
1022                 goto fail;
1023             /* Contradict one of substrings */
1024             if (prog->anchored_substr || prog->anchored_utf8) {
1025                 if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
1026                     DEBUG_EXECUTE_r( what = "anchored" );
1027                   hop_and_restart:
1028                     s = HOP3c(t, 1, strend);
1029                     if (s + start_shift + end_shift > strend) {
1030                         /* XXXX Should be taken into account earlier? */
1031                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1032                                                "Could not match STCLASS...\n") );
1033                         goto fail;
1034                     }
1035                     if (!check)
1036                         goto giveup;
1037                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1038                                 "Looking for %s substr starting at offset %ld...\n",
1039                                  what, (long)(s + start_shift - i_strpos)) );
1040                     goto restart;
1041                 }
1042                 /* Have both, check_string is floating */
1043                 if (t + start_shift >= check_at) /* Contradicts floating=check */
1044                     goto retry_floating_check;
1045                 /* Recheck anchored substring, but not floating... */
1046                 s = check_at;
1047                 if (!check)
1048                     goto giveup;
1049                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1050                           "Looking for anchored substr starting at offset %ld...\n",
1051                           (long)(other_last - i_strpos)) );
1052                 goto do_other_anchored;
1053             }
1054             /* Another way we could have checked stclass at the
1055                current position only: */
1056             if (ml_anch) {
1057                 s = t = t + 1;
1058                 if (!check)
1059                     goto giveup;
1060                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
1061                           "Looking for /%s^%s/m starting at offset %ld...\n",
1062                           PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
1063                 goto try_at_offset;
1064             }
1065             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))     /* Could have been deleted */
1066                 goto fail;
1067             /* Check is floating subtring. */
1068           retry_floating_check:
1069             t = check_at - start_shift;
1070             DEBUG_EXECUTE_r( what = "floating" );
1071             goto hop_and_restart;
1072         }
1073         if (t != s) {
1074             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1075                         "By STCLASS: moving %ld --> %ld\n",
1076                                   (long)(t - i_strpos), (long)(s - i_strpos))
1077                    );
1078         }
1079         else {
1080             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1081                                   "Does not contradict STCLASS...\n");
1082                    );
1083         }
1084     }
1085   giveup:
1086     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
1087                           PL_colors[4], (check ? "Guessed" : "Giving up"),
1088                           PL_colors[5], (long)(s - i_strpos)) );
1089     return s;
1090
1091   fail_finish:                          /* Substring not found */
1092     if (prog->check_substr || prog->check_utf8)         /* could be removed already */
1093         BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
1094   fail:
1095     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
1096                           PL_colors[4], PL_colors[5]));
1097     return NULL;
1098 }
1099
1100 #define DECL_TRIE_TYPE(scan) \
1101     const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1102                     trie_type = (scan->flags != EXACT) \
1103                               ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1104                               : (do_utf8 ? trie_utf8 : trie_plain)
1105
1106 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1107 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1108     UV uvc_unfolded = 0;                                                    \
1109     switch (trie_type) {                                                    \
1110     case trie_utf8_fold:                                                    \
1111         if ( foldlen>0 ) {                                                  \
1112             uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1113             foldlen -= len;                                                 \
1114             uscan += len;                                                   \
1115             len=0;                                                          \
1116         } else {                                                            \
1117             uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1118             uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1119             foldlen -= UNISKIP( uvc );                                      \
1120             uscan = foldbuf + UNISKIP( uvc );                               \
1121         }                                                                   \
1122         break;                                                              \
1123     case trie_latin_utf8_fold:                                              \
1124         if ( foldlen>0 ) {                                                  \
1125             uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1126             foldlen -= len;                                                 \
1127             uscan += len;                                                   \
1128             len=0;                                                          \
1129         } else {                                                            \
1130             len = 1;                                                        \
1131             uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1132             foldlen -= UNISKIP( uvc );                                      \
1133             uscan = foldbuf + UNISKIP( uvc );                               \
1134         }                                                                   \
1135         break;                                                              \
1136     case trie_utf8:                                                         \
1137         uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1138         break;                                                              \
1139     case trie_plain:                                                        \
1140         uvc = (UV)*uc;                                                      \
1141         len = 1;                                                            \
1142     }                                                                       \
1143                                                                             \
1144     if (uvc < 256) {                                                        \
1145         charid = trie->charmap[ uvc ];                                      \
1146     }                                                                       \
1147     else {                                                                  \
1148         charid = 0;                                                         \
1149         if (widecharmap) {                                                  \
1150             SV** const svpp = hv_fetch(widecharmap,                         \
1151                         (char*)&uvc, sizeof(UV), 0);                        \
1152             if (svpp)                                                       \
1153                 charid = (U16)SvIV(*svpp);                                  \
1154         }                                                                   \
1155     }                                                                       \
1156     if (!charid && trie_type == trie_utf8_fold && !UTF) {                   \
1157         charid = trie->charmap[uvc_unfolded];                               \
1158     }                                                                       \
1159 } STMT_END
1160
1161 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1162 {                                                      \
1163     char *my_strend= (char *)strend;                   \
1164     if ( (CoNd)                                        \
1165          && (ln == len ||                              \
1166              !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1167                         m, NULL, ln, (bool)UTF))       \
1168          && (!reginfo || regtry(reginfo, &s)) )        \
1169         goto got_it;                                   \
1170     else {                                             \
1171          U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1172          uvchr_to_utf8(tmpbuf, c);                     \
1173          f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1174          if ( f != c                                   \
1175               && (f == c1 || f == c2)                  \
1176               && (ln == len ||                         \
1177                 !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1178                               m, NULL, ln, (bool)UTF)) \
1179               && (!reginfo || regtry(reginfo, &s)) )   \
1180               goto got_it;                             \
1181     }                                                  \
1182 }                                                      \
1183 s += len
1184
1185 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1186 STMT_START {                                              \
1187     while (s <= e) {                                      \
1188         if ( (CoNd)                                       \
1189              && (ln == 1 || !(OP(c) == EXACTF             \
1190                               ? ibcmp(s, m, ln)           \
1191                               : ibcmp_locale(s, m, ln)))  \
1192              && (!reginfo || regtry(reginfo, &s)) )        \
1193             goto got_it;                                  \
1194         s++;                                              \
1195     }                                                     \
1196 } STMT_END
1197
1198 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1199 STMT_START {                                          \
1200     while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1201         CoDe                                          \
1202         s += uskip;                                   \
1203     }                                                 \
1204 } STMT_END
1205
1206 #define REXEC_FBC_SCAN(CoDe)                          \
1207 STMT_START {                                          \
1208     while (s < strend) {                              \
1209         CoDe                                          \
1210         s++;                                          \
1211     }                                                 \
1212 } STMT_END
1213
1214 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1215 REXEC_FBC_UTF8_SCAN(                                  \
1216     if (CoNd) {                                       \
1217         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1218             goto got_it;                              \
1219         else                                          \
1220             tmp = doevery;                            \
1221     }                                                 \
1222     else                                              \
1223         tmp = 1;                                      \
1224 )
1225
1226 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1227 REXEC_FBC_SCAN(                                       \
1228     if (CoNd) {                                       \
1229         if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1230             goto got_it;                              \
1231         else                                          \
1232             tmp = doevery;                            \
1233     }                                                 \
1234     else                                              \
1235         tmp = 1;                                      \
1236 )
1237
1238 #define REXEC_FBC_TRYIT               \
1239 if ((!reginfo || regtry(reginfo, &s))) \
1240     goto got_it
1241
1242 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1243     if (do_utf8) {                                             \
1244         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1245     }                                                          \
1246     else {                                                     \
1247         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1248     }                                                          \
1249     break
1250
1251 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1252     if (do_utf8) {                                             \
1253         UtFpReLoAd;                                            \
1254         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1255     }                                                          \
1256     else {                                                     \
1257         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1258     }                                                          \
1259     break
1260
1261 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1262     PL_reg_flags |= RF_tainted;                                \
1263     if (do_utf8) {                                             \
1264         REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1265     }                                                          \
1266     else {                                                     \
1267         REXEC_FBC_CLASS_SCAN(CoNd);                            \
1268     }                                                          \
1269     break
1270
1271 #define DUMP_EXEC_POS(li,s,doutf8) \
1272     dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1273
1274 /* We know what class REx starts with.  Try to find this position... */
1275 /* if reginfo is NULL, its a dryrun */
1276 /* annoyingly all the vars in this routine have different names from their counterparts
1277    in regmatch. /grrr */
1278
1279 STATIC char *
1280 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1281     const char *strend, regmatch_info *reginfo)
1282 {
1283         dVAR;
1284         const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1285         char *m;
1286         STRLEN ln;
1287         STRLEN lnc;
1288         register STRLEN uskip;
1289         unsigned int c1;
1290         unsigned int c2;
1291         char *e;
1292         register I32 tmp = 1;   /* Scratch variable? */
1293         register const bool do_utf8 = PL_reg_match_utf8;
1294         RXi_GET_DECL(prog,progi);
1295
1296         PERL_ARGS_ASSERT_FIND_BYCLASS;
1297
1298         /* We know what class it must start with. */
1299         switch (OP(c)) {
1300         case ANYOF:
1301             if (do_utf8) {
1302                  REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1303                           !UTF8_IS_INVARIANT((U8)s[0]) ?
1304                           reginclass(prog, c, (U8*)s, 0, do_utf8) :
1305                           REGINCLASS(prog, c, (U8*)s));
1306             }
1307             else {
1308                  while (s < strend) {
1309                       STRLEN skip = 1;
1310
1311                       if (REGINCLASS(prog, c, (U8*)s) ||
1312                           (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1313                            /* The assignment of 2 is intentional:
1314                             * for the folded sharp s, the skip is 2. */
1315                            (skip = SHARP_S_SKIP))) {
1316                            if (tmp && (!reginfo || regtry(reginfo, &s)))
1317                                 goto got_it;
1318                            else
1319                                 tmp = doevery;
1320                       }
1321                       else
1322                            tmp = 1;
1323                       s += skip;
1324                  }
1325             }
1326             break;
1327         case CANY:
1328             REXEC_FBC_SCAN(
1329                 if (tmp && (!reginfo || regtry(reginfo, &s)))
1330                     goto got_it;
1331                 else
1332                     tmp = doevery;
1333             );
1334             break;
1335         case EXACTF:
1336             m   = STRING(c);
1337             ln  = STR_LEN(c);   /* length to match in octets/bytes */
1338             lnc = (I32) ln;     /* length to match in characters */
1339             if (UTF) {
1340                 STRLEN ulen1, ulen2;
1341                 U8 *sm = (U8 *) m;
1342                 U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1343                 U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1344                 /* used by commented-out code below */
1345                 /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1346
1347                 /* XXX: Since the node will be case folded at compile
1348                    time this logic is a little odd, although im not
1349                    sure that its actually wrong. --dmq */
1350
1351                 c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1352                 c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1353
1354                 /* XXX: This is kinda strange. to_utf8_XYZ returns the
1355                    codepoint of the first character in the converted
1356                    form, yet originally we did the extra step.
1357                    No tests fail by commenting this code out however
1358                    so Ive left it out. -- dmq.
1359
1360                 c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1361                                     0, uniflags);
1362                 c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1363                                     0, uniflags);
1364                 */
1365
1366                 lnc = 0;
1367                 while (sm < ((U8 *) m + ln)) {
1368                     lnc++;
1369                     sm += UTF8SKIP(sm);
1370                 }
1371             }
1372             else {
1373                 c1 = *(U8*)m;
1374                 c2 = PL_fold[c1];
1375             }
1376             goto do_exactf;
1377         case EXACTFL:
1378             m   = STRING(c);
1379             ln  = STR_LEN(c);
1380             lnc = (I32) ln;
1381             c1 = *(U8*)m;
1382             c2 = PL_fold_locale[c1];
1383           do_exactf:
1384             e = HOP3c(strend, -((I32)lnc), s);
1385
1386             if (!reginfo && e < s)
1387                 e = s;                  /* Due to minlen logic of intuit() */
1388
1389             /* The idea in the EXACTF* cases is to first find the
1390              * first character of the EXACTF* node and then, if
1391              * necessary, case-insensitively compare the full
1392              * text of the node.  The c1 and c2 are the first
1393              * characters (though in Unicode it gets a bit
1394              * more complicated because there are more cases
1395              * than just upper and lower: one needs to use
1396              * the so-called folding case for case-insensitive
1397              * matching (called "loose matching" in Unicode).
1398              * ibcmp_utf8() will do just that. */
1399
1400             if (do_utf8 || UTF) {
1401                 UV c, f;
1402                 U8 tmpbuf [UTF8_MAXBYTES+1];
1403                 STRLEN len = 1;
1404                 STRLEN foldlen;
1405                 const U32 uniflags = UTF8_ALLOW_DEFAULT;
1406                 if (c1 == c2) {
1407                     /* Upper and lower of 1st char are equal -
1408                      * probably not a "letter". */
1409                     while (s <= e) {
1410                         if (do_utf8) {
1411                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1412                                            uniflags);
1413                         } else {
1414                             c = *((U8*)s);
1415                         }
1416                         REXEC_FBC_EXACTISH_CHECK(c == c1);
1417                     }
1418                 }
1419                 else {
1420                     while (s <= e) {
1421                         if (do_utf8) {
1422                             c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1423                                            uniflags);
1424                         } else {
1425                             c = *((U8*)s);
1426                         }
1427
1428                         /* Handle some of the three Greek sigmas cases.
1429                          * Note that not all the possible combinations
1430                          * are handled here: some of them are handled
1431                          * by the standard folding rules, and some of
1432                          * them (the character class or ANYOF cases)
1433                          * are handled during compiletime in
1434                          * regexec.c:S_regclass(). */
1435                         if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1436                             c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1437                             c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1438
1439                         REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1440                     }
1441                 }
1442             }
1443             else {
1444                 /* Neither pattern nor string are UTF8 */
1445                 if (c1 == c2)
1446                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1447                 else
1448                     REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1449             }
1450             break;
1451         case BOUNDL:
1452             PL_reg_flags |= RF_tainted;
1453             /* FALL THROUGH */
1454         case BOUND:
1455             if (do_utf8) {
1456                 if (s == PL_bostr)
1457                     tmp = '\n';
1458                 else {
1459                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1460                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1461                 }
1462                 tmp = ((OP(c) == BOUND ?
1463                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1464                 LOAD_UTF8_CHARCLASS_ALNUM();
1465                 REXEC_FBC_UTF8_SCAN(
1466                     if (tmp == !(OP(c) == BOUND ?
1467                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1468                                  isALNUM_LC_utf8((U8*)s)))
1469                     {
1470                         tmp = !tmp;
1471                         REXEC_FBC_TRYIT;
1472                 }
1473                 );
1474             }
1475             else {
1476                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1477                 tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1478                 REXEC_FBC_SCAN(
1479                     if (tmp ==
1480                         !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1481                         tmp = !tmp;
1482                         REXEC_FBC_TRYIT;
1483                 }
1484                 );
1485             }
1486             if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1487                 goto got_it;
1488             break;
1489         case NBOUNDL:
1490             PL_reg_flags |= RF_tainted;
1491             /* FALL THROUGH */
1492         case NBOUND:
1493             if (do_utf8) {
1494                 if (s == PL_bostr)
1495                     tmp = '\n';
1496                 else {
1497                     U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1498                     tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1499                 }
1500                 tmp = ((OP(c) == NBOUND ?
1501                         isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1502                 LOAD_UTF8_CHARCLASS_ALNUM();
1503                 REXEC_FBC_UTF8_SCAN(
1504                     if (tmp == !(OP(c) == NBOUND ?
1505                                  (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1506                                  isALNUM_LC_utf8((U8*)s)))
1507                         tmp = !tmp;
1508                     else REXEC_FBC_TRYIT;
1509                 );
1510             }
1511             else {
1512                 tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1513                 tmp = ((OP(c) == NBOUND ?
1514                         isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1515                 REXEC_FBC_SCAN(
1516                     if (tmp ==
1517                         !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1518                         tmp = !tmp;
1519                     else REXEC_FBC_TRYIT;
1520                 );
1521             }
1522             if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1523                 goto got_it;
1524             break;
1525         case ALNUM:
1526             REXEC_FBC_CSCAN_PRELOAD(
1527                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
1528                 swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1529                 isALNUM(*s)
1530             );
1531         case ALNUML:
1532             REXEC_FBC_CSCAN_TAINT(
1533                 isALNUM_LC_utf8((U8*)s),
1534                 isALNUM_LC(*s)
1535             );
1536         case NALNUM:
1537             REXEC_FBC_CSCAN_PRELOAD(
1538                 LOAD_UTF8_CHARCLASS_PERL_WORD(),
1539                 !swash_fetch(RE_utf8_perl_word, (U8*)s, do_utf8),
1540                 !isALNUM(*s)
1541             );
1542         case NALNUML:
1543             REXEC_FBC_CSCAN_TAINT(
1544                 !isALNUM_LC_utf8((U8*)s),
1545                 !isALNUM_LC(*s)
1546             );
1547         case SPACE:
1548             REXEC_FBC_CSCAN_PRELOAD(
1549                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1550                 *s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8),
1551                 isSPACE(*s)
1552             );
1553         case SPACEL:
1554             REXEC_FBC_CSCAN_TAINT(
1555                 *s == ' ' || isSPACE_LC_utf8((U8*)s),
1556                 isSPACE_LC(*s)
1557             );
1558         case NSPACE:
1559             REXEC_FBC_CSCAN_PRELOAD(
1560                 LOAD_UTF8_CHARCLASS_PERL_SPACE(),
1561                 !(*s == ' ' || swash_fetch(RE_utf8_perl_space,(U8*)s, do_utf8)),
1562                 !isSPACE(*s)
1563             );
1564         case NSPACEL:
1565             REXEC_FBC_CSCAN_TAINT(
1566                 !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1567                 !isSPACE_LC(*s)
1568             );
1569         case DIGIT:
1570             REXEC_FBC_CSCAN_PRELOAD(
1571                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1572                 swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1573                 isDIGIT(*s)
1574             );
1575         case DIGITL:
1576             REXEC_FBC_CSCAN_TAINT(
1577                 isDIGIT_LC_utf8((U8*)s),
1578                 isDIGIT_LC(*s)
1579             );
1580         case NDIGIT:
1581             REXEC_FBC_CSCAN_PRELOAD(
1582                 LOAD_UTF8_CHARCLASS_POSIX_DIGIT(),
1583                 !swash_fetch(RE_utf8_posix_digit,(U8*)s, do_utf8),
1584                 !isDIGIT(*s)
1585             );
1586         case NDIGITL:
1587             REXEC_FBC_CSCAN_TAINT(
1588                 !isDIGIT_LC_utf8((U8*)s),
1589                 !isDIGIT_LC(*s)
1590             );
1591         case LNBREAK:
1592             REXEC_FBC_CSCAN(
1593                 is_LNBREAK_utf8(s),
1594                 is_LNBREAK_latin1(s)
1595             );
1596         case VERTWS:
1597             REXEC_FBC_CSCAN(
1598                 is_VERTWS_utf8(s),
1599                 is_VERTWS_latin1(s)
1600             );
1601         case NVERTWS:
1602             REXEC_FBC_CSCAN(
1603                 !is_VERTWS_utf8(s),
1604                 !is_VERTWS_latin1(s)
1605             );
1606         case HORIZWS:
1607             REXEC_FBC_CSCAN(
1608                 is_HORIZWS_utf8(s),
1609                 is_HORIZWS_latin1(s)
1610             );
1611         case NHORIZWS:
1612             REXEC_FBC_CSCAN(
1613                 !is_HORIZWS_utf8(s),
1614                 !is_HORIZWS_latin1(s)
1615             );
1616         case AHOCORASICKC:
1617         case AHOCORASICK:
1618             {
1619                 DECL_TRIE_TYPE(c);
1620                 /* what trie are we using right now */
1621                 reg_ac_data *aho
1622                     = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1623                 reg_trie_data *trie
1624                     = (reg_trie_data*)progi->data->data[ aho->trie ];
1625                 HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1626
1627                 const char *last_start = strend - trie->minlen;
1628 #ifdef DEBUGGING
1629                 const char *real_start = s;
1630 #endif
1631                 STRLEN maxlen = trie->maxlen;
1632                 SV *sv_points;
1633                 U8 **points; /* map of where we were in the input string
1634                                 when reading a given char. For ASCII this
1635                                 is unnecessary overhead as the relationship
1636                                 is always 1:1, but for Unicode, especially
1637                                 case folded Unicode this is not true. */
1638                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1639                 U8 *bitmap=NULL;
1640
1641
1642                 GET_RE_DEBUG_FLAGS_DECL;
1643
1644                 /* We can't just allocate points here. We need to wrap it in
1645                  * an SV so it gets freed properly if there is a croak while
1646                  * running the match */
1647                 ENTER;
1648                 SAVETMPS;
1649                 sv_points=newSV(maxlen * sizeof(U8 *));
1650                 SvCUR_set(sv_points,
1651                     maxlen * sizeof(U8 *));
1652                 SvPOK_on(sv_points);
1653                 sv_2mortal(sv_points);
1654                 points=(U8**)SvPV_nolen(sv_points );
1655                 if ( trie_type != trie_utf8_fold
1656                      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1657                 {
1658                     if (trie->bitmap)
1659                         bitmap=(U8*)trie->bitmap;
1660                     else
1661                         bitmap=(U8*)ANYOF_BITMAP(c);
1662                 }
1663                 /* this is the Aho-Corasick algorithm modified a touch
1664                    to include special handling for long "unknown char"
1665                    sequences. The basic idea being that we use AC as long
1666                    as we are dealing with a possible matching char, when
1667                    we encounter an unknown char (and we have not encountered
1668                    an accepting state) we scan forward until we find a legal
1669                    starting char.
1670                    AC matching is basically that of trie matching, except
1671                    that when we encounter a failing transition, we fall back
1672                    to the current states "fail state", and try the current char
1673                    again, a process we repeat until we reach the root state,
1674                    state 1, or a legal transition. If we fail on the root state
1675                    then we can either terminate if we have reached an accepting
1676                    state previously, or restart the entire process from the beginning
1677                    if we have not.
1678
1679                  */
1680                 while (s <= last_start) {
1681                     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1682                     U8 *uc = (U8*)s;
1683                     U16 charid = 0;
1684                     U32 base = 1;
1685                     U32 state = 1;
1686                     UV uvc = 0;
1687                     STRLEN len = 0;
1688                     STRLEN foldlen = 0;
1689                     U8 *uscan = (U8*)NULL;
1690                     U8 *leftmost = NULL;
1691 #ifdef DEBUGGING
1692                     U32 accepted_word= 0;
1693 #endif
1694                     U32 pointpos = 0;
1695
1696                     while ( state && uc <= (U8*)strend ) {
1697                         int failed=0;
1698                         U32 word = aho->states[ state ].wordnum;
1699
1700                         if( state==1 ) {
1701                             if ( bitmap ) {
1702                                 DEBUG_TRIE_EXECUTE_r(
1703                                     if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1704                                         dump_exec_pos( (char *)uc, c, strend, real_start,
1705                                             (char *)uc, do_utf8 );
1706                                         PerlIO_printf( Perl_debug_log,
1707                                             " Scanning for legal start char...\n");
1708                                     }
1709                                 );
1710                                 while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1711                                     uc++;
1712                                 }
1713                                 s= (char *)uc;
1714                             }
1715                             if (uc >(U8*)last_start) break;
1716                         }
1717
1718                         if ( word ) {
1719                             U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1720                             if (!leftmost || lpos < leftmost) {
1721                                 DEBUG_r(accepted_word=word);
1722                                 leftmost= lpos;
1723                             }
1724                             if (base==0) break;
1725
1726                         }
1727                         points[pointpos++ % maxlen]= uc;
1728                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1729                                              uscan, len, uvc, charid, foldlen,
1730                                              foldbuf, uniflags);
1731                         DEBUG_TRIE_EXECUTE_r({
1732                             dump_exec_pos( (char *)uc, c, strend, real_start,
1733                                 s,   do_utf8 );
1734                             PerlIO_printf(Perl_debug_log,
1735                                 " Charid:%3u CP:%4"UVxf" ",
1736                                  charid, uvc);
1737                         });
1738
1739                         do {
1740 #ifdef DEBUGGING
1741                             word = aho->states[ state ].wordnum;
1742 #endif
1743                             base = aho->states[ state ].trans.base;
1744
1745                             DEBUG_TRIE_EXECUTE_r({
1746                                 if (failed)
1747                                     dump_exec_pos( (char *)uc, c, strend, real_start,
1748                                         s,   do_utf8 );
1749                                 PerlIO_printf( Perl_debug_log,
1750                                     "%sState: %4"UVxf", word=%"UVxf,
1751                                     failed ? " Fail transition to " : "",
1752                                     (UV)state, (UV)word);
1753                             });
1754                             if ( base ) {
1755                                 U32 tmp;
1756                                 if (charid &&
1757                                      (base + charid > trie->uniquecharcount )
1758                                      && (base + charid - 1 - trie->uniquecharcount
1759                                             < trie->lasttrans)
1760                                      && trie->trans[base + charid - 1 -
1761                                             trie->uniquecharcount].check == state
1762                                      && (tmp=trie->trans[base + charid - 1 -
1763                                         trie->uniquecharcount ].next))
1764                                 {
1765                                     DEBUG_TRIE_EXECUTE_r(
1766                                         PerlIO_printf( Perl_debug_log," - legal\n"));
1767                                     state = tmp;
1768                                     break;
1769                                 }
1770                                 else {
1771                                     DEBUG_TRIE_EXECUTE_r(
1772                                         PerlIO_printf( Perl_debug_log," - fail\n"));
1773                                     failed = 1;
1774                                     state = aho->fail[state];
1775                                 }
1776                             }
1777                             else {
1778                                 /* we must be accepting here */
1779                                 DEBUG_TRIE_EXECUTE_r(
1780                                         PerlIO_printf( Perl_debug_log," - accepting\n"));
1781                                 failed = 1;
1782                                 break;
1783                             }
1784                         } while(state);
1785                         uc += len;
1786                         if (failed) {
1787                             if (leftmost)
1788                                 break;
1789                             if (!state) state = 1;
1790                         }
1791                     }
1792                     if ( aho->states[ state ].wordnum ) {
1793                         U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1794                         if (!leftmost || lpos < leftmost) {
1795                             DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1796                             leftmost = lpos;
1797                         }
1798                     }
1799                     if (leftmost) {
1800                         s = (char*)leftmost;
1801                         DEBUG_TRIE_EXECUTE_r({
1802                             PerlIO_printf(
1803                                 Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1804                                 (UV)accepted_word, (IV)(s - real_start)
1805                             );
1806                         });
1807                         if (!reginfo || regtry(reginfo, &s)) {
1808                             FREETMPS;
1809                             LEAVE;
1810                             goto got_it;
1811                         }
1812                         s = HOPc(s,1);
1813                         DEBUG_TRIE_EXECUTE_r({
1814                             PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1815                         });
1816                     } else {
1817                         DEBUG_TRIE_EXECUTE_r(
1818                             PerlIO_printf( Perl_debug_log,"No match.\n"));
1819                         break;
1820                     }
1821                 }
1822                 FREETMPS;
1823                 LEAVE;
1824             }
1825             break;
1826         default:
1827             Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1828             break;
1829         }
1830         return 0;
1831       got_it:
1832         return s;
1833 }
1834
1835
1836 /*
1837  - regexec_flags - match a regexp against a string
1838  */
1839 I32
1840 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, register char *strend,
1841               char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1842 /* strend: pointer to null at end of string */
1843 /* strbeg: real beginning of string */
1844 /* minend: end of match must be >=minend after stringarg. */
1845 /* data: May be used for some additional optimizations.
1846          Currently its only used, with a U32 cast, for transmitting
1847          the ganch offset when doing a /g match. This will change */
1848 /* nosave: For optimizations. */
1849 {
1850     dVAR;
1851     struct regexp *const prog = (struct regexp *)SvANY(rx);
1852     /*register*/ char *s;
1853     register regnode *c;
1854     /*register*/ char *startpos = stringarg;
1855     I32 minlen;         /* must match at least this many chars */
1856     I32 dontbother = 0; /* how many characters not to try at end */
1857     I32 end_shift = 0;                  /* Same for the end. */         /* CC */
1858     I32 scream_pos = -1;                /* Internal iterator of scream. */
1859     char *scream_olds = NULL;
1860     const bool do_utf8 = (bool)DO_UTF8(sv);
1861     I32 multiline;
1862     RXi_GET_DECL(prog,progi);
1863     regmatch_info reginfo;  /* create some info to pass to regtry etc */
1864     regexp_paren_pair *swap = NULL;
1865     GET_RE_DEBUG_FLAGS_DECL;
1866
1867     PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1868     PERL_UNUSED_ARG(data);
1869
1870     /* Be paranoid... */
1871     if (prog == NULL || startpos == NULL) {
1872         Perl_croak(aTHX_ "NULL regexp parameter");
1873         return 0;
1874     }
1875
1876     multiline = prog->extflags & RXf_PMf_MULTILINE;
1877     reginfo.prog = rx;   /* Yes, sorry that this is confusing.  */
1878
1879     RX_MATCH_UTF8_set(rx, do_utf8);
1880     DEBUG_EXECUTE_r(
1881         debug_start_match(rx, do_utf8, startpos, strend,
1882         "Matching");
1883     );
1884
1885     minlen = prog->minlen;
1886
1887     if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1888         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1889                               "String too short [regexec_flags]...\n"));
1890         goto phooey;
1891     }
1892
1893
1894     /* Check validity of program. */
1895     if (UCHARAT(progi->program) != REG_MAGIC) {
1896         Perl_croak(aTHX_ "corrupted regexp program");
1897     }
1898
1899     PL_reg_flags = 0;
1900     PL_reg_eval_set = 0;
1901     PL_reg_maxiter = 0;
1902
1903     if (RX_UTF8(rx))
1904         PL_reg_flags |= RF_utf8;
1905
1906     /* Mark beginning of line for ^ and lookbehind. */
1907     reginfo.bol = startpos; /* XXX not used ??? */
1908     PL_bostr  = strbeg;
1909     reginfo.sv = sv;
1910
1911     /* Mark end of line for $ (and such) */
1912     PL_regeol = strend;
1913
1914     /* see how far we have to get to not match where we matched before */
1915     reginfo.till = startpos+minend;
1916
1917     /* If there is a "must appear" string, look for it. */
1918     s = startpos;
1919
1920     if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1921         MAGIC *mg;
1922         if (flags & REXEC_IGNOREPOS){   /* Means: check only at start */
1923             reginfo.ganch = startpos + prog->gofs;
1924             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1925               "GPOS IGNOREPOS: reginfo.ganch = startpos + %"UVxf"\n",(UV)prog->gofs));
1926         } else if (sv && SvTYPE(sv) >= SVt_PVMG
1927                   && SvMAGIC(sv)
1928                   && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1929                   && mg->mg_len >= 0) {
1930             reginfo.ganch = strbeg + mg->mg_len;        /* Defined pos() */
1931             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1932                 "GPOS MAGIC: reginfo.ganch = strbeg + %"IVdf"\n",(IV)mg->mg_len));
1933
1934             if (prog->extflags & RXf_ANCH_GPOS) {
1935                 if (s > reginfo.ganch)
1936                     goto phooey;
1937                 s = reginfo.ganch - prog->gofs;
1938                 DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1939                      "GPOS ANCH_GPOS: s = ganch - %"UVxf"\n",(UV)prog->gofs));
1940                 if (s < strbeg)
1941                     goto phooey;
1942             }
1943         }
1944         else if (data) {
1945             reginfo.ganch = strbeg + PTR2UV(data);
1946             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1947                  "GPOS DATA: reginfo.ganch= strbeg + %"UVxf"\n",PTR2UV(data)));
1948
1949         } else {                                /* pos() not defined */
1950             reginfo.ganch = strbeg;
1951             DEBUG_GPOS_r(PerlIO_printf(Perl_debug_log,
1952                  "GPOS: reginfo.ganch = strbeg\n"));
1953         }
1954     }
1955     if (PL_curpm && (PM_GETRE(PL_curpm) == rx)) {
1956         /* We have to be careful. If the previous successful match
1957            was from this regex we don't want a subsequent partially
1958            successful match to clobber the old results.
1959            So when we detect this possibility we add a swap buffer
1960            to the re, and switch the buffer each match. If we fail
1961            we switch it back, otherwise we leave it swapped.
1962         */
1963         swap = prog->offs;
1964         /* do we need a save destructor here for eval dies? */
1965         Newxz(prog->offs, (prog->nparens + 1), regexp_paren_pair);
1966     }
1967     if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1968         re_scream_pos_data d;
1969
1970         d.scream_olds = &scream_olds;
1971         d.scream_pos = &scream_pos;
1972         s = re_intuit_start(rx, sv, s, strend, flags, &d);
1973         if (!s) {
1974             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1975             goto phooey;        /* not present */
1976         }
1977     }
1978
1979
1980
1981     /* Simplest case:  anchored match need be tried only once. */
1982     /*  [unless only anchor is BOL and multiline is set] */
1983     if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1984         if (s == startpos && regtry(&reginfo, &startpos))
1985             goto got_it;
1986         else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1987                  || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1988         {
1989             char *end;
1990
1991             if (minlen)
1992                 dontbother = minlen - 1;
1993             end = HOP3c(strend, -dontbother, strbeg) - 1;
1994             /* for multiline we only have to try after newlines */
1995             if (prog->check_substr || prog->check_utf8) {
1996                 if (s == startpos)
1997                     goto after_try;
1998                 while (1) {
1999                     if (regtry(&reginfo, &s))
2000                         goto got_it;
2001                   after_try:
2002                     if (s > end)
2003                         goto phooey;
2004                     if (prog->extflags & RXf_USE_INTUIT) {
2005                         s = re_intuit_start(rx, sv, s + 1, strend, flags, NULL);
2006                         if (!s)
2007                             goto phooey;
2008                     }
2009                     else
2010                         s++;
2011                 }
2012             } else {
2013                 if (s > startpos)
2014                     s--;
2015                 while (s < end) {
2016                     if (*s++ == '\n') { /* don't need PL_utf8skip here */
2017                         if (regtry(&reginfo, &s))
2018                             goto got_it;
2019                     }
2020                 }
2021             }
2022         }
2023         goto phooey;
2024     } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
2025     {
2026         /* the warning about reginfo.ganch being used without intialization
2027            is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
2028            and we only enter this block when the same bit is set. */
2029         char *tmp_s = reginfo.ganch - prog->gofs;
2030
2031         if (tmp_s >= strbeg && regtry(&reginfo, &tmp_s))
2032             goto got_it;
2033         goto phooey;
2034     }
2035
2036     /* Messy cases:  unanchored match. */
2037     if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
2038         /* we have /x+whatever/ */
2039         /* it must be a one character string (XXXX Except UTF?) */
2040         char ch;
2041 #ifdef DEBUGGING
2042         int did_match = 0;
2043 #endif
2044         if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2045             do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2046         ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
2047
2048         if (do_utf8) {
2049             REXEC_FBC_SCAN(
2050                 if (*s == ch) {
2051                     DEBUG_EXECUTE_r( did_match = 1 );
2052                     if (regtry(&reginfo, &s)) goto got_it;
2053                     s += UTF8SKIP(s);
2054                     while (s < strend && *s == ch)
2055                         s += UTF8SKIP(s);
2056                 }
2057             );
2058         }
2059         else {
2060             REXEC_FBC_SCAN(
2061                 if (*s == ch) {
2062                     DEBUG_EXECUTE_r( did_match = 1 );
2063                     if (regtry(&reginfo, &s)) goto got_it;
2064                     s++;
2065                     while (s < strend && *s == ch)
2066                         s++;
2067                 }
2068             );
2069         }
2070         DEBUG_EXECUTE_r(if (!did_match)
2071                 PerlIO_printf(Perl_debug_log,
2072                                   "Did not find anchored character...\n")
2073                );
2074     }
2075     else if (prog->anchored_substr != NULL
2076               || prog->anchored_utf8 != NULL
2077               || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
2078                   && prog->float_max_offset < strend - s)) {
2079         SV *must;
2080         I32 back_max;
2081         I32 back_min;
2082         char *last;
2083         char *last1;            /* Last position checked before */
2084 #ifdef DEBUGGING
2085         int did_match = 0;
2086 #endif
2087         if (prog->anchored_substr || prog->anchored_utf8) {
2088             if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
2089                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2090             must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
2091             back_max = back_min = prog->anchored_offset;
2092         } else {
2093             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2094                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2095             must = do_utf8 ? prog->float_utf8 : prog->float_substr;
2096             back_max = prog->float_max_offset;
2097             back_min = prog->float_min_offset;
2098         }
2099
2100
2101         if (must == &PL_sv_undef)
2102             /* could not downgrade utf8 check substring, so must fail */
2103             goto phooey;
2104
2105         if (back_min<0) {
2106             last = strend;
2107         } else {
2108             last = HOP3c(strend,        /* Cannot start after this */
2109                   -(I32)(CHR_SVLEN(must)
2110                          - (SvTAIL(must) != 0) + back_min), strbeg);
2111         }
2112         if (s > PL_bostr)
2113             last1 = HOPc(s, -1);
2114         else
2115             last1 = s - 1;      /* bogus */
2116
2117         /* XXXX check_substr already used to find "s", can optimize if
2118            check_substr==must. */
2119         scream_pos = -1;
2120         dontbother = end_shift;
2121         strend = HOPc(strend, -dontbother);
2122         while ( (s <= last) &&
2123                 ((flags & REXEC_SCREAM)
2124                  ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2125                                     end_shift, &scream_pos, 0))
2126                  : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2127                                   (unsigned char*)strend, must,
2128                                   multiline ? FBMrf_MULTILINE : 0))) ) {
2129             /* we may be pointing at the wrong string */
2130             if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2131                 s = strbeg + (s - SvPVX_const(sv));
2132             DEBUG_EXECUTE_r( did_match = 1 );
2133             if (HOPc(s, -back_max) > last1) {
2134                 last1 = HOPc(s, -back_min);
2135                 s = HOPc(s, -back_max);
2136             }
2137             else {
2138                 char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2139
2140                 last1 = HOPc(s, -back_min);
2141                 s = t;
2142             }
2143             if (do_utf8) {
2144                 while (s <= last1) {
2145                     if (regtry(&reginfo, &s))
2146                         goto got_it;
2147                     s += UTF8SKIP(s);
2148                 }
2149             }
2150             else {
2151                 while (s <= last1) {
2152                     if (regtry(&reginfo, &s))
2153                         goto got_it;
2154                     s++;
2155                 }
2156             }
2157         }
2158         DEBUG_EXECUTE_r(if (!did_match) {
2159             RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2160                 SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2161             PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2162                               ((must == prog->anchored_substr || must == prog->anchored_utf8)
2163                                ? "anchored" : "floating"),
2164                 quoted, RE_SV_TAIL(must));
2165         });
2166         goto phooey;
2167     }
2168     else if ( (c = progi->regstclass) ) {
2169         if (minlen) {
2170             const OPCODE op = OP(progi->regstclass);
2171             /* don't bother with what can't match */
2172             if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2173                 strend = HOPc(strend, -(minlen - 1));
2174         }
2175         DEBUG_EXECUTE_r({
2176             SV * const prop = sv_newmortal();
2177             regprop(prog, prop, c);
2178             {
2179                 RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2180                     s,strend-s,60);
2181                 PerlIO_printf(Perl_debug_log,
2182                     "Matching stclass %.*s against %s (%d chars)\n",
2183                     (int)SvCUR(prop), SvPVX_const(prop),
2184                      quoted, (int)(strend - s));
2185             }
2186         });
2187         if (find_byclass(prog, c, s, strend, &reginfo))
2188             goto got_it;
2189         DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2190     }
2191     else {
2192         dontbother = 0;
2193         if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2194             /* Trim the end. */
2195             char *last;
2196             SV* float_real;
2197
2198             if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2199                 do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2200             float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2201
2202             if (flags & REXEC_SCREAM) {
2203                 last = screaminstr(sv, float_real, s - strbeg,
2204                                    end_shift, &scream_pos, 1); /* last one */
2205                 if (!last)
2206                     last = scream_olds; /* Only one occurrence. */
2207                 /* we may be pointing at the wrong string */
2208                 else if (RXp_MATCH_COPIED(prog))
2209                     s = strbeg + (s - SvPVX_const(sv));
2210             }
2211             else {
2212                 STRLEN len;
2213                 const char * const little = SvPV_const(float_real, len);
2214
2215                 if (SvTAIL(float_real)) {
2216                     if (memEQ(strend - len + 1, little, len - 1))
2217                         last = strend - len + 1;
2218                     else if (!multiline)
2219                         last = memEQ(strend - len, little, len)
2220                             ? strend - len : NULL;
2221                     else
2222                         goto find_last;
2223                 } else {
2224                   find_last:
2225                     if (len)
2226                         last = rninstr(s, strend, little, little + len);
2227                     else
2228                         last = strend;  /* matching "$" */
2229                 }
2230             }
2231             if (last == NULL) {
2232                 DEBUG_EXECUTE_r(
2233                     PerlIO_printf(Perl_debug_log,
2234                         "%sCan't trim the tail, match fails (should not happen)%s\n",
2235                         PL_colors[4], PL_colors[5]));
2236                 goto phooey; /* Should not happen! */
2237             }
2238             dontbother = strend - last + prog->float_min_offset;
2239         }
2240         if (minlen && (dontbother < minlen))
2241             dontbother = minlen - 1;
2242         strend -= dontbother;              /* this one's always in bytes! */
2243         /* We don't know much -- general case. */
2244         if (do_utf8) {
2245             for (;;) {
2246                 if (regtry(&reginfo, &s))
2247                     goto got_it;
2248                 if (s >= strend)
2249                     break;
2250                 s += UTF8SKIP(s);
2251             };
2252         }
2253         else {
2254             do {
2255                 if (regtry(&reginfo, &s))
2256                     goto got_it;
2257             } while (s++ < strend);
2258         }
2259     }
2260
2261     /* Failure. */
2262     goto phooey;
2263
2264 got_it:
2265     Safefree(swap);
2266     RX_MATCH_TAINTED_set(rx, PL_reg_flags & RF_tainted);
2267
2268     if (PL_reg_eval_set)
2269         restore_pos(aTHX_ prog);
2270     if (RXp_PAREN_NAMES(prog))
2271         (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2272
2273     /* make sure $`, $&, $', and $digit will work later */
2274     if ( !(flags & REXEC_NOT_FIRST) ) {
2275         RX_MATCH_COPY_FREE(rx);
2276         if (flags & REXEC_COPY_STR) {
2277             const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2278 #ifdef PERL_OLD_COPY_ON_WRITE
2279             if ((SvIsCOW(sv)
2280                  || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2281                 if (DEBUG_C_TEST) {
2282                     PerlIO_printf(Perl_debug_log,
2283                                   "Copy on write: regexp capture, type %d\n",
2284                                   (int) SvTYPE(sv));
2285                 }
2286                 prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2287                 prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2288                 assert (SvPOKp(prog->saved_copy));
2289             } else
2290 #endif
2291             {
2292                 RX_MATCH_COPIED_on(rx);
2293                 s = savepvn(strbeg, i);
2294                 prog->subbeg = s;
2295             }
2296             prog->sublen = i;
2297         }
2298         else {
2299             prog->subbeg = strbeg;
2300             prog->sublen = PL_regeol - strbeg;  /* strend may have been modified */
2301         }
2302     }
2303
2304     return 1;
2305
2306 phooey:
2307     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2308                           PL_colors[4], PL_colors[5]));
2309     if (PL_reg_eval_set)
2310         restore_pos(aTHX_ prog);
2311     if (swap) {
2312         /* we failed :-( roll it back */
2313         Safefree(prog->offs);
2314         prog->offs = swap;
2315     }
2316
2317     return 0;
2318 }
2319
2320
2321 /*
2322  - regtry - try match at specific point
2323  */
2324 STATIC I32                      /* 0 failure, 1 success */
2325 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2326 {
2327     dVAR;
2328     CHECKPOINT lastcp;
2329     REGEXP *const rx = reginfo->prog;
2330     regexp *const prog = (struct regexp *)SvANY(rx);
2331     RXi_GET_DECL(prog,progi);
2332     GET_RE_DEBUG_FLAGS_DECL;
2333
2334     PERL_ARGS_ASSERT_REGTRY;
2335
2336     reginfo->cutpoint=NULL;
2337
2338     if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2339         MAGIC *mg;
2340
2341         PL_reg_eval_set = RS_init;
2342         DEBUG_EXECUTE_r(DEBUG_s(
2343             PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2344                           (IV)(PL_stack_sp - PL_stack_base));
2345             ));
2346         SAVESTACK_CXPOS();
2347         cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2348         /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2349         SAVETMPS;
2350         /* Apparently this is not needed, judging by wantarray. */
2351         /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2352            cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2353
2354         if (reginfo->sv) {
2355             /* Make $_ available to executed code. */
2356             if (reginfo->sv != DEFSV) {
2357                 SAVE_DEFSV;
2358                 DEFSV_set(reginfo->sv);
2359             }
2360
2361             if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2362                   && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2363                 /* prepare for quick setting of pos */
2364 #ifdef PERL_OLD_COPY_ON_WRITE
2365                 if (SvIsCOW(reginfo->sv))
2366                     sv_force_normal_flags(reginfo->sv, 0);
2367 #endif
2368                 mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2369                                  &PL_vtbl_mglob, NULL, 0);
2370                 mg->mg_len = -1;
2371             }
2372             PL_reg_magic    = mg;
2373             PL_reg_oldpos   = mg->mg_len;
2374             SAVEDESTRUCTOR_X(restore_pos, prog);
2375         }
2376         if (!PL_reg_curpm) {
2377             Newxz(PL_reg_curpm, 1, PMOP);
2378 #ifdef USE_ITHREADS
2379             {
2380                 SV* const repointer = &PL_sv_undef;
2381                 /* this regexp is also owned by the new PL_reg_curpm, which
2382                    will try to free it.  */
2383                 av_push(PL_regex_padav, repointer);
2384                 PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2385                 PL_regex_pad = AvARRAY(PL_regex_padav);
2386             }
2387 #endif
2388         }
2389 #ifdef USE_ITHREADS
2390         /* It seems that non-ithreads works both with and without this code.
2391            So for efficiency reasons it seems best not to have the code
2392            compiled when it is not needed.  */
2393         /* This is safe against NULLs: */
2394         ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2395         /* PM_reg_curpm owns a reference to this regexp.  */
2396         ReREFCNT_inc(rx);
2397 #endif
2398         PM_SETRE(PL_reg_curpm, rx);
2399         PL_reg_oldcurpm = PL_curpm;
2400         PL_curpm = PL_reg_curpm;
2401         if (RXp_MATCH_COPIED(prog)) {
2402             /*  Here is a serious problem: we cannot rewrite subbeg,
2403                 since it may be needed if this match fails.  Thus
2404                 $` inside (?{}) could fail... */
2405             PL_reg_oldsaved = prog->subbeg;
2406             PL_reg_oldsavedlen = prog->sublen;
2407 #ifdef PERL_OLD_COPY_ON_WRITE
2408             PL_nrs = prog->saved_copy;
2409 #endif
2410             RXp_MATCH_COPIED_off(prog);
2411         }
2412         else
2413             PL_reg_oldsaved = NULL;
2414         prog->subbeg = PL_bostr;
2415         prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2416     }
2417     DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2418     prog->offs[0].start = *startpos - PL_bostr;
2419     PL_reginput = *startpos;
2420     PL_reglastparen = &prog->lastparen;
2421     PL_reglastcloseparen = &prog->lastcloseparen;
2422     prog->lastparen = 0;
2423     prog->lastcloseparen = 0;
2424     PL_regsize = 0;
2425     PL_regoffs = prog->offs;
2426     if (PL_reg_start_tmpl <= prog->nparens) {
2427         PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2428         if(PL_reg_start_tmp)
2429             Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2430         else
2431             Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2432     }
2433
2434     /* XXXX What this code is doing here?!!!  There should be no need
2435        to do this again and again, PL_reglastparen should take care of
2436        this!  --ilya*/
2437
2438     /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2439      * Actually, the code in regcppop() (which Ilya may be meaning by
2440      * PL_reglastparen), is not needed at all by the test suite
2441      * (op/regexp, op/pat, op/split), but that code is needed otherwise
2442      * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2443      * Meanwhile, this code *is* needed for the
2444      * above-mentioned test suite tests to succeed.  The common theme
2445      * on those tests seems to be returning null fields from matches.
2446      * --jhi updated by dapm */
2447 #if 1
2448     if (prog->nparens) {
2449         regexp_paren_pair *pp = PL_regoffs;
2450         register I32 i;
2451         for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2452             ++pp;
2453             pp->start = -1;
2454             pp->end = -1;
2455         }
2456     }
2457 #endif
2458     REGCP_SET(lastcp);
2459     if (regmatch(reginfo, progi->program + 1)) {
2460         PL_regoffs[0].end = PL_reginput - PL_bostr;
2461         return 1;
2462     }
2463     if (reginfo->cutpoint)
2464         *startpos= reginfo->cutpoint;
2465     REGCP_UNWIND(lastcp);
2466     return 0;
2467 }
2468
2469
2470 #define sayYES goto yes
2471 #define sayNO goto no
2472 #define sayNO_SILENT goto no_silent
2473
2474 /* we dont use STMT_START/END here because it leads to
2475    "unreachable code" warnings, which are bogus, but distracting. */
2476 #define CACHEsayNO \
2477     if (ST.cache_mask) \
2478        PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2479     sayNO
2480
2481 /* this is used to determine how far from the left messages like
2482    'failed...' are printed. It should be set such that messages
2483    are inline with the regop output that created them.
2484 */
2485 #define REPORT_CODE_OFF 32
2486
2487
2488 /* Make sure there is a test for this +1 options in re_tests */
2489 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2490
2491 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2492 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2493
2494 #define SLAB_FIRST(s) (&(s)->states[0])
2495 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2496
2497 /* grab a new slab and return the first slot in it */
2498
2499 STATIC regmatch_state *
2500 S_push_slab(pTHX)
2501 {
2502 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2503     dMY_CXT;
2504 #endif
2505     regmatch_slab *s = PL_regmatch_slab->next;
2506     if (!s) {
2507         Newx(s, 1, regmatch_slab);
2508         s->prev = PL_regmatch_slab;
2509         s->next = NULL;
2510         PL_regmatch_slab->next = s;
2511     }
2512     PL_regmatch_slab = s;
2513     return SLAB_FIRST(s);
2514 }
2515
2516
2517 /* push a new state then goto it */
2518
2519 #define PUSH_STATE_GOTO(state, node) \
2520     scan = node; \
2521     st->resume_state = state; \
2522     goto push_state;
2523
2524 /* push a new state with success backtracking, then goto it */
2525
2526 #define PUSH_YES_STATE_GOTO(state, node) \
2527     scan = node; \
2528     st->resume_state = state; \
2529     goto push_yes_state;
2530
2531
2532
2533 /*
2534
2535 regmatch() - main matching routine
2536
2537 This is basically one big switch statement in a loop. We execute an op,
2538 set 'next' to point the next op, and continue. If we come to a point which
2539 we may need to backtrack to on failure such as (A|B|C), we push a
2540 backtrack state onto the backtrack stack. On failure, we pop the top
2541 state, and re-enter the loop at the state indicated. If there are no more
2542 states to pop, we return failure.
2543
2544 Sometimes we also need to backtrack on success; for example /A+/, where
2545 after successfully matching one A, we need to go back and try to
2546 match another one; similarly for lookahead assertions: if the assertion
2547 completes successfully, we backtrack to the state just before the assertion
2548 and then carry on.  In these cases, the pushed state is marked as
2549 'backtrack on success too'. This marking is in fact done by a chain of
2550 pointers, each pointing to the previous 'yes' state. On success, we pop to
2551 the nearest yes state, discarding any intermediate failure-only states.
2552 Sometimes a yes state is pushed just to force some cleanup code to be
2553 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2554 it to free the inner regex.
2555
2556 Note that failure backtracking rewinds the cursor position, while
2557 success backtracking leaves it alone.
2558
2559 A pattern is complete when the END op is executed, while a subpattern
2560 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2561 ops trigger the "pop to last yes state if any, otherwise return true"
2562 behaviour.
2563
2564 A common convention in this function is to use A and B to refer to the two
2565 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2566 the subpattern to be matched possibly multiple times, while B is the entire
2567 rest of the pattern. Variable and state names reflect this convention.
2568
2569 The states in the main switch are the union of ops and failure/success of
2570 substates associated with with that op.  For example, IFMATCH is the op
2571 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2572 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2573 successfully matched A and IFMATCH_A_fail is a state saying that we have
2574 just failed to match A. Resume states always come in pairs. The backtrack
2575 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2576 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2577 on success or failure.
2578
2579 The struct that holds a backtracking state is actually a big union, with
2580 one variant for each major type of op. The variable st points to the
2581 top-most backtrack struct. To make the code clearer, within each
2582 block of code we #define ST to alias the relevant union.
2583
2584 Here's a concrete example of a (vastly oversimplified) IFMATCH
2585 implementation:
2586
2587     switch (state) {
2588     ....
2589
2590 #define ST st->u.ifmatch
2591
2592     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2593         ST.foo = ...; // some state we wish to save
2594         ...
2595         // push a yes backtrack state with a resume value of
2596         // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2597         // first node of A:
2598         PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2599         // NOTREACHED
2600
2601     case IFMATCH_A: // we have successfully executed A; now continue with B
2602         next = B;
2603         bar = ST.foo; // do something with the preserved value
2604         break;
2605
2606     case IFMATCH_A_fail: // A failed, so the assertion failed
2607         ...;   // do some housekeeping, then ...
2608         sayNO; // propagate the failure
2609
2610 #undef ST
2611
2612     ...
2613     }
2614
2615 For any old-timers reading this who are familiar with the old recursive
2616 approach, the code above is equivalent to:
2617
2618     case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2619     {
2620         int foo = ...
2621         ...
2622         if (regmatch(A)) {
2623             next = B;
2624             bar = foo;
2625             break;
2626         }
2627         ...;   // do some housekeeping, then ...
2628         sayNO; // propagate the failure
2629     }
2630
2631 The topmost backtrack state, pointed to by st, is usually free. If you
2632 want to claim it, populate any ST.foo fields in it with values you wish to
2633 save, then do one of
2634
2635         PUSH_STATE_GOTO(resume_state, node);
2636         PUSH_YES_STATE_GOTO(resume_state, node);
2637
2638 which sets that backtrack state's resume value to 'resume_state', pushes a
2639 new free entry to the top of the backtrack stack, then goes to 'node'.
2640 On backtracking, the free slot is popped, and the saved state becomes the
2641 new free state. An ST.foo field in this new top state can be temporarily
2642 accessed to retrieve values, but once the main loop is re-entered, it
2643 becomes available for reuse.
2644
2645 Note that the depth of the backtrack stack constantly increases during the
2646 left-to-right execution of the pattern, rather than going up and down with
2647 the pattern nesting. For example the stack is at its maximum at Z at the
2648 end of the pattern, rather than at X in the following:
2649
2650     /(((X)+)+)+....(Y)+....Z/
2651
2652 The only exceptions to this are lookahead/behind assertions and the cut,
2653 (?>A), which pop all the backtrack states associated with A before
2654 continuing.
2655
2656 Bascktrack state structs are allocated in slabs of about 4K in size.
2657 PL_regmatch_state and st always point to the currently active state,
2658 and PL_regmatch_slab points to the slab currently containing
2659 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2660 allocated, and is never freed until interpreter destruction. When the slab
2661 is full, a new one is allocated and chained to the end. At exit from
2662 regmatch(), slabs allocated since entry are freed.
2663
2664 */
2665
2666
2667 #define DEBUG_STATE_pp(pp)                                  \
2668     DEBUG_STATE_r({                                         \
2669         DUMP_EXEC_POS(locinput, scan, do_utf8);             \
2670         PerlIO_printf(Perl_debug_log,                       \
2671             "    %*s"pp" %s%s%s%s%s\n",                     \
2672             depth*2, "",                                    \
2673             PL_reg_name[st->resume_state],                     \
2674             ((st==yes_state||st==mark_state) ? "[" : ""),   \
2675             ((st==yes_state) ? "Y" : ""),                   \
2676             ((st==mark_state) ? "M" : ""),                  \
2677             ((st==yes_state||st==mark_state) ? "]" : "")    \
2678         );                                                  \
2679     });
2680
2681
2682 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2683
2684 #ifdef DEBUGGING
2685
2686 STATIC void
2687 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2688     const char *start, const char *end, const char *blurb)
2689 {
2690     const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2691
2692     PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2693
2694     if (!PL_colorset)
2695             reginitcolors();
2696     {
2697         RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2698             RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2699
2700         RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2701             start, end - start, 60);
2702
2703         PerlIO_printf(Perl_debug_log,
2704             "%s%s REx%s %s against %s\n",
2705                        PL_colors[4], blurb, PL_colors[5], s0, s1);
2706
2707         if (do_utf8||utf8_pat)
2708             PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2709                 utf8_pat ? "pattern" : "",
2710                 utf8_pat && do_utf8 ? " and " : "",
2711                 do_utf8 ? "string" : ""
2712             );
2713     }
2714 }
2715
2716 STATIC void
2717 S_dump_exec_pos(pTHX_ const char *locinput,
2718                       const regnode *scan,
2719                       const char *loc_regeol,
2720                       const char *loc_bostr,
2721                       const char *loc_reg_starttry,
2722                       const bool do_utf8)
2723 {
2724     const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2725     const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2726     int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2727     /* The part of the string before starttry has one color
2728        (pref0_len chars), between starttry and current
2729        position another one (pref_len - pref0_len chars),
2730        after the current position the third one.
2731        We assume that pref0_len <= pref_len, otherwise we
2732        decrease pref0_len.  */
2733     int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2734         ? (5 + taill) - l : locinput - loc_bostr;
2735     int pref0_len;
2736
2737     PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2738
2739     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2740         pref_len++;
2741     pref0_len = pref_len  - (locinput - loc_reg_starttry);
2742     if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2743         l = ( loc_regeol - locinput > (5 + taill) - pref_len
2744               ? (5 + taill) - pref_len : loc_regeol - locinput);
2745     while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2746         l--;
2747     if (pref0_len < 0)
2748         pref0_len = 0;
2749     if (pref0_len > pref_len)
2750         pref0_len = pref_len;
2751     {
2752         const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2753
2754         RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2755             (locinput - pref_len),pref0_len, 60, 4, 5);
2756
2757         RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2758                     (locinput - pref_len + pref0_len),
2759                     pref_len - pref0_len, 60, 2, 3);
2760
2761         RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2762                     locinput, loc_regeol - locinput, 10, 0, 1);
2763
2764         const STRLEN tlen=len0+len1+len2;
2765         PerlIO_printf(Perl_debug_log,
2766                     "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2767                     (IV)(locinput - loc_bostr),
2768                     len0, s0,
2769                     len1, s1,
2770                     (docolor ? "" : "> <"),
2771                     len2, s2,
2772                     (int)(tlen > 19 ? 0 :  19 - tlen),
2773                     "");
2774     }
2775 }
2776
2777 #endif
2778
2779 /* reg_check_named_buff_matched()
2780  * Checks to see if a named buffer has matched. The data array of
2781  * buffer numbers corresponding to the buffer is expected to reside
2782  * in the regexp->data->data array in the slot stored in the ARG() of
2783  * node involved. Note that this routine doesn't actually care about the
2784  * name, that information is not preserved from compilation to execution.
2785  * Returns the index of the leftmost defined buffer with the given name
2786  * or 0 if non of the buffers matched.
2787  */
2788 STATIC I32
2789 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2790 {
2791     I32 n;
2792     RXi_GET_DECL(rex,rexi);
2793     SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2794     I32 *nums=(I32*)SvPVX(sv_dat);
2795
2796     PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2797
2798     for ( n=0; n<SvIVX(sv_dat); n++ ) {
2799         if ((I32)*PL_reglastparen >= nums[n] &&
2800             PL_regoffs[nums[n]].end != -1)
2801         {
2802             return nums[n];
2803         }
2804     }
2805     return 0;
2806 }
2807
2808
2809 /* free all slabs above current one  - called during LEAVE_SCOPE */
2810
2811 STATIC void
2812 S_clear_backtrack_stack(pTHX_ void *p)
2813 {
2814     regmatch_slab *s = PL_regmatch_slab->next;
2815     PERL_UNUSED_ARG(p);
2816
2817     if (!s)
2818         return;
2819     PL_regmatch_slab->next = NULL;
2820     while (s) {
2821         regmatch_slab * const osl = s;
2822         s = s->next;
2823         Safefree(osl);
2824     }
2825 }
2826
2827
2828 #define SETREX(Re1,Re2) \
2829     if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2830     Re1 = (Re2)
2831
2832 STATIC I32                      /* 0 failure, 1 success */
2833 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2834 {
2835 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2836     dMY_CXT;
2837 #endif
2838     dVAR;
2839     register const bool do_utf8 = PL_reg_match_utf8;
2840     const U32 uniflags = UTF8_ALLOW_DEFAULT;
2841     REGEXP *rex_sv = reginfo->prog;
2842     regexp *rex = (struct regexp *)SvANY(rex_sv);
2843     RXi_GET_DECL(rex,rexi);
2844     I32 oldsave;
2845     /* the current state. This is a cached copy of PL_regmatch_state */
2846     register regmatch_state *st;
2847     /* cache heavy used fields of st in registers */
2848     register regnode *scan;
2849     register regnode *next;
2850     register U32 n = 0; /* general value; init to avoid compiler warning */
2851     register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2852     register char *locinput = PL_reginput;
2853     register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2854
2855     bool result = 0;        /* return value of S_regmatch */
2856     int depth = 0;          /* depth of backtrack stack */
2857     U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2858     const U32 max_nochange_depth =
2859         (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2860         3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2861     regmatch_state *yes_state = NULL; /* state to pop to on success of
2862                                                             subpattern */
2863     /* mark_state piggy backs on the yes_state logic so that when we unwind
2864        the stack on success we can update the mark_state as we go */
2865     regmatch_state *mark_state = NULL; /* last mark state we have seen */
2866     regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2867     struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2868     U32 state_num;
2869     bool no_final = 0;      /* prevent failure from backtracking? */
2870     bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2871     char *startpoint = PL_reginput;
2872     SV *popmark = NULL;     /* are we looking for a mark? */
2873     SV *sv_commit = NULL;   /* last mark name seen in failure */
2874     SV *sv_yes_mark = NULL; /* last mark name we have seen
2875                                during a successfull match */
2876     U32 lastopen = 0;       /* last open we saw */
2877     bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2878     SV* const oreplsv = GvSV(PL_replgv);
2879     /* these three flags are set by various ops to signal information to
2880      * the very next op. They have a useful lifetime of exactly one loop
2881      * iteration, and are not preserved or restored by state pushes/pops
2882      */
2883     bool sw = 0;            /* the condition value in (?(cond)a|b) */
2884     bool minmod = 0;        /* the next "{n,m}" is a "{n,m}?" */
2885     int logical = 0;        /* the following EVAL is:
2886                                 0: (?{...})
2887                                 1: (?(?{...})X|Y)
2888                                 2: (??{...})
2889                                or the following IFMATCH/UNLESSM is:
2890                                 false: plain (?=foo)
2891                                 true:  used as a condition: (?(?=foo))
2892                             */
2893 #ifdef DEBUGGING
2894     GET_RE_DEBUG_FLAGS_DECL;
2895 #endif
2896
2897     PERL_ARGS_ASSERT_REGMATCH;
2898
2899     DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2900             PerlIO_printf(Perl_debug_log,"regmatch start\n");
2901     }));
2902     /* on first ever call to regmatch, allocate first slab */
2903     if (!PL_regmatch_slab) {
2904         Newx(PL_regmatch_slab, 1, regmatch_slab);
2905         PL_regmatch_slab->prev = NULL;
2906         PL_regmatch_slab->next = NULL;
2907         PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2908     }
2909
2910     oldsave = PL_savestack_ix;
2911     SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2912     SAVEVPTR(PL_regmatch_slab);
2913     SAVEVPTR(PL_regmatch_state);
2914
2915     /* grab next free state slot */
2916     st = ++PL_regmatch_state;
2917     if (st >  SLAB_LAST(PL_regmatch_slab))
2918         st = PL_regmatch_state = S_push_slab(aTHX);
2919
2920     /* Note that nextchr is a byte even in UTF */
2921     nextchr = UCHARAT(locinput);
2922     scan = prog;
2923     while (scan != NULL) {
2924
2925         DEBUG_EXECUTE_r( {
2926             SV * const prop = sv_newmortal();
2927             regnode *rnext=regnext(scan);
2928             DUMP_EXEC_POS( locinput, scan, do_utf8 );
2929             regprop(rex, prop, scan);
2930
2931             PerlIO_printf(Perl_debug_log,
2932                     "%3"IVdf":%*s%s(%"IVdf")\n",
2933                     (IV)(scan - rexi->program), depth*2, "",
2934                     SvPVX_const(prop),
2935                     (PL_regkind[OP(scan)] == END || !rnext) ?
2936                         0 : (IV)(rnext - rexi->program));
2937         });
2938
2939         next = scan + NEXT_OFF(scan);
2940         if (next == scan)
2941             next = NULL;
2942         state_num = OP(scan);
2943
2944       reenter_switch:
2945
2946         assert(PL_reglastparen == &rex->lastparen);
2947         assert(PL_reglastcloseparen == &rex->lastcloseparen);
2948         assert(PL_regoffs == rex->offs);
2949
2950         switch (state_num) {
2951         case BOL:
2952             if (locinput == PL_bostr)
2953             {
2954                 /* reginfo->till = reginfo->bol; */
2955                 break;
2956             }
2957             sayNO;
2958         case MBOL:
2959             if (locinput == PL_bostr ||
2960                 ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2961             {
2962                 break;
2963             }
2964             sayNO;
2965         case SBOL:
2966             if (locinput == PL_bostr)
2967                 break;
2968             sayNO;
2969         case GPOS:
2970             if (locinput == reginfo->ganch)
2971                 break;
2972             sayNO;
2973
2974         case KEEPS:
2975             /* update the startpoint */
2976             st->u.keeper.val = PL_regoffs[0].start;
2977             PL_reginput = locinput;
2978             PL_regoffs[0].start = locinput - PL_bostr;
2979             PUSH_STATE_GOTO(KEEPS_next, next);
2980             /*NOT-REACHED*/
2981         case KEEPS_next_fail:
2982             /* rollback the start point change */
2983             PL_regoffs[0].start = st->u.keeper.val;
2984             sayNO_SILENT;
2985             /*NOT-REACHED*/
2986         case EOL:
2987                 goto seol;
2988         case MEOL:
2989             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2990                 sayNO;
2991             break;
2992         case SEOL:
2993           seol:
2994             if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2995                 sayNO;
2996             if (PL_regeol - locinput > 1)
2997                 sayNO;
2998             break;
2999         case EOS:
3000             if (PL_regeol != locinput)
3001                 sayNO;
3002             break;
3003         case SANY:
3004             if (!nextchr && locinput >= PL_regeol)
3005                 sayNO;
3006             if (do_utf8) {
3007                 locinput += PL_utf8skip[nextchr];
3008                 if (locinput > PL_regeol)
3009                     sayNO;
3010                 nextchr = UCHARAT(locinput);
3011             }
3012             else
3013                 nextchr = UCHARAT(++locinput);
3014             break;
3015         case CANY:
3016             if (!nextchr && locinput >= PL_regeol)
3017                 sayNO;
3018             nextchr = UCHARAT(++locinput);
3019             break;
3020         case REG_ANY:
3021             if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
3022                 sayNO;
3023             if (do_utf8) {
3024                 locinput += PL_utf8skip[nextchr];
3025                 if (locinput > PL_regeol)
3026                     sayNO;
3027                 nextchr = UCHARAT(locinput);
3028             }
3029             else
3030                 nextchr = UCHARAT(++locinput);
3031             break;
3032
3033 #undef  ST
3034 #define ST st->u.trie
3035         case TRIEC:
3036             /* In this case the charclass data is available inline so
3037                we can fail fast without a lot of extra overhead.
3038              */
3039             if (scan->flags == EXACT || !do_utf8) {
3040                 if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
3041                     DEBUG_EXECUTE_r(
3042                         PerlIO_printf(Perl_debug_log,
3043                                   "%*s  %sfailed to match trie start class...%s\n",
3044                                   REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3045                     );
3046                     sayNO_SILENT;
3047                     /* NOTREACHED */
3048                 }
3049             }
3050             /* FALL THROUGH */
3051         case TRIE:
3052             {
3053                 /* what type of TRIE am I? (utf8 makes this contextual) */
3054                 DECL_TRIE_TYPE(scan);
3055
3056                 /* what trie are we using right now */
3057                 reg_trie_data * const trie
3058                     = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
3059                 HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
3060                 U32 state = trie->startstate;
3061
3062                 if (trie->bitmap && trie_type != trie_utf8_fold &&
3063                     !TRIE_BITMAP_TEST(trie,*locinput)
3064                 ) {
3065                     if (trie->states[ state ].wordnum) {
3066                          DEBUG_EXECUTE_r(
3067                             PerlIO_printf(Perl_debug_log,
3068                                           "%*s  %smatched empty string...%s\n",
3069                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3070                         );
3071                         break;
3072                     } else {
3073                         DEBUG_EXECUTE_r(
3074                             PerlIO_printf(Perl_debug_log,
3075                                           "%*s  %sfailed to match trie start class...%s\n",
3076                                           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
3077                         );
3078                         sayNO_SILENT;
3079                    }
3080                 }
3081
3082             {
3083                 U8 *uc = ( U8* )locinput;
3084
3085                 STRLEN len = 0;
3086                 STRLEN foldlen = 0;
3087                 U8 *uscan = (U8*)NULL;
3088                 STRLEN bufflen=0;
3089                 SV *sv_accept_buff = NULL;
3090                 U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
3091
3092                 ST.accepted = 0; /* how many accepting states we have seen */
3093                 ST.B = next;
3094                 ST.jump = trie->jump;
3095                 ST.me = scan;
3096                 /*
3097                    traverse the TRIE keeping track of all accepting states
3098                    we transition through until we get to a failing node.
3099                 */
3100
3101                 while ( state && uc <= (U8*)PL_regeol ) {
3102                     U32 base = trie->states[ state ].trans.base;
3103                     UV uvc = 0;
3104                     U16 charid;
3105                     /* We use charid to hold the wordnum as we don't use it
3106                        for charid until after we have done the wordnum logic.
3107                        We define an alias just so that the wordnum logic reads
3108                        more naturally. */
3109
3110 #define got_wordnum charid
3111                     got_wordnum = trie->states[ state ].wordnum;
3112
3113                     if ( got_wordnum ) {
3114                         if ( ! ST.accepted ) {
3115                             ENTER;
3116                             SAVETMPS; /* XXX is this necessary? dmq */
3117                             bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3118                             sv_accept_buff=newSV(bufflen *
3119                                             sizeof(reg_trie_accepted) - 1);
3120                             SvCUR_set(sv_accept_buff, 0);
3121                             SvPOK_on(sv_accept_buff);
3122                             sv_2mortal(sv_accept_buff);
3123                             SAVETMPS;
3124                             ST.accept_buff =
3125                                 (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3126                         }
3127                         do {
3128                             if (ST.accepted >= bufflen) {
3129                                 bufflen *= 2;
3130                                 ST.accept_buff =(reg_trie_accepted*)
3131                                     SvGROW(sv_accept_buff,
3132                                         bufflen * sizeof(reg_trie_accepted));
3133                             }
3134                             SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3135                                 + sizeof(reg_trie_accepted));
3136
3137
3138                             ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3139                             ST.accept_buff[ST.accepted].endpos = uc;
3140                             ++ST.accepted;
3141                         } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3142                     }
3143 #undef got_wordnum
3144
3145                     DEBUG_TRIE_EXECUTE_r({
3146                                 DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3147                                 PerlIO_printf( Perl_debug_log,
3148                                     "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3149                                     2+depth * 2, "", PL_colors[4],
3150                                     (UV)state, (UV)ST.accepted );
3151                     });
3152
3153                     if ( base ) {
3154                         REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3155                                              uscan, len, uvc, charid, foldlen,
3156                                              foldbuf, uniflags);
3157
3158                         if (charid &&
3159                              (base + charid > trie->uniquecharcount )
3160                              && (base + charid - 1 - trie->uniquecharcount
3161                                     < trie->lasttrans)
3162                              && trie->trans[base + charid - 1 -
3163                                     trie->uniquecharcount].check == state)
3164                         {
3165                             state = trie->trans[base + charid - 1 -
3166                                 trie->uniquecharcount ].next;
3167                         }
3168                         else {
3169                             state = 0;
3170                         }
3171                         uc += len;
3172
3173                     }
3174                     else {
3175                         state = 0;
3176                     }
3177                     DEBUG_TRIE_EXECUTE_r(
3178                         PerlIO_printf( Perl_debug_log,
3179                             "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3180                             charid, uvc, (UV)state, PL_colors[5] );
3181                     );
3182                 }
3183                 if (!ST.accepted )
3184                    sayNO;
3185
3186                 DEBUG_EXECUTE_r(
3187                     PerlIO_printf( Perl_debug_log,
3188                         "%*s  %sgot %"IVdf" possible matches%s\n",
3189                         REPORT_CODE_OFF + depth * 2, "",
3190                         PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3191                 );
3192             }}
3193             goto trie_first_try; /* jump into the fail handler */
3194             /* NOTREACHED */
3195         case TRIE_next_fail: /* we failed - try next alterative */
3196             if ( ST.jump) {
3197                 REGCP_UNWIND(ST.cp);
3198                 for (n = *PL_reglastparen; n > ST.lastparen; n--)
3199                     PL_regoffs[n].end = -1;
3200                 *PL_reglastparen = n;
3201             }
3202           trie_first_try:
3203             if (do_cutgroup) {
3204                 do_cutgroup = 0;
3205                 no_final = 0;
3206             }
3207
3208             if ( ST.jump) {
3209                 ST.lastparen = *PL_reglastparen;
3210                 REGCP_SET(ST.cp);
3211             }
3212             if ( ST.accepted == 1 ) {
3213                 /* only one choice left - just continue */
3214                 DEBUG_EXECUTE_r({
3215                     AV *const trie_words
3216                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3217                     SV ** const tmp = av_fetch( trie_words,
3218                         ST.accept_buff[ 0 ].wordnum-1, 0 );
3219                     SV *sv= tmp ? sv_newmortal() : NULL;
3220
3221                     PerlIO_printf( Perl_debug_log,
3222                         "%*s  %sonly one match left: #%d <%s>%s\n",
3223                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3224                         ST.accept_buff[ 0 ].wordnum,
3225                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3226                                 PL_colors[0], PL_colors[1],
3227                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3228                             )
3229                         : "not compiled under -Dr",
3230                         PL_colors[5] );
3231                 });
3232                 PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3233                 /* in this case we free tmps/leave before we call regmatch
3234                    as we wont be using accept_buff again. */
3235
3236                 locinput = PL_reginput;
3237                 nextchr = UCHARAT(locinput);
3238                 if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3239                     scan = ST.B;
3240                 else
3241                     scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3242                 if (!has_cutgroup) {
3243                     FREETMPS;
3244                     LEAVE;
3245                 } else {
3246                     ST.accepted--;
3247                     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3248                 }
3249
3250                 continue; /* execute rest of RE */
3251             }
3252
3253             if ( !ST.accepted-- ) {
3254                 DEBUG_EXECUTE_r({
3255                     PerlIO_printf( Perl_debug_log,
3256                         "%*s  %sTRIE failed...%s\n",
3257                         REPORT_CODE_OFF+depth*2, "",
3258                         PL_colors[4],
3259                         PL_colors[5] );
3260                 });
3261                 FREETMPS;
3262                 LEAVE;
3263                 sayNO_SILENT;
3264                 /*NOTREACHED*/
3265             }
3266
3267             /*
3268                There are at least two accepting states left.  Presumably
3269                the number of accepting states is going to be low,
3270                typically two. So we simply scan through to find the one
3271                with lowest wordnum.  Once we find it, we swap the last
3272                state into its place and decrement the size. We then try to
3273                match the rest of the pattern at the point where the word
3274                ends. If we succeed, control just continues along the
3275                regex; if we fail we return here to try the next accepting
3276                state
3277              */
3278
3279             {
3280                 U32 best = 0;
3281                 U32 cur;
3282                 for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3283                     DEBUG_TRIE_EXECUTE_r(
3284                         PerlIO_printf( Perl_debug_log,
3285                             "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3286                             REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3287                             (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3288                             ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3289                     );
3290
3291                     if (ST.accept_buff[cur].wordnum <
3292                             ST.accept_buff[best].wordnum)
3293                         best = cur;
3294                 }
3295
3296                 DEBUG_EXECUTE_r({
3297                     AV *const trie_words
3298                         = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3299                     SV ** const tmp = av_fetch( trie_words,
3300                         ST.accept_buff[ best ].wordnum - 1, 0 );
3301                     regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3302                                     ST.B :
3303                                     ST.me + ST.jump[ST.accept_buff[best].wordnum];
3304                     SV *sv= tmp ? sv_newmortal() : NULL;
3305
3306                     PerlIO_printf( Perl_debug_log,
3307                         "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3308                         REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3309                         ST.accept_buff[best].wordnum,
3310                         tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3311                                 PL_colors[0], PL_colors[1],
3312                                 (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3313                             ) : "not compiled under -Dr",
3314                             REG_NODE_NUM(nextop),
3315                         PL_colors[5] );
3316                 });
3317
3318                 if ( best<ST.accepted ) {
3319                     reg_trie_accepted tmp = ST.accept_buff[ best ];
3320                     ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3321                     ST.accept_buff[ ST.accepted ] = tmp;
3322                     best = ST.accepted;
3323                 }
3324                 PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3325                 if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3326                     scan = ST.B;
3327                 } else {
3328                     scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3329                 }
3330                 PUSH_YES_STATE_GOTO(TRIE_next, scan);
3331                 /* NOTREACHED */
3332             }
3333             /* NOTREACHED */
3334         case TRIE_next:
3335             /* we dont want to throw this away, see bug 57042*/
3336             if (oreplsv != GvSV(PL_replgv))
3337                 sv_setsv(oreplsv, GvSV(PL_replgv));
3338             FREETMPS;
3339             LEAVE;
3340             sayYES;
3341 #undef  ST
3342
3343         case EXACT: {
3344             char *s = STRING(scan);
3345             ln = STR_LEN(scan);
3346             if (do_utf8 != UTF) {
3347                 /* The target and the pattern have differing utf8ness. */
3348                 char *l = locinput;
3349                 const char * const e = s + ln;
3350
3351                 if (do_utf8) {
3352                     /* The target is utf8, the pattern is not utf8. */
3353                     while (s < e) {
3354                         STRLEN ulen;
3355                         if (l >= PL_regeol)
3356                              sayNO;
3357                         if (NATIVE_TO_UNI(*(U8*)s) !=
3358                             utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3359                                             uniflags))
3360                              sayNO;
3361                         l += ulen;
3362                         s ++;
3363                     }
3364                 }
3365                 else {
3366                     /* The target is not utf8, the pattern is utf8. */
3367                     while (s < e) {
3368                         STRLEN ulen;
3369                         if (l >= PL_regeol)
3370                             sayNO;
3371                         if (NATIVE_TO_UNI(*((U8*)l)) !=
3372                             utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3373                                            uniflags))
3374                             sayNO;
3375                         s += ulen;
3376                         l ++;
3377                     }
3378                 }
3379                 locinput = l;
3380                 nextchr = UCHARAT(locinput);
3381                 break;
3382             }
3383             /* The target and the pattern have the same utf8ness. */
3384             /* Inline the first character, for speed. */
3385             if (UCHARAT(s) != nextchr)
3386                 sayNO;
3387             if (PL_regeol - locinput < ln)
3388                 sayNO;
3389             if (ln > 1 && memNE(s, locinput, ln))
3390                 sayNO;
3391             locinput += ln;
3392             nextchr = UCHARAT(locinput);
3393             break;
3394             }
3395         case EXACTFL:
3396             PL_reg_flags |= RF_tainted;
3397             /* FALL THROUGH */
3398         case EXACTF: {
3399             char * const s = STRING(scan);
3400             ln = STR_LEN(scan);
3401
3402             if (do_utf8 || UTF) {
3403               /* Either target or the pattern are utf8. */
3404                 const char * const l = locinput;
3405                 char *e = PL_regeol;
3406
3407                 if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3408                                l, &e, 0,  do_utf8)) {
3409                      /* One more case for the sharp s:
3410                       * pack("U0U*", 0xDF) =~ /ss/i,
3411                       * the 0xC3 0x9F are the UTF-8
3412                       * byte sequence for the U+00DF. */
3413
3414                      if (!(do_utf8 &&
3415                            toLOWER(s[0]) == 's' &&
3416                            ln >= 2 &&
3417                            toLOWER(s[1]) == 's' &&
3418                            (U8)l[0] == 0xC3 &&
3419                            e - l >= 2 &&
3420                            (U8)l[1] == 0x9F))
3421                           sayNO;
3422                 }
3423                 locinput = e;
3424                 nextchr = UCHARAT(locinput);
3425                 break;
3426             }
3427
3428             /* Neither the target and the pattern are utf8. */
3429
3430             /* Inline the first character, for speed. */
3431             if (UCHARAT(s) != nextchr &&
3432                 UCHARAT(s) != ((OP(scan) == EXACTF)
3433                                ? PL_fold : PL_fold_locale)[nextchr])
3434                 sayNO;
3435             if (PL_regeol - locinput < ln)
3436                 sayNO;
3437             if (ln > 1 && (OP(scan) == EXACTF
3438                            ? ibcmp(s, locinput, ln)
3439                            : ibcmp_locale(s, locinput, ln)))
3440                 sayNO;
3441             locinput += ln;
3442             nextchr = UCHARAT(locinput);
3443             break;
3444             }
3445         case BOUNDL:
3446         case NBOUNDL:
3447             PL_reg_flags |= RF_tainted;
3448             /* FALL THROUGH */
3449         case BOUND:
3450         case NBOUND:
3451             /* was last char in word? */
3452             if (do_utf8) {
3453                 if (locinput == PL_bostr)
3454                     ln = '\n';
3455                 else {
3456                     const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3457
3458                     ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3459                 }
3460                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3461                     ln = isALNUM_uni(ln);
3462                     LOAD_UTF8_CHARCLASS_ALNUM();
3463                     n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3464                 }
3465                 else {
3466                     ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3467                     n = isALNUM_LC_utf8((U8*)locinput);
3468                 }
3469             }
3470             else {
3471                 ln = (locinput != PL_bostr) ?
3472                     UCHARAT(locinput - 1) : '\n';
3473                 if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3474                     ln = isALNUM(ln);
3475                     n = isALNUM(nextchr);
3476                 }
3477                 else {
3478                     ln = isALNUM_LC(ln);
3479                     n = isALNUM_LC(nextchr);
3480                 }
3481             }
3482             if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3483                                     OP(scan) == BOUNDL))
3484                     sayNO;
3485             break;
3486         case ANYOF:
3487             if (do_utf8) {
3488                 STRLEN inclasslen = PL_regeol - locinput;
3489
3490                 if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3491                     goto anyof_fail;
3492                 if (locinput >= PL_regeol)
3493                     sayNO;
3494                 locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3495                 nextchr = UCHARAT(locinput);
3496                 break;
3497             }
3498             else {
3499                 if (nextchr < 0)
3500                     nextchr = UCHARAT(locinput);
3501                 if (!REGINCLASS(rex, scan, (U8*)locinput))
3502                     goto anyof_fail;
3503                 if (!nextchr && locinput >= PL_regeol)
3504                     sayNO;
3505                 nextchr = UCHARAT(++locinput);
3506                 break;
3507             }
3508         anyof_fail:
3509             /* If we might have the case of the German sharp s
3510              * in a casefolding Unicode character class. */
3511
3512             if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3513                  locinput += SHARP_S_SKIP;
3514                  nextchr = UCHARAT(locinput);
3515             }
3516             else
3517                  sayNO;
3518             break;
3519         /* Special char classes - The defines start on line 129 or so */
3520         CCC_TRY_AFF( ALNUM,  ALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3521         CCC_TRY_NEG(NALNUM, NALNUML, perl_word,   "a", isALNUM_LC_utf8, isALNUM, isALNUM_LC);
3522
3523         CCC_TRY_AFF( SPACE,  SPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3524         CCC_TRY_NEG(NSPACE, NSPACEL, perl_space,  " ", isSPACE_LC_utf8, isSPACE, isSPACE_LC);
3525
3526         CCC_TRY_AFF( DIGIT,  DIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3527         CCC_TRY_NEG(NDIGIT, NDIGITL, posix_digit, "0", isDIGIT_LC_utf8, isDIGIT, isDIGIT_LC);
3528
3529         case CLUMP:
3530             if (locinput >= PL_regeol)
3531                 sayNO;
3532             if  (do_utf8) {
3533                 LOAD_UTF8_CHARCLASS_MARK();
3534                 if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3535                     sayNO;
3536                 locinput += PL_utf8skip[nextchr];
3537                 while (locinput < PL_regeol &&
3538                        swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3539                     locinput += UTF8SKIP(locinput);
3540                 if (locinput > PL_regeol)
3541                     sayNO;
3542             }
3543             else
3544                locinput++;
3545             nextchr = UCHARAT(locinput);
3546             break;
3547
3548         case NREFFL:
3549         {
3550             char *s;
3551             char type;
3552             PL_reg_flags |= RF_tainted;
3553             /* FALL THROUGH */
3554         case NREF:
3555         case NREFF:
3556             type = OP(scan);
3557             n = reg_check_named_buff_matched(rex,scan);
3558
3559             if ( n ) {
3560                 type = REF + ( type - NREF );
3561                 goto do_ref;
3562             } else {
3563                 sayNO;
3564             }
3565             /* unreached */
3566         case REFFL:
3567             PL_reg_flags |= RF_tainted;
3568             /* FALL THROUGH */
3569         case REF:
3570         case REFF:
3571             n = ARG(scan);  /* which paren pair */
3572             type = OP(scan);
3573           do_ref:
3574             ln = PL_regoffs[n].start;
3575             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3576             if (*PL_reglastparen < n || ln == -1)
3577                 sayNO;                  /* Do not match unless seen CLOSEn. */
3578             if (ln == PL_regoffs[n].end)
3579                 break;
3580
3581             s = PL_bostr + ln;
3582             if (do_utf8 && type != REF) {       /* REF can do byte comparison */
3583                 char *l = locinput;
3584                 const char *e = PL_bostr + PL_regoffs[n].end;
3585                 /*
3586                  * Note that we can't do the "other character" lookup trick as
3587                  * in the 8-bit case (no pun intended) because in Unicode we
3588                  * have to map both upper and title case to lower case.
3589                  */
3590                 if (type == REFF) {
3591                     while (s < e) {
3592                         STRLEN ulen1, ulen2;
3593                         U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3594                         U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3595
3596                         if (l >= PL_regeol)
3597                             sayNO;
3598                         toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3599                         toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3600                         if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3601                             sayNO;
3602                         s += ulen1;
3603                         l += ulen2;
3604                     }
3605                 }
3606                 locinput = l;
3607                 nextchr = UCHARAT(locinput);
3608                 break;
3609             }
3610
3611             /* Inline the first character, for speed. */
3612             if (UCHARAT(s) != nextchr &&
3613                 (type == REF ||
3614                  (UCHARAT(s) != (type == REFF
3615                                   ? PL_fold : PL_fold_locale)[nextchr])))
3616                 sayNO;
3617             ln = PL_regoffs[n].end - ln;
3618             if (locinput + ln > PL_regeol)
3619                 sayNO;
3620             if (ln > 1 && (type == REF
3621                            ? memNE(s, locinput, ln)
3622                            : (type == REFF
3623                               ? ibcmp(s, locinput, ln)
3624                               : ibcmp_locale(s, locinput, ln))))
3625                 sayNO;
3626             locinput += ln;
3627             nextchr = UCHARAT(locinput);
3628             break;
3629         }
3630         case NOTHING:
3631         case TAIL:
3632             break;
3633         case BACK:
3634             break;
3635
3636 #undef  ST
3637 #define ST st->u.eval
3638         {
3639             SV *ret;
3640             REGEXP *re_sv;
3641             regexp *re;
3642             regexp_internal *rei;
3643             regnode *startpoint;
3644
3645         case GOSTART:
3646         case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3647             if (cur_eval && cur_eval->locinput==locinput) {
3648                 if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3649                     Perl_croak(aTHX_ "Infinite recursion in regex");
3650                 if ( ++nochange_depth > max_nochange_depth )
3651                     Perl_croak(aTHX_
3652                         "Pattern subroutine nesting without pos change"
3653                         " exceeded limit in regex");
3654             } else {
3655                 nochange_depth = 0;
3656             }
3657             re_sv = rex_sv;
3658             re = rex;
3659             rei = rexi;
3660             (void)ReREFCNT_inc(rex_sv);
3661             if (OP(scan)==GOSUB) {
3662                 startpoint = scan + ARG2L(scan);
3663                 ST.close_paren = ARG(scan);
3664             } else {
3665                 startpoint = rei->program+1;
3666                 ST.close_paren = 0;
3667             }
3668             goto eval_recurse_doit;
3669             /* NOTREACHED */
3670         case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3671             if (cur_eval && cur_eval->locinput==locinput) {
3672                 if ( ++nochange_depth > max_nochange_depth )
3673                     Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3674             } else {
3675                 nochange_depth = 0;
3676             }
3677             {
3678                 /* execute the code in the {...} */
3679                 dSP;
3680                 SV ** const before = SP;
3681                 OP_4tree * const oop = PL_op;
3682                 COP * const ocurcop = PL_curcop;
3683                 PAD *old_comppad;
3684                 char *saved_regeol = PL_regeol;
3685
3686                 n = ARG(scan);
3687                 PL_op = (OP_4tree*)rexi->data->data[n];
3688                 DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3689                     "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3690                 PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3691                 PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3692
3693                 if (sv_yes_mark) {
3694                     SV *sv_mrk = get_sv("REGMARK", 1);
3695                     sv_setsv(sv_mrk, sv_yes_mark);
3696                 }
3697
3698                 CALLRUNOPS(aTHX);                       /* Scalar context. */
3699                 SPAGAIN;
3700                 if (SP == before)
3701                     ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3702                 else {
3703                     ret = POPs;
3704                     PUTBACK;
3705                 }
3706
3707                 PL_op = oop;
3708                 PAD_RESTORE_LOCAL(old_comppad);
3709                 PL_curcop = ocurcop;
3710                 PL_regeol = saved_regeol;
3711                 if (!logical) {
3712                     /* /(?{...})/ */
3713                     sv_setsv(save_scalar(PL_replgv), ret);
3714                     break;
3715                 }
3716             }
3717             if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3718                 logical = 0;
3719                 {
3720                     /* extract RE object from returned value; compiling if
3721                      * necessary */
3722                     MAGIC *mg = NULL;
3723                     REGEXP *rx = NULL;
3724
3725                     if (SvROK(ret)) {
3726                         SV *const sv = SvRV(ret);
3727
3728                         if (SvTYPE(sv) == SVt_REGEXP) {
3729                             rx = (REGEXP*) sv;
3730                         } else if (SvSMAGICAL(sv)) {
3731                             mg = mg_find(sv, PERL_MAGIC_qr);
3732                             assert(mg);
3733                         }
3734                     } else if (SvTYPE(ret) == SVt_REGEXP) {
3735                         rx = (REGEXP*) ret;
3736                     } else if (SvSMAGICAL(ret)) {
3737                         if (SvGMAGICAL(ret)) {
3738                             /* I don't believe that there is ever qr magic
3739                                here.  */
3740                             assert(!mg_find(ret, PERL_MAGIC_qr));
3741                             sv_unmagic(ret, PERL_MAGIC_qr);
3742                         }
3743                         else {
3744                             mg = mg_find(ret, PERL_MAGIC_qr);
3745                             /* testing suggests mg only ends up non-NULL for
3746                                scalars who were upgraded and compiled in the
3747                                else block below. In turn, this is only
3748                                triggered in the "postponed utf8 string" tests
3749                                in t/op/pat.t  */
3750                         }
3751                     }
3752
3753                     if (mg) {
3754                         rx = (REGEXP *) mg->mg_obj; /*XXX:dmq*/
3755                         assert(rx);
3756                     }
3757                     if (rx) {
3758                         rx = reg_temp_copy(rx);
3759                     }
3760                     else {
3761                         U32 pm_flags = 0;
3762                         const I32 osize = PL_regsize;
3763
3764                         if (DO_UTF8(ret)) {
3765                             assert (SvUTF8(ret));
3766                         } else if (SvUTF8(ret)) {
3767                             /* Not doing UTF-8, despite what the SV says. Is
3768                                this only if we're trapped in use 'bytes'?  */
3769                             /* Make a copy of the octet sequence, but without
3770                                the flag on, as the compiler now honours the
3771                                SvUTF8 flag on ret.  */
3772                             STRLEN len;
3773                             const char *const p = SvPV(ret, len);
3774                             ret = newSVpvn_flags(p, len, SVs_TEMP);
3775                         }
3776                         rx = CALLREGCOMP(ret, pm_flags);
3777                         if (!(SvFLAGS(ret)
3778                               & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3779                                  | SVs_GMG))) {
3780                             /* This isn't a first class regexp. Instead, it's
3781                                caching a regexp onto an existing, Perl visible
3782                                scalar.  */
3783                             sv_magic(ret, MUTABLE_SV(rx), PERL_MAGIC_qr, 0, 0);
3784                         }
3785                         PL_regsize = osize;
3786                     }
3787                     re_sv = rx;
3788                     re = (struct regexp *)SvANY(rx);
3789                 }
3790                 RXp_MATCH_COPIED_off(re);
3791                 re->subbeg = rex->subbeg;
3792                 re->sublen = rex->sublen;
3793                 rei = RXi_GET(re);
3794                 DEBUG_EXECUTE_r(
3795                     debug_start_match(re_sv, do_utf8, locinput, PL_regeol,
3796                         "Matching embedded");
3797                 );
3798                 startpoint = rei->program + 1;
3799                 ST.close_paren = 0; /* only used for GOSUB */
3800                 /* borrowed from regtry */
3801                 if (PL_reg_start_tmpl <= re->nparens) {
3802                     PL_reg_start_tmpl = re->nparens*3/2 + 3;
3803                     if(PL_reg_start_tmp)
3804                         Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3805                     else
3806                         Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3807                 }
3808
3809         eval_recurse_doit: /* Share code with GOSUB below this line */
3810                 /* run the pattern returned from (??{...}) */
3811                 ST.cp = regcppush(0);   /* Save *all* the positions. */
3812                 REGCP_SET(ST.lastcp);
3813
3814                 PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3815
3816                 /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3817                 PL_reglastparen = &re->lastparen;
3818                 PL_reglastcloseparen = &re->lastcloseparen;
3819                 re->lastparen = 0;
3820                 re->lastcloseparen = 0;
3821
3822                 PL_reginput = locinput;
3823                 PL_regsize = 0;
3824
3825                 /* XXXX This is too dramatic a measure... */
3826                 PL_reg_maxiter = 0;
3827
3828                 ST.toggle_reg_flags = PL_reg_flags;
3829                 if (RX_UTF8(re_sv))
3830                     PL_reg_flags |= RF_utf8;
3831                 else
3832                     PL_reg_flags &= ~RF_utf8;
3833                 ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3834
3835                 ST.prev_rex = rex_sv;
3836                 ST.prev_curlyx = cur_curlyx;
3837                 SETREX(rex_sv,re_sv);
3838                 rex = re;
3839                 rexi = rei;
3840                 cur_curlyx = NULL;
3841                 ST.B = next;
3842                 ST.prev_eval = cur_eval;
3843                 cur_eval = st;
3844                 /* now continue from first node in postoned RE */
3845                 PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3846                 /* NOTREACHED */
3847             }
3848             /* logical is 1,   /(?(?{...})X|Y)/ */
3849             sw = (bool)SvTRUE(ret);
3850             logical = 0;
3851             break;
3852         }
3853
3854         case EVAL_AB: /* cleanup after a successful (??{A})B */
3855             /* note: this is called twice; first after popping B, then A */
3856             PL_reg_flags ^= ST.toggle_reg_flags;
3857             ReREFCNT_dec(rex_sv);
3858             SETREX(rex_sv,ST.prev_rex);
3859             rex = (struct regexp *)SvANY(rex_sv);
3860             rexi = RXi_GET(rex);
3861             regcpblow(ST.cp);
3862             cur_eval = ST.prev_eval;
3863             cur_curlyx = ST.prev_curlyx;
3864
3865             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3866             PL_reglastparen = &rex->lastparen;
3867             PL_reglastcloseparen = &rex->lastcloseparen;
3868             /* also update PL_regoffs */
3869             PL_regoffs = rex->offs;
3870
3871             /* XXXX This is too dramatic a measure... */
3872             PL_reg_maxiter = 0;
3873             if ( nochange_depth )
3874                 nochange_depth--;
3875             sayYES;
3876
3877
3878         case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3879             /* note: this is called twice; first after popping B, then A */
3880             PL_reg_flags ^= ST.toggle_reg_flags;
3881             ReREFCNT_dec(rex_sv);
3882             SETREX(rex_sv,ST.prev_rex);
3883             rex = (struct regexp *)SvANY(rex_sv);
3884             rexi = RXi_GET(rex);
3885             /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3886             PL_reglastparen = &rex->lastparen;
3887             PL_reglastcloseparen = &rex->lastcloseparen;
3888
3889             PL_reginput = locinput;
3890             REGCP_UNWIND(ST.lastcp);
3891             regcppop(rex);
3892             cur_eval = ST.prev_eval;
3893             cur_curlyx = ST.prev_curlyx;
3894             /* XXXX This is too dramatic a measure... */
3895             PL_reg_maxiter = 0;
3896             if ( nochange_depth )
3897                 nochange_depth--;
3898             sayNO_SILENT;
3899 #undef ST
3900
3901         case OPEN:
3902             n = ARG(scan);  /* which paren pair */
3903             PL_reg_start_tmp[n] = locinput;
3904             if (n > PL_regsize)
3905                 PL_regsize = n;
3906             lastopen = n;
3907             break;
3908         case CLOSE:
3909             n = ARG(scan);  /* which paren pair */
3910             PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3911             PL_regoffs[n].end = locinput - PL_bostr;
3912             /*if (n > PL_regsize)
3913                 PL_regsize = n;*/
3914             if (n > *PL_reglastparen)
3915                 *PL_reglastparen = n;
3916             *PL_reglastcloseparen = n;
3917             if (cur_eval && cur_eval->u.eval.close_paren == n) {
3918                 goto fake_end;
3919             }
3920             break;
3921         case ACCEPT:
3922             if (ARG(scan)){
3923                 regnode *cursor;
3924                 for (cursor=scan;
3925                      cursor && OP(cursor)!=END;
3926                      cursor=regnext(cursor))
3927                 {
3928                     if ( OP(cursor)==CLOSE ){
3929                         n = ARG(cursor);
3930                         if ( n <= lastopen ) {
3931                             PL_regoffs[n].start
3932                                 = PL_reg_start_tmp[n] - PL_bostr;
3933                             PL_regoffs[n].end = locinput - PL_bostr;
3934                             /*if (n > PL_regsize)
3935                             PL_regsize = n;*/
3936                             if (n > *PL_reglastparen)
3937                                 *PL_reglastparen = n;
3938                             *PL_reglastcloseparen = n;
3939                             if ( n == ARG(scan) || (cur_eval &&
3940                                 cur_eval->u.eval.close_paren == n))
3941                                 break;
3942                         }
3943                     }
3944                 }
3945             }
3946             goto fake_end;
3947             /*NOTREACHED*/
3948         case GROUPP:
3949             n = ARG(scan);  /* which paren pair */
3950             sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3951             break;
3952         case NGROUPP:
3953             /* reg_check_named_buff_matched returns 0 for no match */
3954             sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3955             break;
3956         case INSUBP:
3957             n = ARG(scan);
3958             sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3959             break;
3960         case DEFINEP:
3961             sw = 0;
3962             break;
3963         case IFTHEN:
3964             PL_reg_leftiter = PL_reg_maxiter;           /* Void cache */
3965             if (sw)
3966                 next = NEXTOPER(NEXTOPER(scan));
3967             else {
3968                 next = scan + ARG(scan);
3969                 if (OP(next) == IFTHEN) /* Fake one. */
3970                     next = NEXTOPER(NEXTOPER(next));
3971             }
3972             break;
3973         case LOGICAL:
3974             logical = scan->flags;
3975             break;
3976
3977 /*******************************************************************
3978
3979 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3980 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3981 STAR/PLUS/CURLY/CURLYN are used instead.)
3982
3983 A*B is compiled as <CURLYX><A><WHILEM><B>
3984
3985 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3986 state, which contains the current count, initialised to -1. It also sets
3987 cur_curlyx to point to this state, with any previous value saved in the
3988 state block.
3989
3990 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3991 since the pattern may possibly match zero times (i.e. it's a while {} loop
3992 rather than a do {} while loop).
3993
3994 Each entry to WHILEM represents a successful match of A. The count in the
3995 CURLYX block is incremented, another WHILEM state is pushed, and execution
3996 passes to A or B depending on greediness and the current count.
3997
3998 For example, if matching against the string a1a2a3b (where the aN are
3999 substrings that match /A/), then the match progresses as follows: (the
4000 pushed states are interspersed with the bits of strings matched so far):
4001
4002     <CURLYX cnt=-1>
4003     <CURLYX cnt=0><WHILEM>
4004     <CURLYX cnt=1><WHILEM> a1 <WHILEM>
4005     <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
4006     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
4007     <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
4008
4009 (Contrast this with something like CURLYM, which maintains only a single
4010 backtrack state:
4011
4012     <CURLYM cnt=0> a1
4013     a1 <CURLYM cnt=1> a2
4014     a1 a2 <CURLYM cnt=2> a3
4015     a1 a2 a3 <CURLYM cnt=3> b
4016 )
4017
4018 Each WHILEM state block marks a point to backtrack to upon partial failure
4019 of A or B, and also contains some minor state data related to that
4020 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4021 overall state, such as the count, and pointers to the A and B ops.
4022
4023 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4024 must always point to the *current* CURLYX block, the rules are:
4025
4026 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4027 and set cur_curlyx to point the new block.
4028
4029 When popping the CURLYX block after a successful or unsuccessful match,
4030 restore the previous cur_curlyx.
4031
4032 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4033 to the outer one saved in the CURLYX block.
4034
4035 When popping the WHILEM block after a successful or unsuccessful B match,
4036 restore the previous cur_curlyx.
4037
4038 Here's an example for the pattern (AI* BI)*BO
4039 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4040
4041 cur_
4042 curlyx backtrack stack
4043 ------ ---------------
4044 NULL
4045 CO     <CO prev=NULL> <WO>
4046 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4047 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4048 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4049
4050 At this point the pattern succeeds, and we work back down the stack to
4051 clean up, restoring as we go:
4052
4053 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4054 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4055 CO     <CO prev=NULL> <WO>
4056 NULL
4057
4058 *******************************************************************/
4059
4060 #define ST st->u.curlyx
4061
4062         case CURLYX:    /* start of /A*B/  (for complex A) */
4063         {
4064             /* No need to save/restore up to this paren */
4065             I32 parenfloor = scan->flags;
4066
4067             assert(next); /* keep Coverity happy */
4068             if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4069                 next += ARG(next);
4070
4071             /* XXXX Probably it is better to teach regpush to support
4072                parenfloor > PL_regsize... */
4073             if (parenfloor > (I32)*PL_reglastparen)
4074                 parenfloor = *PL_reglastparen; /* Pessimization... */
4075
4076             ST.prev_curlyx= cur_curlyx;
4077             cur_curlyx = st;
4078             ST.cp = PL_savestack_ix;
4079
4080             /* these fields contain the state of the current curly.
4081              * they are accessed by subsequent WHILEMs */
4082             ST.parenfloor = parenfloor;
4083             ST.min = ARG1(scan);
4084             ST.max = ARG2(scan);
4085             ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4086             ST.B = next;
4087             ST.minmod = minmod;
4088             minmod = 0;
4089             ST.count = -1;      /* this will be updated by WHILEM */
4090             ST.lastloc = NULL;  /* this will be updated by WHILEM */
4091
4092             PL_reginput = locinput;
4093             PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4094             /* NOTREACHED */
4095         }
4096
4097         case CURLYX_end: /* just finished matching all of A*B */
4098             cur_curlyx = ST.prev_curlyx;
4099             sayYES;
4100             /* NOTREACHED */
4101
4102         case CURLYX_end_fail: /* just failed to match all of A*B */
4103             regcpblow(ST.cp);
4104             cur_curlyx = ST.prev_curlyx;
4105             sayNO;
4106             /* NOTREACHED */
4107
4108
4109 #undef ST
4110 #define ST st->u.whilem
4111
4112         case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4113         {
4114             /* see the discussion above about CURLYX/WHILEM */
4115             I32 n;
4116             assert(cur_curlyx); /* keep Coverity happy */
4117             n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4118             ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4119             ST.cache_offset = 0;
4120             ST.cache_mask = 0;
4121
4122             PL_reginput = locinput;
4123
4124             DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4125                   "%*s  whilem: matched %ld out of %ld..%ld\n",
4126                   REPORT_CODE_OFF+depth*2, "", (long)n,
4127                   (long)cur_curlyx->u.curlyx.min,
4128                   (long)cur_curlyx->u.curlyx.max)
4129             );
4130
4131             /* First just match a string of min A's. */
4132
4133             if (n < cur_curlyx->u.curlyx.min) {
4134                 cur_curlyx->u.curlyx.lastloc = locinput;
4135                 PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4136                 /* NOTREACHED */
4137             }
4138
4139             /* If degenerate A matches "", assume A done. */
4140
4141             if (locinput == cur_curlyx->u.curlyx.lastloc) {
4142                 DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4143                    "%*s  whilem: empty match detected, trying continuation...\n",
4144                    REPORT_CODE_OFF+depth*2, "")
4145                 );
4146                 goto do_whilem_B_max;
4147             }
4148
4149             /* super-linear cache processing */
4150
4151             if (scan->flags) {
4152
4153                 if (!PL_reg_maxiter) {
4154                     /* start the countdown: Postpone detection until we
4155                      * know the match is not *that* much linear. */
4156                     PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4157                     /* possible overflow for long strings and many CURLYX's */
4158                     if (PL_reg_maxiter < 0)
4159                         PL_reg_maxiter = I32_MAX;
4160                     PL_reg_leftiter = PL_reg_maxiter;
4161                 }
4162
4163                 if (PL_reg_leftiter-- == 0) {
4164                     /* initialise cache */
4165                     const I32 size = (PL_reg_maxiter + 7)/8;
4166                     if (PL_reg_poscache) {
4167                         if ((I32)PL_reg_poscache_size < size) {
4168                             Renew(PL_reg_poscache, size, char);
4169                             PL_reg_poscache_size = size;
4170                         }
4171                         Zero(PL_reg_poscache, size, char);
4172                     }
4173                     else {
4174                         PL_reg_poscache_size = size;
4175                         Newxz(PL_reg_poscache, size, char);
4176                     }
4177                     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4178       "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4179                               PL_colors[4], PL_colors[5])
4180                     );
4181                 }
4182
4183                 if (PL_reg_leftiter < 0) {
4184                     /* have we already failed at this position? */
4185                     I32 offset, mask;
4186                     offset  = (scan->flags & 0xf) - 1
4187                                 + (locinput - PL_bostr)  * (scan->flags>>4);
4188                     mask    = 1 << (offset % 8);
4189                     offset /= 8;
4190                     if (PL_reg_poscache[offset] & mask) {
4191                         DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4192                             "%*s  whilem: (cache) already tried at this position...\n",
4193                             REPORT_CODE_OFF+depth*2, "")
4194                         );
4195                         sayNO; /* cache records failure */
4196                     }
4197                     ST.cache_offset = offset;
4198                     ST.cache_mask   = mask;
4199                 }
4200             }
4201
4202             /* Prefer B over A for minimal matching. */
4203
4204             if (cur_curlyx->u.curlyx.minmod) {
4205                 ST.save_curlyx = cur_curlyx;
4206                 cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4207                 ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4208                 REGCP_SET(ST.lastcp);
4209                 PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4210                 /* NOTREACHED */
4211             }
4212
4213             /* Prefer A over B for maximal matching. */
4214
4215             if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4216                 ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4217                 cur_curlyx->u.curlyx.lastloc = locinput;
4218                 REGCP_SET(ST.lastcp);
4219                 PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4220                 /* NOTREACHED */
4221             }
4222             goto do_whilem_B_max;
4223         }
4224         /* NOTREACHED */
4225
4226         case WHILEM_B_min: /* just matched B in a minimal match */
4227         case WHILEM_B_max: /* just matched B in a maximal match */
4228             cur_curlyx = ST.save_curlyx;
4229             sayYES;
4230             /* NOTREACHED */
4231
4232         case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4233             cur_curlyx = ST.save_curlyx;
4234             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4235             cur_curlyx->u.curlyx.count--;
4236             CACHEsayNO;
4237             /* NOTREACHED */
4238
4239         case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4240             REGCP_UNWIND(ST.lastcp);
4241             regcppop(rex);
4242             /* FALL THROUGH */
4243         case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4244             cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4245             cur_curlyx->u.curlyx.count--;
4246             CACHEsayNO;
4247             /* NOTREACHED */
4248
4249         case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4250             REGCP_UNWIND(ST.lastcp);
4251             regcppop(rex);      /* Restore some previous $<digit>s? */
4252             PL_reginput = locinput;
4253             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4254                 "%*s  whilem: failed, trying continuation...\n",
4255                 REPORT_CODE_OFF+depth*2, "")
4256             );
4257           do_whilem_B_max:
4258             if (cur_curlyx->u.curlyx.count >= REG_INFTY
4259                 && ckWARN(WARN_REGEXP)
4260                 && !(PL_reg_flags & RF_warned))
4261             {
4262                 PL_reg_flags |= RF_warned;
4263                 Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4264                      "Complex regular subexpression recursion",
4265                      REG_INFTY - 1);
4266             }
4267
4268             /* now try B */
4269             ST.save_curlyx = cur_curlyx;
4270             cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4271             PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4272             /* NOTREACHED */
4273
4274         case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4275             cur_curlyx = ST.save_curlyx;
4276             REGCP_UNWIND(ST.lastcp);
4277             regcppop(rex);
4278
4279             if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4280                 /* Maximum greed exceeded */
4281                 if (cur_curlyx->u.curlyx.count >= REG_INFTY
4282                     && ckWARN(WARN_REGEXP)
4283                     && !(PL_reg_flags & RF_warned))
4284                 {
4285                     PL_reg_flags |= RF_warned;
4286                     Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4287                         "%s limit (%d) exceeded",
4288                         "Complex regular subexpression recursion",
4289                         REG_INFTY - 1);
4290                 }
4291                 cur_curlyx->u.curlyx.count--;
4292                 CACHEsayNO;
4293             }
4294
4295             DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4296                 "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4297             );
4298             /* Try grabbing another A and see if it helps. */
4299             PL_reginput = locinput;
4300             cur_curlyx->u.curlyx.lastloc = locinput;
4301             ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4302             REGCP_SET(ST.lastcp);
4303             PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4304             /* NOTREACHED */
4305
4306 #undef  ST
4307 #define ST st->u.branch
4308
4309         case BRANCHJ:       /*  /(...|A|...)/ with long next pointer */
4310             next = scan + ARG(scan);
4311             if (next == scan)
4312                 next = NULL;
4313             scan = NEXTOPER(scan);
4314             /* FALL THROUGH */
4315
4316         case BRANCH:        /*  /(...|A|...)/ */
4317             scan = NEXTOPER(scan); /* scan now points to inner node */
4318             ST.lastparen = *PL_reglastparen;
4319             ST.next_branch = next;
4320             REGCP_SET(ST.cp);
4321             PL_reginput = locinput;
4322
4323             /* Now go into the branch */
4324             if (has_cutgroup) {
4325                 PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4326             } else {
4327                 PUSH_STATE_GOTO(BRANCH_next, scan);
4328             }
4329             /* NOTREACHED */
4330         case CUTGROUP:
4331             PL_reginput = locinput;
4332             sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4333                 MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4334             PUSH_STATE_GOTO(CUTGROUP_next,next);
4335             /* NOTREACHED */
4336         case CUTGROUP_next_fail:
4337             do_cutgroup = 1;
4338             no_final = 1;
4339             if (st->u.mark.mark_name)
4340                 sv_commit = st->u.mark.mark_name;
4341             sayNO;
4342             /* NOTREACHED */
4343         case BRANCH_next:
4344             sayYES;
4345             /* NOTREACHED */
4346         case BRANCH_next_fail: /* that branch failed; try the next, if any */
4347             if (do_cutgroup) {
4348                 do_cutgroup = 0;
4349                 no_final = 0;
4350             }
4351             REGCP_UNWIND(ST.cp);
4352             for (n = *PL_reglastparen; n > ST.lastparen; n--)
4353                 PL_regoffs[n].end = -1;
4354             *PL_reglastparen = n;
4355             /*dmq: *PL_reglastcloseparen = n; */
4356             scan = ST.next_branch;
4357             /* no more branches? */
4358             if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4359                 DEBUG_EXECUTE_r({
4360                     PerlIO_printf( Perl_debug_log,
4361                         "%*s  %sBRANCH failed...%s\n",
4362                         REPORT_CODE_OFF+depth*2, "",
4363                         PL_colors[4],
4364                         PL_colors[5] );
4365                 });
4366                 sayNO_SILENT;
4367             }
4368             continue; /* execute next BRANCH[J] op */
4369             /* NOTREACHED */
4370
4371         case MINMOD:
4372             minmod = 1;
4373             break;
4374
4375 #undef  ST
4376 #define ST st->u.curlym
4377
4378         case CURLYM:    /* /A{m,n}B/ where A is fixed-length */
4379
4380             /* This is an optimisation of CURLYX that enables us to push
4381              * only a single backtracking state, no matter how many matches
4382              * there are in {m,n}. It relies on the pattern being constant
4383              * length, with no parens to influence future backrefs
4384              */
4385
4386             ST.me = scan;
4387             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4388
4389             /* if paren positive, emulate an OPEN/CLOSE around A */
4390             if (ST.me->flags) {
4391                 U32 paren = ST.me->flags;
4392                 if (paren > PL_regsize)
4393                     PL_regsize = paren;
4394                 if (paren > *PL_reglastparen)
4395                     *PL_reglastparen = paren;
4396                 scan += NEXT_OFF(scan); /* Skip former OPEN. */
4397             }
4398             ST.A = scan;
4399             ST.B = next;
4400             ST.alen = 0;
4401             ST.count = 0;
4402             ST.minmod = minmod;
4403             minmod = 0;
4404             ST.c1 = CHRTEST_UNINIT;
4405             REGCP_SET(ST.cp);
4406
4407             if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4408                 goto curlym_do_B;
4409
4410           curlym_do_A: /* execute the A in /A{m,n}B/  */
4411             PL_reginput = locinput;
4412             PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4413             /* NOTREACHED */
4414
4415         case CURLYM_A: /* we've just matched an A */
4416             locinput = st->locinput;
4417             nextchr = UCHARAT(locinput);
4418
4419             ST.count++;
4420             /* after first match, determine A's length: u.curlym.alen */
4421             if (ST.count == 1) {
4422                 if (PL_reg_match_utf8) {
4423                     char *s = locinput;
4424                     while (s < PL_reginput) {
4425                         ST.alen++;
4426                         s += UTF8SKIP(s);
4427                     }
4428                 }
4429                 else {
4430                     ST.alen = PL_reginput - locinput;
4431                 }
4432                 if (ST.alen == 0)
4433                     ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4434             }
4435             DEBUG_EXECUTE_r(
4436                 PerlIO_printf(Perl_debug_log,
4437                           "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4438                           (int)(REPORT_CODE_OFF+(depth*2)), "",
4439                           (IV) ST.count, (IV)ST.alen)
4440             );
4441
4442             locinput = PL_reginput;
4443
4444             if (cur_eval && cur_eval->u.eval.close_paren &&
4445                 cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4446                 goto fake_end;
4447
4448             {
4449                 I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4450                 if ( max == REG_INFTY || ST.count < max )
4451                     goto curlym_do_A; /* try to match another A */
4452             }
4453             goto curlym_do_B; /* try to match B */
4454
4455         case CURLYM_A_fail: /* just failed to match an A */
4456             REGCP_UNWIND(ST.cp);
4457
4458             if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4459                 || (cur_eval && cur_eval->u.eval.close_paren &&
4460                     cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4461                 sayNO;
4462
4463           curlym_do_B: /* execute the B in /A{m,n}B/  */
4464             PL_reginput = locinput;
4465             if (ST.c1 == CHRTEST_UNINIT) {
4466                 /* calculate c1 and c2 for possible match of 1st char
4467                  * following curly */
4468                 ST.c1 = ST.c2 = CHRTEST_VOID;
4469                 if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4470                     regnode *text_node = ST.B;
4471                     if (! HAS_TEXT(text_node))
4472                         FIND_NEXT_IMPT(text_node);
4473                     /* this used to be
4474
4475                         (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4476
4477                         But the former is redundant in light of the latter.
4478
4479                         if this changes back then the macro for
4480                         IS_TEXT and friends need to change.
4481                      */
4482                     if (PL_regkind[OP(text_node)] == EXACT)
4483                     {
4484
4485                         ST.c1 = (U8)*STRING(text_node);
4486                         ST.c2 =
4487                             (IS_TEXTF(text_node))
4488                             ? PL_fold[ST.c1]
4489                             : (IS_TEXTFL(text_node))
4490                                 ? PL_fold_locale[ST.c1]
4491                                 : ST.c1;
4492                     }
4493                 }
4494             }
4495
4496             DEBUG_EXECUTE_r(
4497                 PerlIO_printf(Perl_debug_log,
4498                     "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4499                     (int)(REPORT_CODE_OFF+(depth*2)),
4500                     "", (IV)ST.count)
4501                 );
4502             if (ST.c1 != CHRTEST_VOID
4503                     && UCHARAT(PL_reginput) != ST.c1
4504                     && UCHARAT(PL_reginput) != ST.c2)
4505             {
4506                 /* simulate B failing */
4507                 DEBUG_OPTIMISE_r(
4508                     PerlIO_printf(Perl_debug_log,
4509                         "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4510                         (int)(REPORT_CODE_OFF+(depth*2)),"",
4511                         (IV)ST.c1,(IV)ST.c2
4512                 ));
4513                 state_num = CURLYM_B_fail;
4514                 goto reenter_switch;
4515             }
4516
4517             if (ST.me->flags) {
4518                 /* mark current A as captured */
4519                 I32 paren = ST.me->flags;
4520                 if (ST.count) {
4521                     PL_regoffs[paren].start
4522                         = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4523                     PL_regoffs[paren].end = PL_reginput - PL_bostr;
4524                     /*dmq: *PL_reglastcloseparen = paren; */
4525                 }
4526                 else
4527                     PL_regoffs[paren].end = -1;
4528                 if (cur_eval && cur_eval->u.eval.close_paren &&
4529                     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4530                 {
4531                     if (ST.count)
4532                         goto fake_end;
4533                     else
4534                         sayNO;
4535                 }
4536             }
4537
4538             PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4539             /* NOTREACHED */
4540
4541         case CURLYM_B_fail: /* just failed to match a B */
4542             REGCP_UNWIND(ST.cp);
4543             if (ST.minmod) {
4544                 I32 max = ARG2(ST.me);
4545                 if (max != REG_INFTY && ST.count == max)
4546                     sayNO;
4547                 goto curlym_do_A; /* try to match a further A */
4548             }
4549             /* backtrack one A */
4550             if (ST.count == ARG1(ST.me) /* min */)
4551                 sayNO;
4552             ST.count--;
4553             locinput = HOPc(locinput, -ST.alen);
4554             goto curlym_do_B; /* try to match B */
4555
4556 #undef ST
4557 #define ST st->u.curly
4558
4559 #define CURLY_SETPAREN(paren, success) \
4560     if (paren) { \
4561         if (success) { \
4562             PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4563             PL_regoffs[paren].end = locinput - PL_bostr; \
4564             *PL_reglastcloseparen = paren; \
4565         } \
4566         else \
4567             PL_regoffs[paren].end = -1; \
4568     }
4569
4570         case STAR:              /*  /A*B/ where A is width 1 */
4571             ST.paren = 0;
4572             ST.min = 0;
4573             ST.max = REG_INFTY;
4574             scan = NEXTOPER(scan);
4575             goto repeat;
4576         case PLUS:              /*  /A+B/ where A is width 1 */
4577             ST.paren = 0;
4578             ST.min = 1;
4579             ST.max = REG_INFTY;
4580             scan = NEXTOPER(scan);
4581             goto repeat;
4582         case CURLYN:            /*  /(A){m,n}B/ where A is width 1 */
4583             ST.paren = scan->flags;     /* Which paren to set */
4584             if (ST.paren > PL_regsize)
4585                 PL_regsize = ST.paren;
4586             if (ST.paren > *PL_reglastparen)
4587                 *PL_reglastparen = ST.paren;
4588             ST.min = ARG1(scan);  /* min to match */
4589             ST.max = ARG2(scan);  /* max to match */
4590             if (cur_eval && cur_eval->u.eval.close_paren &&
4591                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4592                 ST.min=1;
4593                 ST.max=1;
4594             }
4595             scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4596             goto repeat;
4597         case CURLY:             /*  /A{m,n}B/ where A is width 1 */
4598             ST.paren = 0;
4599             ST.min = ARG1(scan);  /* min to match */
4600             ST.max = ARG2(scan);  /* max to match */
4601             scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4602           repeat:
4603             /*
4604             * Lookahead to avoid useless match attempts
4605             * when we know what character comes next.
4606             *
4607             * Used to only do .*x and .*?x, but now it allows
4608             * for )'s, ('s and (?{ ... })'s to be in the way
4609             * of the quantifier and the EXACT-like node.  -- japhy
4610             */
4611
4612             if (ST.min > ST.max) /* XXX make this a compile-time check? */
4613                 sayNO;
4614             if (HAS_TEXT(next) || JUMPABLE(next)) {
4615                 U8 *s;
4616                 regnode *text_node = next;
4617
4618                 if (! HAS_TEXT(text_node))
4619                     FIND_NEXT_IMPT(text_node);
4620
4621                 if (! HAS_TEXT(text_node))
4622                     ST.c1 = ST.c2 = CHRTEST_VOID;
4623                 else {
4624                     if ( PL_regkind[OP(text_node)] != EXACT ) {
4625                         ST.c1 = ST.c2 = CHRTEST_VOID;
4626                         goto assume_ok_easy;
4627                     }
4628                     else
4629                         s = (U8*)STRING(text_node);
4630
4631                     /*  Currently we only get here when
4632
4633                         PL_rekind[OP(text_node)] == EXACT
4634
4635                         if this changes back then the macro for IS_TEXT and
4636                         friends need to change. */
4637                     if (!UTF) {
4638                         ST.c2 = ST.c1 = *s;
4639                         if (IS_TEXTF(text_node))
4640                             ST.c2 = PL_fold[ST.c1];
4641                         else if (IS_TEXTFL(text_node))
4642                             ST.c2 = PL_fold_locale[ST.c1];
4643                     }
4644                     else { /* UTF */
4645                         if (IS_TEXTF(text_node)) {
4646                              STRLEN ulen1, ulen2;
4647                              U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4648                              U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4649
4650                              to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4651                              to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4652 #ifdef EBCDIC
4653                              ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4654                                                     ckWARN(WARN_UTF8) ?
4655                                                     0 : UTF8_ALLOW_ANY);
4656                              ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4657                                                     ckWARN(WARN_UTF8) ?
4658                                                     0 : UTF8_ALLOW_ANY);
4659 #else
4660                              ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4661                                                     uniflags);
4662                              ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4663                                                     uniflags);
4664 #endif
4665                         }
4666                         else {
4667                             ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4668                                                      uniflags);
4669                         }
4670                     }
4671                 }
4672             }
4673             else
4674                 ST.c1 = ST.c2 = CHRTEST_VOID;
4675         assume_ok_easy:
4676
4677             ST.A = scan;
4678             ST.B = next;
4679             PL_reginput = locinput;
4680             if (minmod) {
4681                 minmod = 0;
4682                 if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4683                     sayNO;
4684                 ST.count = ST.min;
4685                 locinput = PL_reginput;
4686                 REGCP_SET(ST.cp);
4687                 if (ST.c1 == CHRTEST_VOID)
4688                     goto curly_try_B_min;
4689
4690                 ST.oldloc = locinput;
4691
4692                 /* set ST.maxpos to the furthest point along the
4693                  * string that could possibly match */
4694                 if  (ST.max == REG_INFTY) {
4695                     ST.maxpos = PL_regeol - 1;
4696                     if (do_utf8)
4697                         while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4698                             ST.maxpos--;
4699                 }
4700                 else if (do_utf8) {
4701                     int m = ST.max - ST.min;
4702                     for (ST.maxpos = locinput;
4703                          m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4704                         ST.maxpos += UTF8SKIP(ST.maxpos);
4705                 }
4706                 else {
4707                     ST.maxpos = locinput + ST.max - ST.min;
4708                     if (ST.maxpos >= PL_regeol)
4709                         ST.maxpos = PL_regeol - 1;
4710                 }
4711                 goto curly_try_B_min_known;
4712
4713             }
4714             else {
4715                 ST.count = regrepeat(rex, ST.A, ST.max, depth);
4716                 locinput = PL_reginput;
4717                 if (ST.count < ST.min)
4718                     sayNO;
4719                 if ((ST.count > ST.min)
4720                     && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4721                 {
4722                     /* A{m,n} must come at the end of the string, there's
4723                      * no point in backing off ... */
4724                     ST.min = ST.count;
4725                     /* ...except that $ and \Z can match before *and* after
4726                        newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4727                        We may back off by one in this case. */
4728                     if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4729                         ST.min--;
4730                 }
4731                 REGCP_SET(ST.cp);
4732                 goto curly_try_B_max;
4733             }
4734             /* NOTREACHED */
4735
4736
4737         case CURLY_B_min_known_fail:
4738             /* failed to find B in a non-greedy match where c1,c2 valid */
4739             if (ST.paren && ST.count)
4740                 PL_regoffs[ST.paren].end = -1;
4741
4742             PL_reginput = locinput;     /* Could be reset... */
4743             REGCP_UNWIND(ST.cp);
4744             /* Couldn't or didn't -- move forward. */
4745             ST.oldloc = locinput;
4746             if (do_utf8)
4747                 locinput += UTF8SKIP(locinput);
4748             else
4749                 locinput++;
4750             ST.count++;
4751           curly_try_B_min_known:
4752              /* find the next place where 'B' could work, then call B */
4753             {
4754                 int n;
4755                 if (do_utf8) {
4756                     n = (ST.oldloc == locinput) ? 0 : 1;
4757                     if (ST.c1 == ST.c2) {
4758                         STRLEN len;
4759                         /* set n to utf8_distance(oldloc, locinput) */
4760                         while (locinput <= ST.maxpos &&
4761                                utf8n_to_uvchr((U8*)locinput,
4762                                               UTF8_MAXBYTES, &len,
4763                                               uniflags) != (UV)ST.c1) {
4764                             locinput += len;
4765                             n++;
4766                         }
4767                     }
4768                     else {
4769                         /* set n to utf8_distance(oldloc, locinput) */
4770                         while (locinput <= ST.maxpos) {
4771                             STRLEN len;
4772                             const UV c = utf8n_to_uvchr((U8*)locinput,
4773                                                   UTF8_MAXBYTES, &len,
4774                                                   uniflags);
4775                             if (c == (UV)ST.c1 || c == (UV)ST.c2)
4776                                 break;
4777                             locinput += len;
4778                             n++;
4779                         }
4780                     }
4781                 }
4782                 else {
4783                     if (ST.c1 == ST.c2) {
4784                         while (locinput <= ST.maxpos &&
4785                                UCHARAT(locinput) != ST.c1)
4786                             locinput++;
4787                     }
4788                     else {
4789                         while (locinput <= ST.maxpos
4790                                && UCHARAT(locinput) != ST.c1
4791                                && UCHARAT(locinput) != ST.c2)
4792                             locinput++;
4793                     }
4794                     n = locinput - ST.oldloc;
4795                 }
4796                 if (locinput > ST.maxpos)
4797                     sayNO;
4798                 /* PL_reginput == oldloc now */
4799                 if (n) {
4800                     ST.count += n;
4801                     if (regrepeat(rex, ST.A, n, depth) < n)
4802                         sayNO;
4803                 }
4804                 PL_reginput = locinput;
4805                 CURLY_SETPAREN(ST.paren, ST.count);
4806                 if (cur_eval && cur_eval->u.eval.close_paren &&
4807                     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4808                     goto fake_end;
4809                 }
4810                 PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4811             }
4812             /* NOTREACHED */
4813
4814
4815         case CURLY_B_min_fail:
4816             /* failed to find B in a non-greedy match where c1,c2 invalid */
4817             if (ST.paren && ST.count)
4818                 PL_regoffs[ST.paren].end = -1;
4819
4820             REGCP_UNWIND(ST.cp);
4821             /* failed -- move forward one */
4822             PL_reginput = locinput;
4823             if (regrepeat(rex, ST.A, 1, depth)) {
4824                 ST.count++;
4825                 locinput = PL_reginput;
4826                 if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4827                         ST.count > 0)) /* count overflow ? */
4828                 {
4829                   curly_try_B_min:
4830                     CURLY_SETPAREN(ST.paren, ST.count);
4831                     if (cur_eval && cur_eval->u.eval.close_paren &&
4832                         cur_eval->u.eval.close_paren == (U32)ST.paren) {
4833                         goto fake_end;
4834                     }
4835                     PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4836                 }
4837             }
4838             sayNO;
4839             /* NOTREACHED */
4840
4841
4842         curly_try_B_max:
4843             /* a successful greedy match: now try to match B */
4844             if (cur_eval && cur_eval->u.eval.close_paren &&
4845                 cur_eval->u.eval.close_paren == (U32)ST.paren) {
4846                 goto fake_end;
4847             }
4848             {
4849                 UV c = 0;
4850                 if (ST.c1 != CHRTEST_VOID)
4851                     c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4852                                            UTF8_MAXBYTES, 0, uniflags)
4853                                 : (UV) UCHARAT(PL_reginput);
4854                 /* If it could work, try it. */
4855                 if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4856                     CURLY_SETPAREN(ST.paren, ST.count);
4857                     PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4858                     /* NOTREACHED */
4859                 }
4860             }
4861             /* FALL THROUGH */
4862         case CURLY_B_max_fail:
4863             /* failed to find B in a greedy match */
4864             if (ST.paren && ST.count)
4865                 PL_regoffs[ST.paren].end = -1;
4866
4867             REGCP_UNWIND(ST.cp);
4868             /*  back up. */
4869             if (--ST.count < ST.min)
4870                 sayNO;
4871             PL_reginput = locinput = HOPc(locinput, -1);
4872             goto curly_try_B_max;
4873
4874 #undef ST
4875
4876         case END:
4877             fake_end:
4878             if (cur_eval) {
4879                 /* we've just finished A in /(??{A})B/; now continue with B */
4880                 I32 tmpix;
4881                 st->u.eval.toggle_reg_flags
4882                             = cur_eval->u.eval.toggle_reg_flags;
4883                 PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4884
4885                 st->u.eval.prev_rex = rex_sv;           /* inner */
4886                 SETREX(rex_sv,cur_eval->u.eval.prev_rex);
4887                 rex = (struct regexp *)SvANY(rex_sv);
4888                 rexi = RXi_GET(rex);
4889                 cur_curlyx = cur_eval->u.eval.prev_curlyx;
4890                 ReREFCNT_inc(rex_sv);
4891                 st->u.eval.cp = regcppush(0);   /* Save *all* the positions. */
4892
4893                 /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4894                 PL_reglastparen = &rex->lastparen;
4895                 PL_reglastcloseparen = &rex->lastcloseparen;
4896
4897                 REGCP_SET(st->u.eval.lastcp);
4898                 PL_reginput = locinput;
4899
4900                 /* Restore parens of the outer rex without popping the
4901                  * savestack */
4902                 tmpix = PL_savestack_ix;
4903                 PL_savestack_ix = cur_eval->u.eval.lastcp;
4904                 regcppop(rex);
4905                 PL_savestack_ix = tmpix;
4906
4907                 st->u.eval.prev_eval = cur_eval;
4908                 cur_eval = cur_eval->u.eval.prev_eval;
4909                 DEBUG_EXECUTE_r(
4910                     PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4911                                       REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4912                 if ( nochange_depth )
4913                     nochange_depth--;
4914
4915                 PUSH_YES_STATE_GOTO(EVAL_AB,
4916                         st->u.eval.prev_eval->u.eval.B); /* match B */
4917             }
4918
4919             if (locinput < reginfo->till) {
4920                 DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4921                                       "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4922                                       PL_colors[4],
4923                                       (long)(locinput - PL_reg_starttry),
4924                                       (long)(reginfo->till - PL_reg_starttry),
4925                                       PL_colors[5]));
4926
4927                 sayNO_SILENT;           /* Cannot match: too short. */
4928             }
4929             PL_reginput = locinput;     /* put where regtry can find it */
4930             sayYES;                     /* Success! */
4931
4932         case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4933             DEBUG_EXECUTE_r(
4934             PerlIO_printf(Perl_debug_log,
4935                 "%*s  %ssubpattern success...%s\n",
4936                 REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4937             PL_reginput = locinput;     /* put where regtry can find it */
4938             sayYES;                     /* Success! */
4939
4940 #undef  ST
4941 #define ST st->u.ifmatch
4942
4943         case SUSPEND:   /* (?>A) */
4944             ST.wanted = 1;
4945             PL_reginput = locinput;
4946             goto do_ifmatch;
4947
4948         case UNLESSM:   /* -ve lookaround: (?!A), or with flags, (?<!A) */
4949             ST.wanted = 0;
4950             goto ifmatch_trivial_fail_test;
4951
4952         case IFMATCH:   /* +ve lookaround: (?=A), or with flags, (?<=A) */
4953             ST.wanted = 1;
4954           ifmatch_trivial_fail_test:
4955             if (scan->flags) {
4956                 char * const s = HOPBACKc(locinput, scan->flags);
4957                 if (!s) {
4958                     /* trivial fail */
4959                     if (logical) {
4960                         logical = 0;
4961                         sw = 1 - (bool)ST.wanted;
4962                     }
4963                     else if (ST.wanted)
4964                         sayNO;
4965                     next = scan + ARG(scan);
4966                     if (next == scan)
4967                         next = NULL;
4968                     break;
4969                 }
4970                 PL_reginput = s;
4971             }
4972             else
4973                 PL_reginput = locinput;
4974
4975           do_ifmatch:
4976             ST.me = scan;
4977             ST.logical = logical;
4978             logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4979
4980             /* execute body of (?...A) */
4981             PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4982             /* NOTREACHED */
4983
4984         case IFMATCH_A_fail: /* body of (?...A) failed */
4985             ST.wanted = !ST.wanted;
4986             /* FALL THROUGH */
4987
4988         case IFMATCH_A: /* body of (?...A) succeeded */
4989             if (ST.logical) {
4990                 sw = (bool)ST.wanted;
4991             }
4992             else if (!ST.wanted)
4993                 sayNO;
4994
4995             if (OP(ST.me) == SUSPEND)
4996                 locinput = PL_reginput;
4997             else {
4998                 locinput = PL_reginput = st->locinput;
4999                 nextchr = UCHARAT(locinput);
5000             }
5001             scan = ST.me + ARG(ST.me);
5002             if (scan == ST.me)
5003                 scan = NULL;
5004             continue; /* execute B */
5005
5006 #undef ST
5007
5008         case LONGJMP:
5009             next = scan + ARG(scan);
5010             if (next == scan)
5011                 next = NULL;
5012             break;
5013         case COMMIT:
5014             reginfo->cutpoint = PL_regeol;
5015             /* FALLTHROUGH */
5016         case PRUNE:
5017             PL_reginput = locinput;
5018             if (!scan->flags)
5019                 sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5020             PUSH_STATE_GOTO(COMMIT_next,next);
5021             /* NOTREACHED */
5022         case COMMIT_next_fail:
5023             no_final = 1;
5024             /* FALLTHROUGH */
5025         case OPFAIL:
5026             sayNO;
5027             /* NOTREACHED */
5028
5029 #define ST st->u.mark
5030         case MARKPOINT:
5031             ST.prev_mark = mark_state;
5032             ST.mark_name = sv_commit = sv_yes_mark
5033                 = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5034             mark_state = st;
5035             ST.mark_loc = PL_reginput = locinput;
5036             PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5037             /* NOTREACHED */
5038         case MARKPOINT_next:
5039             mark_state = ST.prev_mark;
5040             sayYES;
5041             /* NOTREACHED */
5042         case MARKPOINT_next_fail:
5043             if (popmark && sv_eq(ST.mark_name,popmark))
5044             {
5045                 if (ST.mark_loc > startpoint)
5046                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5047                 popmark = NULL; /* we found our mark */
5048                 sv_commit = ST.mark_name;
5049
5050                 DEBUG_EXECUTE_r({
5051                         PerlIO_printf(Perl_debug_log,
5052                             "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5053                             REPORT_CODE_OFF+depth*2, "",
5054                             PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5055                 });
5056             }
5057             mark_state = ST.prev_mark;
5058             sv_yes_mark = mark_state ?
5059                 mark_state->u.mark.mark_name : NULL;
5060             sayNO;
5061             /* NOTREACHED */
5062         case SKIP:
5063             PL_reginput = locinput;
5064             if (scan->flags) {
5065                 /* (*SKIP) : if we fail we cut here*/
5066                 ST.mark_name = NULL;
5067                 ST.mark_loc = locinput;
5068                 PUSH_STATE_GOTO(SKIP_next,next);
5069             } else {
5070                 /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5071                    otherwise do nothing.  Meaning we need to scan
5072                  */
5073                 regmatch_state *cur = mark_state;
5074                 SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5075
5076                 while (cur) {
5077                     if ( sv_eq( cur->u.mark.mark_name,
5078                                 find ) )
5079                     {
5080                         ST.mark_name = find;
5081                         PUSH_STATE_GOTO( SKIP_next, next );
5082                     }
5083                     cur = cur->u.mark.prev_mark;
5084                 }
5085             }
5086             /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5087             break;
5088         case SKIP_next_fail:
5089             if (ST.mark_name) {
5090                 /* (*CUT:NAME) - Set up to search for the name as we
5091                    collapse the stack*/
5092                 popmark = ST.mark_name;
5093             } else {
5094                 /* (*CUT) - No name, we cut here.*/
5095                 if (ST.mark_loc > startpoint)
5096                     reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5097                 /* but we set sv_commit to latest mark_name if there
5098                    is one so they can test to see how things lead to this
5099                    cut */
5100                 if (mark_state)
5101                     sv_commit=mark_state->u.mark.mark_name;
5102             }
5103             no_final = 1;
5104             sayNO;
5105             /* NOTREACHED */
5106 #undef ST
5107         case FOLDCHAR:
5108             n = ARG(scan);
5109             if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5110                 locinput += ln;
5111             } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5112                 sayNO;
5113             } else  {
5114                 U8 folded[UTF8_MAXBYTES_CASE+1];
5115                 STRLEN foldlen;
5116                 const char * const l = locinput;
5117                 char *e = PL_regeol;
5118                 to_uni_fold(n, folded, &foldlen);
5119
5120                 if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5121                                l, &e, 0,  do_utf8)) {
5122                         sayNO;
5123                 }
5124                 locinput = e;
5125             }
5126             nextchr = UCHARAT(locinput);
5127             break;
5128         case LNBREAK:
5129             if ((n=is_LNBREAK(locinput,do_utf8))) {
5130                 locinput += n;
5131                 nextchr = UCHARAT(locinput);
5132             } else
5133                 sayNO;
5134             break;
5135
5136 #define CASE_CLASS(nAmE)                              \
5137         case nAmE:                                    \
5138             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5139                 locinput += n;                        \
5140                 nextchr = UCHARAT(locinput);          \
5141             } else                                    \
5142                 sayNO;                                \
5143             break;                                    \
5144         case N##nAmE:                                 \
5145             if ((n=is_##nAmE(locinput,do_utf8))) {    \
5146                 sayNO;                                \
5147             } else {                                  \
5148                 locinput += UTF8SKIP(locinput);       \
5149                 nextchr = UCHARAT(locinput);          \
5150             }                                         \
5151             break
5152
5153         CASE_CLASS(VERTWS);
5154         CASE_CLASS(HORIZWS);
5155 #undef CASE_CLASS
5156
5157         default:
5158             PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5159                           PTR2UV(scan), OP(scan));
5160             Perl_croak(aTHX_ "regexp memory corruption");
5161
5162         } /* end switch */
5163
5164         /* switch break jumps here */
5165         scan = next; /* prepare to execute the next op and ... */
5166         continue;    /* ... jump back to the top, reusing st */
5167         /* NOTREACHED */
5168
5169       push_yes_state:
5170         /* push a state that backtracks on success */
5171         st->u.yes.prev_yes_state = yes_state;
5172         yes_state = st;
5173         /* FALL THROUGH */
5174       push_state:
5175         /* push a new regex state, then continue at scan  */
5176         {
5177             regmatch_state *newst;
5178
5179             DEBUG_STACK_r({
5180                 regmatch_state *cur = st;
5181                 regmatch_state *curyes = yes_state;
5182                 int curd = depth;
5183                 regmatch_slab *slab = PL_regmatch_slab;
5184                 for (;curd > -1;cur--,curd--) {
5185                     if (cur < SLAB_FIRST(slab)) {
5186                         slab = slab->prev;
5187                         cur = SLAB_LAST(slab);
5188                     }
5189                     PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5190                         REPORT_CODE_OFF + 2 + depth * 2,"",
5191                         curd, PL_reg_name[cur->resume_state],
5192                         (curyes == cur) ? "yes" : ""
5193                     );
5194                     if (curyes == cur)
5195                         curyes = cur->u.yes.prev_yes_state;
5196                 }
5197             } else
5198                 DEBUG_STATE_pp("push")
5199             );
5200             depth++;
5201             st->locinput = locinput;
5202             newst = st+1;
5203             if (newst >  SLAB_LAST(PL_regmatch_slab))
5204                 newst = S_push_slab(aTHX);
5205             PL_regmatch_state = newst;
5206
5207             locinput = PL_reginput;
5208             nextchr = UCHARAT(locinput);
5209             st = newst;
5210             continue;
5211             /* NOTREACHED */
5212         }
5213     }
5214
5215     /*
5216     * We get here only if there's trouble -- normally "case END" is
5217     * the terminating point.
5218     */
5219     Perl_croak(aTHX_ "corrupted regexp pointers");
5220     /*NOTREACHED*/
5221     sayNO;
5222
5223 yes:
5224     if (yes_state) {
5225         /* we have successfully completed a subexpression, but we must now
5226          * pop to the state marked by yes_state and continue from there */
5227         assert(st != yes_state);
5228 #ifdef DEBUGGING
5229         while (st != yes_state) {
5230             st--;
5231             if (st < SLAB_FIRST(PL_regmatch_slab)) {
5232                 PL_regmatch_slab = PL_regmatch_slab->prev;
5233                 st = SLAB_LAST(PL_regmatch_slab);
5234             }
5235             DEBUG_STATE_r({
5236                 if (no_final) {
5237                     DEBUG_STATE_pp("pop (no final)");
5238                 } else {
5239                     DEBUG_STATE_pp("pop (yes)");
5240                 }
5241             });
5242             depth--;
5243         }
5244 #else
5245         while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5246             || yes_state > SLAB_LAST(PL_regmatch_slab))
5247         {
5248             /* not in this slab, pop slab */
5249             depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5250             PL_regmatch_slab = PL_regmatch_slab->prev;
5251             st = SLAB_LAST(PL_regmatch_slab);
5252         }
5253         depth -= (st - yes_state);
5254 #endif
5255         st = yes_state;
5256         yes_state = st->u.yes.prev_yes_state;
5257         PL_regmatch_state = st;
5258
5259         if (no_final) {
5260             locinput= st->locinput;
5261             nextchr = UCHARAT(locinput);
5262         }
5263         state_num = st->resume_state + no_final;
5264         goto reenter_switch;
5265     }
5266
5267     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5268                           PL_colors[4], PL_colors[5]));
5269
5270     if (PL_reg_eval_set) {
5271         /* each successfully executed (?{...}) block does the equivalent of
5272          *   local $^R = do {...}
5273          * When popping the save stack, all these locals would be undone;
5274          * bypass this by setting the outermost saved $^R to the latest
5275          * value */
5276         if (oreplsv != GvSV(PL_replgv))
5277             sv_setsv(oreplsv, GvSV(PL_replgv));
5278     }
5279     result = 1;
5280     goto final_exit;
5281
5282 no:
5283     DEBUG_EXECUTE_r(
5284         PerlIO_printf(Perl_debug_log,
5285             "%*s  %sfailed...%s\n",
5286             REPORT_CODE_OFF+depth*2, "",
5287             PL_colors[4], PL_colors[5])
5288         );
5289
5290 no_silent:
5291     if (no_final) {
5292         if (yes_state) {
5293             goto yes;
5294         } else {
5295             goto final_exit;
5296         }
5297     }
5298     if (depth) {
5299         /* there's a previous state to backtrack to */
5300         st--;
5301         if (st < SLAB_FIRST(PL_regmatch_slab)) {
5302             PL_regmatch_slab = PL_regmatch_slab->prev;
5303             st = SLAB_LAST(PL_regmatch_slab);
5304         }
5305         PL_regmatch_state = st;
5306         locinput= st->locinput;
5307         nextchr = UCHARAT(locinput);
5308
5309         DEBUG_STATE_pp("pop");
5310         depth--;
5311         if (yes_state == st)
5312             yes_state = st->u.yes.prev_yes_state;
5313
5314         state_num = st->resume_state + 1; /* failure = success + 1 */
5315         goto reenter_switch;
5316     }
5317     result = 0;
5318
5319   final_exit:
5320     if (rex->intflags & PREGf_VERBARG_SEEN) {
5321         SV *sv_err = get_sv("REGERROR", 1);
5322         SV *sv_mrk = get_sv("REGMARK", 1);
5323         if (result) {
5324             sv_commit = &PL_sv_no;
5325             if (!sv_yes_mark)
5326                 sv_yes_mark = &PL_sv_yes;
5327         } else {
5328             if (!sv_commit)
5329                 sv_commit = &PL_sv_yes;
5330             sv_yes_mark = &PL_sv_no;
5331         }
5332         sv_setsv(sv_err, sv_commit);
5333         sv_setsv(sv_mrk, sv_yes_mark);
5334     }
5335
5336     /* clean up; in particular, free all slabs above current one */
5337     LEAVE_SCOPE(oldsave);
5338
5339     return result;
5340 }
5341
5342 /*
5343  - regrepeat - repeatedly match something simple, report how many
5344  */
5345 /*
5346  * [This routine now assumes that it will only match on things of length 1.
5347  * That was true before, but now we assume scan - reginput is the count,
5348  * rather than incrementing count on every character.  [Er, except utf8.]]
5349  */
5350 STATIC I32
5351 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5352 {
5353     dVAR;
5354     register char *scan;
5355     register I32 c;
5356     register char *loceol = PL_regeol;
5357     register I32 hardcount = 0;
5358     register bool do_utf8 = PL_reg_match_utf8;
5359 #ifndef DEBUGGING
5360     PERL_UNUSED_ARG(depth);
5361 #endif
5362
5363     PERL_ARGS_ASSERT_REGREPEAT;
5364
5365     scan = PL_reginput;
5366     if (max == REG_INFTY)
5367         max = I32_MAX;
5368     else if (max < loceol - scan)
5369         loceol = scan + max;
5370     switch (OP(p)) {
5371     case REG_ANY:
5372         if (do_utf8) {
5373             loceol = PL_regeol;
5374             while (scan < loceol && hardcount < max && *scan != '\n') {
5375                 scan += UTF8SKIP(scan);
5376                 hardcount++;
5377             }
5378         } else {
5379             while (scan < loceol && *scan != '\n')
5380                 scan++;
5381         }
5382         break;
5383     case SANY:
5384         if (do_utf8) {
5385             loceol = PL_regeol;
5386             while (scan < loceol && hardcount < max) {
5387                 scan += UTF8SKIP(scan);
5388                 hardcount++;
5389             }
5390         }
5391         else
5392             scan = loceol;
5393         break;
5394     case CANY:
5395         scan = loceol;
5396         break;
5397     case EXACT:         /* length of string is 1 */
5398         c = (U8)*STRING(p);
5399         while (scan < loceol && UCHARAT(scan) == c)
5400             scan++;
5401         break;
5402     case EXACTF:        /* length of string is 1 */
5403         c = (U8)*STRING(p);
5404         while (scan < loceol &&
5405                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5406             scan++;
5407         break;
5408     case EXACTFL:       /* length of string is 1 */
5409         PL_reg_flags |= RF_tainted;
5410         c = (U8)*STRING(p);
5411         while (scan < loceol &&
5412                (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5413             scan++;
5414         break;
5415     case ANYOF:
5416         if (do_utf8) {
5417             loceol = PL_regeol;
5418             while (hardcount < max && scan < loceol &&
5419                    reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5420                 scan += UTF8SKIP(scan);
5421                 hardcount++;
5422             }
5423         } else {
5424             while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5425                 scan++;
5426         }
5427         break;
5428     case ALNUM:
5429         if (do_utf8) {
5430             loceol = PL_regeol;
5431             LOAD_UTF8_CHARCLASS_ALNUM();
5432             while (hardcount < max && scan < loceol &&
5433                    swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5434                 scan += UTF8SKIP(scan);
5435                 hardcount++;
5436             }
5437         } else {
5438             while (scan < loceol && isALNUM(*scan))
5439                 scan++;
5440         }
5441         break;
5442     case ALNUML:
5443         PL_reg_flags |= RF_tainted;
5444         if (do_utf8) {
5445             loceol = PL_regeol;
5446             while (hardcount < max && scan < loceol &&
5447                    isALNUM_LC_utf8((U8*)scan)) {
5448                 scan += UTF8SKIP(scan);
5449                 hardcount++;
5450             }
5451         } else {
5452             while (scan < loceol && isALNUM_LC(*scan))
5453                 scan++;
5454         }
5455         break;
5456     case NALNUM:
5457         if (do_utf8) {
5458             loceol = PL_regeol;
5459             LOAD_UTF8_CHARCLASS_ALNUM();
5460             while (hardcount < max && scan < loceol &&
5461                    !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5462                 scan += UTF8SKIP(scan);
5463                 hardcount++;
5464             }
5465         } else {
5466             while (scan < loceol && !isALNUM(*scan))
5467                 scan++;
5468         }
5469         break;
5470     case NALNUML:
5471         PL_reg_flags |= RF_tainted;
5472         if (do_utf8) {
5473             loceol = PL_regeol;
5474             while (hardcount < max && scan < loceol &&
5475                    !isALNUM_LC_utf8((U8*)scan)) {
5476                 scan += UTF8SKIP(scan);
5477                 hardcount++;
5478             }
5479         } else {
5480             while (scan < loceol && !isALNUM_LC(*scan))
5481                 scan++;
5482         }
5483         break;
5484     case SPACE:
5485         if (do_utf8) {
5486             loceol = PL_regeol;
5487             LOAD_UTF8_CHARCLASS_SPACE();
5488             while (hardcount < max && scan < loceol &&
5489                    (*scan == ' ' ||
5490                     swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5491                 scan += UTF8SKIP(scan);
5492                 hardcount++;
5493             }
5494         } else {
5495             while (scan < loceol && isSPACE(*scan))
5496                 scan++;
5497         }
5498         break;
5499     case SPACEL:
5500         PL_reg_flags |= RF_tainted;
5501         if (do_utf8) {
5502             loceol = PL_regeol;
5503             while (hardcount < max && scan < loceol &&
5504                    (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5505                 scan += UTF8SKIP(scan);
5506                 hardcount++;
5507             }
5508         } else {
5509             while (scan < loceol && isSPACE_LC(*scan))
5510                 scan++;
5511         }
5512         break;
5513     case NSPACE:
5514         if (do_utf8) {
5515             loceol = PL_regeol;
5516             LOAD_UTF8_CHARCLASS_SPACE();
5517             while (hardcount < max && scan < loceol &&
5518                    !(*scan == ' ' ||
5519                      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5520                 scan += UTF8SKIP(scan);
5521                 hardcount++;
5522             }
5523         } else {
5524             while (scan < loceol && !isSPACE(*scan))
5525                 scan++;
5526         }
5527         break;
5528     case NSPACEL:
5529         PL_reg_flags |= RF_tainted;
5530         if (do_utf8) {
5531             loceol = PL_regeol;
5532             while (hardcount < max && scan < loceol &&
5533                    !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5534                 scan += UTF8SKIP(scan);
5535                 hardcount++;
5536             }
5537         } else {
5538             while (scan < loceol && !isSPACE_LC(*scan))
5539                 scan++;
5540         }
5541         break;
5542     case DIGIT:
5543         if (do_utf8) {
5544             loceol = PL_regeol;
5545             LOAD_UTF8_CHARCLASS_DIGIT();
5546             while (hardcount < max && scan < loceol &&
5547                    swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5548                 scan += UTF8SKIP(scan);
5549                 hardcount++;
5550             }
5551         } else {
5552             while (scan < loceol && isDIGIT(*scan))
5553                 scan++;
5554         }
5555         break;
5556     case NDIGIT:
5557         if (do_utf8) {
5558             loceol = PL_regeol;
5559             LOAD_UTF8_CHARCLASS_DIGIT();
5560             while (hardcount < max && scan < loceol &&
5561                    !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5562                 scan += UTF8SKIP(scan);
5563                 hardcount++;
5564             }
5565         } else {
5566             while (scan < loceol && !isDIGIT(*scan))
5567                 scan++;
5568         }
5569     case LNBREAK:
5570         if (do_utf8) {
5571             loceol = PL_regeol;
5572             while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5573                 scan += c;
5574                 hardcount++;
5575             }
5576         } else {
5577             /*
5578               LNBREAK can match two latin chars, which is ok,
5579               because we have a null terminated string, but we
5580               have to use hardcount in this situation
5581             */
5582             while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5583                 scan+=c;
5584                 hardcount++;
5585             }
5586         }
5587         break;
5588     case HORIZWS:
5589         if (do_utf8) {
5590             loceol = PL_regeol;
5591             while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5592                 scan += c;
5593                 hardcount++;
5594             }
5595         } else {
5596             while (scan < loceol && is_HORIZWS_latin1(scan))
5597                 scan++;
5598         }
5599         break;
5600     case NHORIZWS:
5601         if (do_utf8) {
5602             loceol = PL_regeol;
5603             while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5604                 scan += UTF8SKIP(scan);
5605                 hardcount++;
5606             }
5607         } else {
5608             while (scan < loceol && !is_HORIZWS_latin1(scan))
5609                 scan++;
5610
5611         }
5612         break;
5613     case VERTWS:
5614         if (do_utf8) {
5615             loceol = PL_regeol;
5616             while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5617                 scan += c;
5618                 hardcount++;
5619             }
5620         } else {
5621             while (scan < loceol && is_VERTWS_latin1(scan))
5622                 scan++;
5623
5624         }
5625         break;
5626     case NVERTWS:
5627         if (do_utf8) {
5628             loceol = PL_regeol;
5629             while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5630                 scan += UTF8SKIP(scan);
5631                 hardcount++;
5632             }
5633         } else {
5634             while (scan < loceol && !is_VERTWS_latin1(scan))
5635                 scan++;
5636
5637         }
5638         break;
5639
5640     default:            /* Called on something of 0 width. */
5641         break;          /* So match right here or not at all. */
5642     }
5643
5644     if (hardcount)
5645         c = hardcount;
5646     else
5647         c = scan - PL_reginput;
5648     PL_reginput = scan;
5649
5650     DEBUG_r({
5651         GET_RE_DEBUG_FLAGS_DECL;
5652         DEBUG_EXECUTE_r({
5653             SV * const prop = sv_newmortal();
5654             regprop(prog, prop, p);
5655             PerlIO_printf(Perl_debug_log,
5656                         "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5657                         REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5658         });
5659     });
5660
5661     return(c);
5662 }
5663
5664
5665 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5666 /*
5667 - regclass_swash - prepare the utf8 swash
5668 */
5669
5670 SV *
5671 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5672 {
5673     dVAR;
5674     SV *sw  = NULL;
5675     SV *si  = NULL;
5676     SV *alt = NULL;
5677     RXi_GET_DECL(prog,progi);
5678     const struct reg_data * const data = prog ? progi->data : NULL;
5679
5680     PERL_ARGS_ASSERT_REGCLASS_SWASH;
5681
5682     if (data && data->count) {
5683         const U32 n = ARG(node);
5684
5685         if (data->what[n] == 's') {
5686             SV * const rv = MUTABLE_SV(data->data[n]);
5687             AV * const av = MUTABLE_AV(SvRV(rv));
5688             SV **const ary = AvARRAY(av);
5689             SV **a, **b;
5690
5691             /* See the end of regcomp.c:S_regclass() for
5692              * documentation of these array elements. */
5693
5694             si = *ary;
5695             a  = SvROK(ary[1]) ? &ary[1] : NULL;
5696             b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5697
5698             if (a)
5699                 sw = *a;
5700             else if (si && doinit) {
5701                 sw = swash_init("utf8", "", si, 1, 0);
5702                 (void)av_store(av, 1, sw);
5703             }
5704             if (b)
5705                 alt = *b;
5706         }
5707     }
5708
5709     if (listsvp)
5710         *listsvp = si;
5711     if (altsvp)
5712         *altsvp  = alt;
5713
5714     return sw;
5715 }
5716 #endif
5717
5718 /*
5719  - reginclass - determine if a character falls into a character class
5720
5721   The n is the ANYOF regnode, the p is the target string, lenp
5722   is pointer to the maximum length of how far to go in the p
5723   (if the lenp is zero, UTF8SKIP(p) is used),
5724   do_utf8 tells whether the target string is in UTF-8.
5725
5726  */
5727
5728 STATIC bool
5729 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5730 {
5731     dVAR;
5732     const char flags = ANYOF_FLAGS(n);
5733     bool match = FALSE;
5734     UV c = *p;
5735     STRLEN len = 0;
5736     STRLEN plen;
5737
5738     PERL_ARGS_ASSERT_REGINCLASS;
5739
5740     if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5741         c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5742                 (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5743                 /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5744         if (len == (STRLEN)-1)
5745             Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5746     }
5747
5748     plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5749     if (do_utf8 || (flags & ANYOF_UNICODE)) {
5750         if (lenp)
5751             *lenp = 0;
5752         if (do_utf8 && !ANYOF_RUNTIME(n)) {
5753             if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5754                 match = TRUE;
5755         }
5756         if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5757             match = TRUE;
5758         if (!match) {
5759             AV *av;
5760             SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5761
5762             if (sw) {
5763                 U8 * utf8_p;
5764                 if (do_utf8) {
5765                     utf8_p = (U8 *) p;
5766                 } else {
5767                     STRLEN len = 1;
5768                     utf8_p = bytes_to_utf8(p, &len);
5769                 }
5770                 if (swash_fetch(sw, utf8_p, 1))
5771                     match = TRUE;
5772                 else if (flags & ANYOF_FOLD) {
5773                     if (!match && lenp && av) {
5774                         I32 i;
5775                         for (i = 0; i <= av_len(av); i++) {
5776                             SV* const sv = *av_fetch(av, i, FALSE);
5777                             STRLEN len;
5778                             const char * const s = SvPV_const(sv, len);
5779                             if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5780                                 *lenp = len;
5781                                 match = TRUE;
5782                                 break;
5783                             }
5784                         }
5785                     }
5786                     if (!match) {
5787                         U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5788
5789                         STRLEN tmplen;
5790                         to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5791                         if (swash_fetch(sw, tmpbuf, 1))
5792                             match = TRUE;
5793                     }
5794                 }
5795
5796                 /* If we allocated a string above, free it */
5797                 if (! do_utf8) Safefree(utf8_p);
5798             }
5799         }
5800         if (match && lenp && *lenp == 0)
5801             *lenp = UNISKIP(NATIVE_TO_UNI(c));
5802     }
5803     if (!match && c < 256) {
5804         if (ANYOF_BITMAP_TEST(n, c))
5805             match = TRUE;
5806         else if (flags & ANYOF_FOLD) {
5807             U8 f;
5808
5809             if (flags & ANYOF_LOCALE) {
5810                 PL_reg_flags |= RF_tainted;
5811                 f = PL_fold_locale[c];
5812             }
5813             else
5814                 f = PL_fold[c];
5815             if (f != c && ANYOF_BITMAP_TEST(n, f))
5816                 match = TRUE;
5817         }
5818
5819         if (!match && (flags & ANYOF_CLASS)) {
5820             PL_reg_flags |= RF_tainted;
5821             if (
5822                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5823                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5824                 (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5825                 (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5826                 (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5827                 (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5828                 (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5829                 (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5830                 (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5831                 (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5832                 (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5833                 (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5834                 (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5835                 (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5836                 (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5837                 (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5838                 (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5839                 (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5840                 (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5841                 (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5842                 (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5843                 (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5844                 (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5845                 (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5846                 (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5847                 (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5848                 (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5849                 (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5850                 (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5851                 (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5852                 ) /* How's that for a conditional? */
5853             {
5854                 match = TRUE;
5855             }
5856         }
5857     }
5858
5859     return (flags & ANYOF_INVERT) ? !match : match;
5860 }
5861
5862 STATIC U8 *
5863 S_reghop3(U8 *s, I32 off, const U8* lim)
5864 {
5865     dVAR;
5866
5867     PERL_ARGS_ASSERT_REGHOP3;
5868
5869     if (off >= 0) {
5870         while (off-- && s < lim) {
5871             /* XXX could check well-formedness here */
5872             s += UTF8SKIP(s);
5873         }
5874     }
5875     else {
5876         while (off++ && s > lim) {
5877             s--;
5878             if (UTF8_IS_CONTINUED(*s)) {
5879                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5880                     s--;
5881             }
5882             /* XXX could check well-formedness here */
5883         }
5884     }
5885     return s;
5886 }
5887
5888 #ifdef XXX_dmq
5889 /* there are a bunch of places where we use two reghop3's that should
5890    be replaced with this routine. but since thats not done yet
5891    we ifdef it out - dmq
5892 */
5893 STATIC U8 *
5894 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5895 {
5896     dVAR;
5897
5898     PERL_ARGS_ASSERT_REGHOP4;
5899
5900     if (off >= 0) {
5901         while (off-- && s < rlim) {
5902             /* XXX could check well-formedness here */
5903             s += UTF8SKIP(s);
5904         }
5905     }
5906     else {
5907         while (off++ && s > llim) {
5908             s--;
5909             if (UTF8_IS_CONTINUED(*s)) {
5910                 while (s > llim && UTF8_IS_CONTINUATION(*s))
5911                     s--;
5912             }
5913             /* XXX could check well-formedness here */
5914         }
5915     }
5916     return s;
5917 }
5918 #endif
5919
5920 STATIC U8 *
5921 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5922 {
5923     dVAR;
5924
5925     PERL_ARGS_ASSERT_REGHOPMAYBE3;
5926
5927     if (off >= 0) {
5928         while (off-- && s < lim) {
5929             /* XXX could check well-formedness here */
5930             s += UTF8SKIP(s);
5931         }
5932         if (off >= 0)
5933             return NULL;
5934     }
5935     else {
5936         while (off++ && s > lim) {
5937             s--;
5938             if (UTF8_IS_CONTINUED(*s)) {
5939                 while (s > lim && UTF8_IS_CONTINUATION(*s))
5940                     s--;
5941             }
5942             /* XXX could check well-formedness here */
5943         }
5944         if (off <= 0)
5945             return NULL;
5946     }
5947     return s;
5948 }
5949
5950 static void
5951 restore_pos(pTHX_ void *arg)
5952 {
5953     dVAR;
5954     regexp * const rex = (regexp *)arg;
5955     if (PL_reg_eval_set) {
5956         if (PL_reg_oldsaved) {
5957             rex->subbeg = PL_reg_oldsaved;
5958             rex->sublen = PL_reg_oldsavedlen;
5959 #ifdef PERL_OLD_COPY_ON_WRITE
5960             rex->saved_copy = PL_nrs;
5961 #endif
5962             RXp_MATCH_COPIED_on(rex);
5963         }
5964         PL_reg_magic->mg_len = PL_reg_oldpos;
5965         PL_reg_eval_set = 0;
5966         PL_curpm = PL_reg_oldcurpm;
5967     }
5968 }
5969
5970 STATIC void
5971 S_to_utf8_substr(pTHX_ register regexp *prog)
5972 {
5973     int i = 1;
5974
5975     PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5976
5977     do {
5978         if (prog->substrs->data[i].substr
5979             && !prog->substrs->data[i].utf8_substr) {
5980             SV* const sv = newSVsv(prog->substrs->data[i].substr);
5981             prog->substrs->data[i].utf8_substr = sv;
5982             sv_utf8_upgrade(sv);
5983             if (SvVALID(prog->substrs->data[i].substr)) {
5984                 const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5985                 if (flags & FBMcf_TAIL) {
5986                     /* Trim the trailing \n that fbm_compile added last
5987                        time.  */
5988                     SvCUR_set(sv, SvCUR(sv) - 1);
5989                     /* Whilst this makes the SV technically "invalid" (as its
5990                        buffer is no longer followed by "\0") when fbm_compile()
5991                        adds the "\n" back, a "\0" is restored.  */
5992                 }
5993                 fbm_compile(sv, flags);
5994             }
5995             if (prog->substrs->data[i].substr == prog->check_substr)
5996                 prog->check_utf8 = sv;
5997         }
5998     } while (i--);
5999 }
6000
6001 STATIC void
6002 S_to_byte_substr(pTHX_ register regexp *prog)
6003 {
6004     dVAR;
6005     int i = 1;
6006
6007     PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
6008
6009     do {
6010         if (prog->substrs->data[i].utf8_substr
6011             && !prog->substrs->data[i].substr) {
6012             SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
6013             if (sv_utf8_downgrade(sv, TRUE)) {
6014                 if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6015                     const U8 flags
6016                         = BmFLAGS(prog->substrs->data[i].utf8_substr);
6017                     if (flags & FBMcf_TAIL) {
6018                         /* Trim the trailing \n that fbm_compile added last
6019                            time.  */
6020                         SvCUR_set(sv, SvCUR(sv) - 1);
6021                     }
6022                     fbm_compile(sv, flags);
6023                 }
6024             } else {
6025                 SvREFCNT_dec(sv);
6026                 sv = &PL_sv_undef;
6027             }
6028             prog->substrs->data[i].substr = sv;
6029             if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6030                 prog->check_substr = sv;
6031         }
6032     } while (i--);
6033 }
6034
6035 /*
6036  * Local variables:
6037  * c-indentation-style: bsd
6038  * c-basic-offset: 4
6039  * indent-tabs-mode: t
6040  * End:
6041  *
6042  * ex: set ts=8 sts=4 sw=4 noet:
6043  */