src/5010001/regexec.c

   1 /*    regexec.c
   2  */
   3
   4 /*
   5  *  One Ring to rule them all, One Ring to find them
   6  &
   7  *     [p.v of _The Lord of the Rings_, opening poem]
   8  *     [p.50 of _The Lord of the Rings_, I/iii: "The Shadow of the Past"]
   9  *     [p.254 of _The Lord of the Rings_, II/ii: "The Council of Elrond"]
  10  */
  11
  12 /* This file contains functions for executing a regular expression.  See
  13  * also regcomp.c which funnily enough, contains functions for compiling
  14  * a regular expression.
  15  *
  16  * This file is also copied at build time to ext/re/re_exec.c, where
  17  * it's built with -DPERL_EXT_RE_BUILD -DPERL_EXT_RE_DEBUG -DPERL_EXT.
  18  * This causes the main functions to be compiled under new names and with
  19  * debugging support added, which makes "use re 'debug'" work.
  20  */
  21
  22 /* NOTE: this is derived from Henry Spencer's regexp code, and should not
  23  * confused with the original package (see point 3 below).  Thanks, Henry!
  24  */
  25
  26 /* Additional note: this code is very heavily munged from Henry's version
  27  * in places.  In some spots I've traded clarity for efficiency, so don't
  28  * blame Henry for some of the lack of readability.
  29  */
  30
  31 /* The names of the functions have been changed from regcomp and
  32  * regexec to  pregcomp and pregexec in order to avoid conflicts
  33  * with the POSIX routines of the same names.
  34 */
  35
  36 #ifdef PERL_EXT_RE_BUILD
  37 #include "re_top.h"
  38 #endif
  39
  40 /*
  41  * pregcomp and pregexec -- regsub and regerror are not used in perl
  42  *
  43  * Copyright (c) 1986 by University of Toronto.
  44  * Written by Henry Spencer.  Not derived from licensed software.
  45  *
  46  * Permission is granted to anyone to use this software for any
  47  * purpose on any computer system, and to redistribute it freely,
  48  * subject to the following restrictions:
  49  *
  50  * 1. The author is not responsible for the consequences of use of
  51  *  this software, no matter how awful, even if they arise
  52  *  from defects in it.
  53  *
  54  * 2. The origin of this software must not be misrepresented, either
  55  *  by explicit claim or by omission.
  56  *
  57  * 3. Altered versions must be plainly marked as such, and must not
  58  *  be misrepresented as being the original software.
  59  *
  60  ****    Alterations to Henry's code are...
  61  ****
  62  ****    Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
  63  ****    2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  64  ****    by Larry Wall and others
  65  ****
  66  ****    You may distribute under the terms of either the GNU General Public
  67  ****    License or the Artistic License, as specified in the README file.
  68  *
  69  * Beware that some of this code is subtly aware of the way operator
  70  * precedence is structured in regular expressions.  Serious changes in
  71  * regular-expression syntax might require a total rethink.
  72  */
  73 #include "EXTERN.h"
  74 #define PERL_IN_REGEXEC_C
  75 #include "perl.h"
  76 #include "re_defs.h"
  77
  78 #ifdef PERL_IN_XSUB_RE
  79 #  include "re_comp.h"
  80 #else
  81 #  include "regcomp.h"
  82 #endif
  83
  84 #define RF_tainted 1  /* tainted information used? */
  85 #define RF_warned 2  /* warned about big count? */
  86
  87 #define RF_utf8  8  /* Pattern contains multibyte chars? */
  88
  89 #define UTF ((PL_reg_flags & RF_utf8) != 0)
  90
  91 #define RS_init  1  /* eval environment created */
  92 #define RS_set  2  /* replsv value is set */
  93
  94 #ifndef STATIC
  95 #define STATIC static
  96 #endif
  97
  98 #define REGINCLASS(prog,p,c)  (ANYOF_FLAGS(p) ? reginclass(prog,p,c,0,0) : ANYOF_BITMAP_TEST(p,*(c)))
  99
 100 /*
 101  * Forwards.
 102  */
 103
 104 #define CHR_SVLEN(sv) (do_utf8 ? sv_len_utf8(sv) : SvCUR(sv))
 105 #define CHR_DIST(a,b) (PL_reg_match_utf8 ? utf8_distance(a,b) : a - b)
 106
 107 #define HOPc(pos,off) \
 108   (char *)(PL_reg_match_utf8 \
 109    ? reghop3((U8*)pos, off, (U8*)(off >= 0 ? PL_regeol : PL_bostr)) \
 110    : (U8*)(pos + off))
 111 #define HOPBACKc(pos, off) \
 112   (char*)(PL_reg_match_utf8\
 113    ? reghopmaybe3((U8*)pos, -off, (U8*)PL_bostr) \
 114    : (pos - off >= PL_bostr)  \
 115     ? (U8*)pos - off  \
 116     : NULL)
 117
 118 #define HOP3(pos,off,lim) (PL_reg_match_utf8 ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
 119 #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
 120
 121 #define LOAD_UTF8_CHARCLASS(class,str) STMT_START { \
 122  if (!CAT2(PL_utf8_,class)) { bool ok; ENTER; save_re_context(); ok=CAT2(is_utf8_,class)((const U8*)str); assert(ok); LEAVE; } } STMT_END
 123 #define LOAD_UTF8_CHARCLASS_ALNUM() LOAD_UTF8_CHARCLASS(alnum,"a")
 124 #define LOAD_UTF8_CHARCLASS_DIGIT() LOAD_UTF8_CHARCLASS(digit,"0")
 125 #define LOAD_UTF8_CHARCLASS_SPACE() LOAD_UTF8_CHARCLASS(space," ")
 126 #define LOAD_UTF8_CHARCLASS_MARK()  LOAD_UTF8_CHARCLASS(mark, "\xcd\x86")
 127
 128 /* TODO: Combine JUMPABLE and HAS_TEXT to cache OP(rn) */
 129
 130 /* for use after a quantifier and before an EXACT-like node -- japhy */
 131 /* it would be nice to rework regcomp.sym to generate this stuff. sigh */
 132 #define JUMPABLE(rn) (      \
 133  OP(rn) == OPEN ||       \
 134  (OP(rn) == CLOSE && (!cur_eval || cur_eval->u.eval.close_paren != ARG(rn))) || \
 135  OP(rn) == EVAL ||   \
 136  OP(rn) == SUSPEND || OP(rn) == IFMATCH || \
 137  OP(rn) == PLUS || OP(rn) == MINMOD || \
 138  OP(rn) == KEEPS || (PL_regkind[OP(rn)] == VERB) || \
 139  (PL_regkind[OP(rn)] == CURLY && ARG1(rn) > 0) \
 140 )
 141 #define IS_EXACT(rn) (PL_regkind[OP(rn)] == EXACT)
 142
 143 #define HAS_TEXT(rn) ( IS_EXACT(rn) || PL_regkind[OP(rn)] == REF )
 144
 145 #if 0
 146 /* Currently these are only used when PL_regkind[OP(rn)] == EXACT so
 147    we don't need this definition. */
 148 #define IS_TEXT(rn)   ( OP(rn)==EXACT   || OP(rn)==REF   || OP(rn)==NREF   )
 149 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  || OP(rn)==REFF  || OP(rn)==NREFF  )
 150 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL || OP(rn)==REFFL || OP(rn)==NREFFL )
 151
 152 #else
 153 /* ... so we use this as its faster. */
 154 #define IS_TEXT(rn)   ( OP(rn)==EXACT   )
 155 #define IS_TEXTF(rn)  ( OP(rn)==EXACTF  )
 156 #define IS_TEXTFL(rn) ( OP(rn)==EXACTFL )
 157
 158 #endif
 159
 160 /*
 161   Search for mandatory following text node; for lookahead, the text must
 162   follow but for lookbehind (rn->flags != 0) we skip to the next step.
 163 */
 164 #define FIND_NEXT_IMPT(rn) STMT_START { \
 165  while (JUMPABLE(rn)) { \
 166   const OPCODE type = OP(rn); \
 167   if (type == SUSPEND || PL_regkind[type] == CURLY) \
 168    rn = NEXTOPER(NEXTOPER(rn)); \
 169   else if (type == PLUS) \
 170    rn = NEXTOPER(rn); \
 171   else if (type == IFMATCH) \
 172    rn = (rn->flags == 0) ? NEXTOPER(NEXTOPER(rn)) : rn + ARG(rn); \
 173   else rn += NEXT_OFF(rn); \
 174  } \
 175 } STMT_END
 176
 177
 178 static void restore_pos(pTHX_ void *arg);
 179
 180 STATIC CHECKPOINT
 181 S_regcppush(pTHX_ I32 parenfloor)
 182 {
 183  dVAR;
 184  const int retval = PL_savestack_ix;
 185 #define REGCP_PAREN_ELEMS 4
 186  const int paren_elems_to_push = (PL_regsize - parenfloor) * REGCP_PAREN_ELEMS;
 187  int p;
 188  GET_RE_DEBUG_FLAGS_DECL;
 189
 190  if (paren_elems_to_push < 0)
 191   Perl_croak(aTHX_ "panic: paren_elems_to_push < 0");
 192
 193 #define REGCP_OTHER_ELEMS 7
 194  SSGROW(paren_elems_to_push + REGCP_OTHER_ELEMS);
 195
 196  for (p = PL_regsize; p > parenfloor; p--) {
 197 /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
 198   SSPUSHINT(PL_regoffs[p].end);
 199   SSPUSHINT(PL_regoffs[p].start);
 200   SSPUSHPTR(PL_reg_start_tmp[p]);
 201   SSPUSHINT(p);
 202   DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
 203   "     saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n",
 204      (UV)p, (IV)PL_regoffs[p].start,
 205      (IV)(PL_reg_start_tmp[p] - PL_bostr),
 206      (IV)PL_regoffs[p].end
 207   ));
 208  }
 209 /* REGCP_OTHER_ELEMS are pushed in any case, parentheses or no. */
 210  SSPUSHPTR(PL_regoffs);
 211  SSPUSHINT(PL_regsize);
 212  SSPUSHINT(*PL_reglastparen);
 213  SSPUSHINT(*PL_reglastcloseparen);
 214  SSPUSHPTR(PL_reginput);
 215 #define REGCP_FRAME_ELEMS 2
 216 /* REGCP_FRAME_ELEMS are part of the REGCP_OTHER_ELEMS and
 217  * are needed for the regexp context stack bookkeeping. */
 218  SSPUSHINT(paren_elems_to_push + REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 219  SSPUSHINT(SAVEt_REGCONTEXT); /* Magic cookie. */
 220
 221  return retval;
 222 }
 223
 224 /* These are needed since we do not localize EVAL nodes: */
 225 #define REGCP_SET(cp)                                           \
 226  DEBUG_STATE_r(                                              \
 227    PerlIO_printf(Perl_debug_log,          \
 228     "  Setting an EVAL scope, savestack=%"IVdf"\n", \
 229     (IV)PL_savestack_ix));                          \
 230  cp = PL_savestack_ix
 231
 232 #define REGCP_UNWIND(cp)                                        \
 233  DEBUG_STATE_r(                                              \
 234   if (cp != PL_savestack_ix)                   \
 235     PerlIO_printf(Perl_debug_log,          \
 236     "  Clearing an EVAL scope, savestack=%"IVdf"..%"IVdf"\n", \
 237     (IV)(cp), (IV)PL_savestack_ix));                \
 238  regcpblow(cp)
 239
 240 STATIC char *
 241 S_regcppop(pTHX_ const regexp *rex)
 242 {
 243  dVAR;
 244  U32 i;
 245  char *input;
 246  GET_RE_DEBUG_FLAGS_DECL;
 247
 248  PERL_ARGS_ASSERT_REGCPPOP;
 249
 250  /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
 251  i = SSPOPINT;
 252  assert(i == SAVEt_REGCONTEXT); /* Check that the magic cookie is there. */
 253  i = SSPOPINT; /* Parentheses elements to pop. */
 254  input = (char *) SSPOPPTR;
 255  *PL_reglastcloseparen = SSPOPINT;
 256  *PL_reglastparen = SSPOPINT;
 257  PL_regsize = SSPOPINT;
 258  PL_regoffs=(regexp_paren_pair *) SSPOPPTR;
 259
 260
 261  /* Now restore the parentheses context. */
 262  for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
 263   i > 0; i -= REGCP_PAREN_ELEMS) {
 264   I32 tmps;
 265   U32 paren = (U32)SSPOPINT;
 266   PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
 267   PL_regoffs[paren].start = SSPOPINT;
 268   tmps = SSPOPINT;
 269   if (paren <= *PL_reglastparen)
 270    PL_regoffs[paren].end = tmps;
 271   DEBUG_BUFFERS_r(
 272    PerlIO_printf(Perl_debug_log,
 273       "     restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n",
 274       (UV)paren, (IV)PL_regoffs[paren].start,
 275       (IV)(PL_reg_start_tmp[paren] - PL_bostr),
 276       (IV)PL_regoffs[paren].end,
 277       (paren > *PL_reglastparen ? "(no)" : ""));
 278   );
 279  }
 280  DEBUG_BUFFERS_r(
 281   if (*PL_reglastparen + 1 <= rex->nparens) {
 282    PerlIO_printf(Perl_debug_log,
 283       "     restoring \\%"IVdf"..\\%"IVdf" to undef\n",
 284       (IV)(*PL_reglastparen + 1), (IV)rex->nparens);
 285   }
 286  );
 287 #if 1
 288  /* It would seem that the similar code in regtry()
 289  * already takes care of this, and in fact it is in
 290  * a better location to since this code can #if 0-ed out
 291  * but the code in regtry() is needed or otherwise tests
 292  * requiring null fields (pat.t#187 and split.t#{13,14}
 293  * (as of patchlevel 7877)  will fail.  Then again,
 294  * this code seems to be necessary or otherwise
 295  * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
 296  * --jhi updated by dapm */
 297  for (i = *PL_reglastparen + 1; i <= rex->nparens; i++) {
 298   if (i > PL_regsize)
 299    PL_regoffs[i].start = -1;
 300   PL_regoffs[i].end = -1;
 301  }
 302 #endif
 303  return input;
 304 }
 305
 306 #define regcpblow(cp) LEAVE_SCOPE(cp) /* Ignores regcppush()ed data. */
 307
 308 /*
 309  * pregexec and friends
 310  */
 311
 312 #ifndef PERL_IN_XSUB_RE
 313 /*
 314  - pregexec - match a regexp against a string
 315  */
 316 I32
 317 Perl_pregexec(pTHX_ REGEXP * const prog, char* stringarg, register char *strend,
 318   char *strbeg, I32 minend, SV *screamer, U32 nosave)
 319 /* strend: pointer to null at end of string */
 320 /* strbeg: real beginning of string */
 321 /* minend: end of match must be >=minend after stringarg. */
 322 /* nosave: For optimizations. */
 323 {
 324  PERL_ARGS_ASSERT_PREGEXEC;
 325
 326  return
 327   regexec_flags(prog, stringarg, strend, strbeg, minend, screamer, NULL,
 328      nosave ? 0 : REXEC_COPY_STR);
 329 }
 330 #endif
 331
 332 /*
 333  * Need to implement the following flags for reg_anch:
 334  *
 335  * USE_INTUIT_NOML  - Useful to call re_intuit_start() first
 336  * USE_INTUIT_ML
 337  * INTUIT_AUTORITATIVE_NOML - Can trust a positive answer
 338  * INTUIT_AUTORITATIVE_ML
 339  * INTUIT_ONCE_NOML  - Intuit can match in one location only.
 340  * INTUIT_ONCE_ML
 341  *
 342  * Another flag for this function: SECOND_TIME (so that float substrs
 343  * with giant delta may be not rechecked).
 344  */
 345
 346 /* Assumptions: if ANCH_GPOS, then strpos is anchored. XXXX Check GPOS logic */
 347
 348 /* If SCREAM, then SvPVX_const(sv) should be compatible with strpos and strend.
 349    Otherwise, only SvCUR(sv) is used to get strbeg. */
 350
 351 /* XXXX We assume that strpos is strbeg unless sv. */
 352
 353 /* XXXX Some places assume that there is a fixed substring.
 354   An update may be needed if optimizer marks as "INTUITable"
 355   RExen without fixed substrings.  Similarly, it is assumed that
 356   lengths of all the strings are no more than minlen, thus they
 357   cannot come from lookahead.
 358   (Or minlen should take into account lookahead.)
 359   NOTE: Some of this comment is not correct. minlen does now take account
 360   of lookahead/behind. Further research is required. -- demerphq
 361
 362 */
 363
 364 /* A failure to find a constant substring means that there is no need to make
 365    an expensive call to REx engine, thus we celebrate a failure.  Similarly,
 366    finding a substring too deep into the string means that less calls to
 367    regtry() should be needed.
 368
 369    REx compiler's optimizer found 4 possible hints:
 370   a) Anchored substring;
 371   b) Fixed substring;
 372   c) Whether we are anchored (beginning-of-line or \G);
 373   d) First node (of those at offset 0) which may distingush positions;
 374    We use a)b)d) and multiline-part of c), and try to find a position in the
 375    string which does not contradict any of them.
 376  */
 377
 378 /* Most of decisions we do here should have been done at compile time.
 379    The nodes of the REx which we used for the search should have been
 380    deleted from the finite automaton. */
 381
 382 char *
 383 Perl_re_intuit_start(pTHX_ REGEXP * const prog, SV *sv, char *strpos,
 384      char *strend, const U32 flags, re_scream_pos_data *data)
 385 {
 386  dVAR;
 387  register I32 start_shift = 0;
 388  /* Should be nonnegative! */
 389  register I32 end_shift   = 0;
 390  register char *s;
 391  register SV *check;
 392  char *strbeg;
 393  char *t;
 394  const bool do_utf8 = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
 395  I32 ml_anch;
 396  register char *other_last = NULL; /* other substr checked before this */
 397  char *check_at = NULL;  /* check substr found at this pos */
 398  const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
 399  RXi_GET_DECL(prog,progi);
 400 #ifdef DEBUGGING
 401  const char * const i_strpos = strpos;
 402 #endif
 403  GET_RE_DEBUG_FLAGS_DECL;
 404
 405  PERL_ARGS_ASSERT_RE_INTUIT_START;
 406
 407  RX_MATCH_UTF8_set(prog,do_utf8);
 408
 409  if (RX_UTF8(prog)) {
 410   PL_reg_flags |= RF_utf8;
 411  }
 412  DEBUG_EXECUTE_r(
 413   debug_start_match(prog, do_utf8, strpos, strend,
 414    sv ? "Guessing start of match in sv for"
 415    : "Guessing start of match in string for");
 416    );
 417
 418  /* CHR_DIST() would be more correct here but it makes things slow. */
 419  if (prog->minlen > strend - strpos) {
 420   DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 421        "String too short... [re_intuit_start]\n"));
 422   goto fail;
 423  }
 424
 425  strbeg = (sv && SvPOK(sv)) ? strend - SvCUR(sv) : strpos;
 426  PL_regeol = strend;
 427  if (do_utf8) {
 428   if (!prog->check_utf8 && prog->check_substr)
 429    to_utf8_substr(prog);
 430   check = prog->check_utf8;
 431  } else {
 432   if (!prog->check_substr && prog->check_utf8)
 433    to_byte_substr(prog);
 434   check = prog->check_substr;
 435  }
 436  if (check == &PL_sv_undef) {
 437   DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 438     "Non-utf8 string cannot match utf8 check string\n"));
 439   goto fail;
 440  }
 441  if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */
 442   ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE)
 443      || ( (prog->extflags & RXf_ANCH_BOL)
 444       && !multiline ) ); /* Check after \n? */
 445
 446   if (!ml_anch) {
 447   if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */
 448     && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */
 449    /* SvCUR is not set on references: SvRV and SvPVX_const overlap */
 450    && sv && !SvROK(sv)
 451    && (strpos != strbeg)) {
 452    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not at start...\n"));
 453    goto fail;
 454   }
 455   if (prog->check_offset_min == prog->check_offset_max &&
 456    !(prog->extflags & RXf_CANY_SEEN)) {
 457    /* Substring at constant offset from beg-of-str... */
 458    I32 slen;
 459
 460    s = HOP3c(strpos, prog->check_offset_min, strend);
 461
 462    if (SvTAIL(check)) {
 463     slen = SvCUR(check); /* >= 1 */
 464
 465     if ( strend - s > slen || strend - s < slen - 1
 466      || (strend - s == slen && strend[-1] != '\n')) {
 467      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String too long...\n"));
 468      goto fail_finish;
 469     }
 470     /* Now should match s[0..slen-2] */
 471     slen--;
 472     if (slen && (*SvPVX_const(check) != *s
 473        || (slen > 1
 474         && memNE(SvPVX_const(check), s, slen)))) {
 475     report_neq:
 476      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "String not equal...\n"));
 477      goto fail_finish;
 478     }
 479    }
 480    else if (*SvPVX_const(check) != *s
 481      || ((slen = SvCUR(check)) > 1
 482       && memNE(SvPVX_const(check), s, slen)))
 483     goto report_neq;
 484    check_at = s;
 485    goto success_at_start;
 486   }
 487   }
 488   /* Match is anchored, but substr is not anchored wrt beg-of-str. */
 489   s = strpos;
 490   start_shift = prog->check_offset_min; /* okay to underestimate on CC */
 491   end_shift = prog->check_end_shift;
 492
 493   if (!ml_anch) {
 494    const I32 end = prog->check_offset_max + CHR_SVLEN(check)
 495           - (SvTAIL(check) != 0);
 496    const I32 eshift = CHR_DIST((U8*)strend, (U8*)s) - end;
 497
 498    if (end_shift < eshift)
 499     end_shift = eshift;
 500   }
 501  }
 502  else {    /* Can match at random position */
 503   ml_anch = 0;
 504   s = strpos;
 505   start_shift = prog->check_offset_min;  /* okay to underestimate on CC */
 506   end_shift = prog->check_end_shift;
 507
 508   /* end shift should be non negative here */
 509  }
 510
 511 #ifdef QDEBUGGING /* 7/99: reports of failure (with the older version) */
 512  if (end_shift < 0)
 513   Perl_croak(aTHX_ "panic: end_shift: %"IVdf" pattern:\n%s\n ",
 514     (IV)end_shift, RX_PRECOMP(prog));
 515 #endif
 516
 517   restart:
 518  /* Find a possible match in the region s..strend by looking for
 519  the "check" substring in the region corrected by start/end_shift. */
 520
 521  {
 522   I32 srch_start_shift = start_shift;
 523   I32 srch_end_shift = end_shift;
 524   if (srch_start_shift < 0 && strbeg - s > srch_start_shift) {
 525    srch_end_shift -= ((strbeg - s) - srch_start_shift);
 526    srch_start_shift = strbeg - s;
 527   }
 528  DEBUG_OPTIMISE_MORE_r({
 529   PerlIO_printf(Perl_debug_log, "Check offset min: %"IVdf" Start shift: %"IVdf" End shift %"IVdf" Real End Shift: %"IVdf"\n",
 530    (IV)prog->check_offset_min,
 531    (IV)srch_start_shift,
 532    (IV)srch_end_shift,
 533    (IV)prog->check_end_shift);
 534  });
 535
 536  if (flags & REXEC_SCREAM) {
 537   I32 p = -1;   /* Internal iterator of scream. */
 538   I32 * const pp = data ? data->scream_pos : &p;
 539
 540   if (PL_screamfirst[BmRARE(check)] >= 0
 541    || ( BmRARE(check) == '\n'
 542     && (BmPREVIOUS(check) == SvCUR(check) - 1)
 543     && SvTAIL(check) ))
 544    s = screaminstr(sv, check,
 545        srch_start_shift + (s - strbeg), srch_end_shift, pp, 0);
 546   else
 547    goto fail_finish;
 548   /* we may be pointing at the wrong string */
 549   if (s && RXp_MATCH_COPIED(prog))
 550    s = strbeg + (s - SvPVX_const(sv));
 551   if (data)
 552    *data->scream_olds = s;
 553  }
 554  else {
 555   U8* start_point;
 556   U8* end_point;
 557   if (prog->extflags & RXf_CANY_SEEN) {
 558    start_point= (U8*)(s + srch_start_shift);
 559    end_point= (U8*)(strend - srch_end_shift);
 560   } else {
 561    start_point= HOP3(s, srch_start_shift, srch_start_shift < 0 ? strbeg : strend);
 562    end_point= HOP3(strend, -srch_end_shift, strbeg);
 563   }
 564   DEBUG_OPTIMISE_MORE_r({
 565    PerlIO_printf(Perl_debug_log, "fbm_instr len=%d str=<%.*s>\n",
 566     (int)(end_point - start_point),
 567     (int)(end_point - start_point) > 20 ? 20 : (int)(end_point - start_point),
 568     start_point);
 569   });
 570
 571   s = fbm_instr( start_point, end_point,
 572      check, multiline ? FBMrf_MULTILINE : 0);
 573  }
 574  }
 575  /* Update the count-of-usability, remove useless subpatterns,
 576   unshift s.  */
 577
 578  DEBUG_EXECUTE_r({
 579   RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 580    SvPVX_const(check), RE_SV_DUMPLEN(check), 30);
 581   PerlIO_printf(Perl_debug_log, "%s %s substr %s%s%s",
 582       (s ? "Found" : "Did not find"),
 583    (check == (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)
 584     ? "anchored" : "floating"),
 585    quoted,
 586    RE_SV_TAIL(check),
 587    (s ? " at offset " : "...\n") );
 588  });
 589
 590  if (!s)
 591   goto fail_finish;
 592  /* Finish the diagnostic message */
 593  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%ld...\n", (long)(s - i_strpos)) );
 594
 595  /* XXX dmq: first branch is for positive lookbehind...
 596  Our check string is offset from the beginning of the pattern.
 597  So we need to do any stclass tests offset forward from that
 598  point. I think. :-(
 599  */
 600
 601
 602
 603  check_at=s;
 604
 605
 606  /* Got a candidate.  Check MBOL anchoring, and the *other* substr.
 607  Start with the other substr.
 608  XXXX no SCREAM optimization yet - and a very coarse implementation
 609  XXXX /ttx+/ results in anchored="ttx", floating="x".  floating will
 610     *always* match.  Probably should be marked during compile...
 611  Probably it is right to do no SCREAM here...
 612  */
 613
 614  if (do_utf8 ? (prog->float_utf8 && prog->anchored_utf8)
 615     : (prog->float_substr && prog->anchored_substr))
 616  {
 617   /* Take into account the "other" substring. */
 618   /* XXXX May be hopelessly wrong for UTF... */
 619   if (!other_last)
 620    other_last = strpos;
 621   if (check == (do_utf8 ? prog->float_utf8 : prog->float_substr)) {
 622   do_other_anchored:
 623    {
 624     char * const last = HOP3c(s, -start_shift, strbeg);
 625     char *last1, *last2;
 626     char * const saved_s = s;
 627     SV* must;
 628
 629     t = s - prog->check_offset_max;
 630     if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 631      && (!do_utf8
 632       || ((t = (char*)reghopmaybe3((U8*)s, -(prog->check_offset_max), (U8*)strpos))
 633        && t > strpos)))
 634      NOOP;
 635     else
 636      t = strpos;
 637     t = HOP3c(t, prog->anchored_offset, strend);
 638     if (t < other_last) /* These positions already checked */
 639      t = other_last;
 640     last2 = last1 = HOP3c(strend, -prog->minlen, strbeg);
 641     if (last < last1)
 642      last1 = last;
 643     /* XXXX It is not documented what units *_offsets are in.
 644     We assume bytes, but this is clearly wrong.
 645     Meaning this code needs to be carefully reviewed for errors.
 646     dmq.
 647     */
 648
 649     /* On end-of-str: see comment below. */
 650     must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
 651     if (must == &PL_sv_undef) {
 652      s = (char*)NULL;
 653      DEBUG_r(must = prog->anchored_utf8); /* for debug */
 654     }
 655     else
 656      s = fbm_instr(
 657       (unsigned char*)t,
 658       HOP3(HOP3(last1, prog->anchored_offset, strend)
 659         + SvCUR(must), -(SvTAIL(must)!=0), strbeg),
 660       must,
 661       multiline ? FBMrf_MULTILINE : 0
 662      );
 663     DEBUG_EXECUTE_r({
 664      RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 665       SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 666      PerlIO_printf(Perl_debug_log, "%s anchored substr %s%s",
 667       (s ? "Found" : "Contradicts"),
 668       quoted, RE_SV_TAIL(must));
 669     });
 670
 671
 672     if (!s) {
 673      if (last1 >= last2) {
 674       DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 675             ", giving up...\n"));
 676       goto fail_finish;
 677      }
 678      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 679       ", trying floating at offset %ld...\n",
 680       (long)(HOP3c(saved_s, 1, strend) - i_strpos)));
 681      other_last = HOP3c(last1, prog->anchored_offset+1, strend);
 682      s = HOP3c(last, 1, strend);
 683      goto restart;
 684     }
 685     else {
 686      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 687       (long)(s - i_strpos)));
 688      t = HOP3c(s, -prog->anchored_offset, strbeg);
 689      other_last = HOP3c(s, 1, strend);
 690      s = saved_s;
 691      if (t == strpos)
 692       goto try_at_start;
 693      goto try_at_offset;
 694     }
 695    }
 696   }
 697   else {  /* Take into account the floating substring. */
 698    char *last, *last1;
 699    char * const saved_s = s;
 700    SV* must;
 701
 702    t = HOP3c(s, -start_shift, strbeg);
 703    last1 = last =
 704     HOP3c(strend, -prog->minlen + prog->float_min_offset, strbeg);
 705    if (CHR_DIST((U8*)last, (U8*)t) > prog->float_max_offset)
 706     last = HOP3c(t, prog->float_max_offset, strend);
 707    s = HOP3c(t, prog->float_min_offset, strend);
 708    if (s < other_last)
 709     s = other_last;
 710  /* XXXX It is not documented what units *_offsets are in.  Assume bytes.  */
 711    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
 712    /* fbm_instr() takes into account exact value of end-of-str
 713    if the check is SvTAIL(ed).  Since false positives are OK,
 714    and end-of-str is not later than strend we are OK. */
 715    if (must == &PL_sv_undef) {
 716     s = (char*)NULL;
 717     DEBUG_r(must = prog->float_utf8); /* for debug message */
 718    }
 719    else
 720     s = fbm_instr((unsigned char*)s,
 721        (unsigned char*)last + SvCUR(must)
 722         - (SvTAIL(must)!=0),
 723        must, multiline ? FBMrf_MULTILINE : 0);
 724    DEBUG_EXECUTE_r({
 725     RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
 726      SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
 727     PerlIO_printf(Perl_debug_log, "%s floating substr %s%s",
 728      (s ? "Found" : "Contradicts"),
 729      quoted, RE_SV_TAIL(must));
 730    });
 731    if (!s) {
 732     if (last1 == last) {
 733      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 734            ", giving up...\n"));
 735      goto fail_finish;
 736     }
 737     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 738      ", trying anchored starting at offset %ld...\n",
 739      (long)(saved_s + 1 - i_strpos)));
 740     other_last = last;
 741     s = HOP3c(t, 1, strend);
 742     goto restart;
 743    }
 744    else {
 745     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " at offset %ld...\n",
 746      (long)(s - i_strpos)));
 747     other_last = s; /* Fix this later. --Hugo */
 748     s = saved_s;
 749     if (t == strpos)
 750      goto try_at_start;
 751     goto try_at_offset;
 752    }
 753   }
 754  }
 755
 756
 757  t= (char*)HOP3( s, -prog->check_offset_max, (prog->check_offset_max<0) ? strend : strpos);
 758
 759  DEBUG_OPTIMISE_MORE_r(
 760   PerlIO_printf(Perl_debug_log,
 761    "Check offset min:%"IVdf" max:%"IVdf" S:%"IVdf" t:%"IVdf" D:%"IVdf" end:%"IVdf"\n",
 762    (IV)prog->check_offset_min,
 763    (IV)prog->check_offset_max,
 764    (IV)(s-strpos),
 765    (IV)(t-strpos),
 766    (IV)(t-s),
 767    (IV)(strend-strpos)
 768   )
 769  );
 770
 771  if (s - strpos > prog->check_offset_max  /* signed-corrected t > strpos */
 772   && (!do_utf8
 773    || ((t = (char*)reghopmaybe3((U8*)s, -prog->check_offset_max, (U8*) ((prog->check_offset_max<0) ? strend : strpos)))
 774     && t > strpos)))
 775  {
 776   /* Fixed substring is found far enough so that the match
 777   cannot start at strpos. */
 778  try_at_offset:
 779   if (ml_anch && t[-1] != '\n') {
 780    /* Eventually fbm_*() should handle this, but often
 781    anchored_offset is not 0, so this check will not be wasted. */
 782    /* XXXX In the code below we prefer to look for "^" even in
 783    presence of anchored substrings.  And we search even
 784    beyond the found float position.  These pessimizations
 785    are historical artefacts only.  */
 786   find_anchor:
 787    while (t < strend - prog->minlen) {
 788     if (*t == '\n') {
 789      if (t < check_at - prog->check_offset_min) {
 790       if (do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) {
 791        /* Since we moved from the found position,
 792        we definitely contradict the found anchored
 793        substr.  Due to the above check we do not
 794        contradict "check" substr.
 795        Thus we can arrive here only if check substr
 796        is float.  Redo checking for "other"=="fixed".
 797        */
 798        strpos = t + 1;
 799        DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld, rescanning for anchored from offset %ld...\n",
 800         PL_colors[0], PL_colors[1], (long)(strpos - i_strpos), (long)(strpos - i_strpos + prog->anchored_offset)));
 801        goto do_other_anchored;
 802       }
 803       /* We don't contradict the found floating substring. */
 804       /* XXXX Why not check for STCLASS? */
 805       s = t + 1;
 806       DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m at offset %ld...\n",
 807        PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
 808       goto set_useful;
 809      }
 810      /* Position contradicts check-string */
 811      /* XXXX probably better to look for check-string
 812      than for "\n", so one should lower the limit for t? */
 813      DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Found /%s^%s/m, restarting lookup for check-string at offset %ld...\n",
 814       PL_colors[0], PL_colors[1], (long)(t + 1 - i_strpos)));
 815      other_last = strpos = s = t + 1;
 816      goto restart;
 817     }
 818     t++;
 819    }
 820    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Did not find /%s^%s/m...\n",
 821       PL_colors[0], PL_colors[1]));
 822    goto fail_finish;
 823   }
 824   else {
 825    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Starting position does not contradict /%s^%s/m...\n",
 826       PL_colors[0], PL_colors[1]));
 827   }
 828   s = t;
 829  set_useful:
 830   ++BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
 831  }
 832  else {
 833   /* The found string does not prohibit matching at strpos,
 834   - no optimization of calling REx engine can be performed,
 835   unless it was an MBOL and we are not after MBOL,
 836   or a future STCLASS check will fail this. */
 837  try_at_start:
 838   /* Even in this situation we may use MBOL flag if strpos is offset
 839   wrt the start of the string. */
 840   if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */
 841    && (strpos != strbeg) && strpos[-1] != '\n'
 842    /* May be due to an implicit anchor of m{.*foo}  */
 843    && !(prog->intflags & PREGf_IMPLICIT))
 844   {
 845    t = strpos;
 846    goto find_anchor;
 847   }
 848   DEBUG_EXECUTE_r( if (ml_anch)
 849    PerlIO_printf(Perl_debug_log, "Position at offset %ld does not contradict /%s^%s/m...\n",
 850       (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
 851   );
 852  success_at_start:
 853   if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
 854    && (do_utf8 ? (
 855     prog->check_utf8  /* Could be deleted already */
 856     && --BmUSEFUL(prog->check_utf8) < 0
 857     && (prog->check_utf8 == prog->float_utf8)
 858    ) : (
 859     prog->check_substr  /* Could be deleted already */
 860     && --BmUSEFUL(prog->check_substr) < 0
 861     && (prog->check_substr == prog->float_substr)
 862    )))
 863   {
 864    /* If flags & SOMETHING - do not do it many times on the same match */
 865    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "... Disabling check substring...\n"));
 866    SvREFCNT_dec(do_utf8 ? prog->check_utf8 : prog->check_substr);
 867    if (do_utf8 ? prog->check_substr : prog->check_utf8)
 868     SvREFCNT_dec(do_utf8 ? prog->check_substr : prog->check_utf8);
 869    prog->check_substr = prog->check_utf8 = NULL; /* disable */
 870    prog->float_substr = prog->float_utf8 = NULL; /* clear */
 871    check = NULL;   /* abort */
 872    s = strpos;
 873    /* XXXX This is a remnant of the old implementation.  It
 874      looks wasteful, since now INTUIT can use many
 875      other heuristics. */
 876    prog->extflags &= ~RXf_USE_INTUIT;
 877   }
 878   else
 879    s = strpos;
 880  }
 881
 882  /* Last resort... */
 883  /* XXXX BmUSEFUL already changed, maybe multiple change is meaningful... */
 884  /* trie stclasses are too expensive to use here, we are better off to
 885  leave it to regmatch itself */
 886  if (progi->regstclass && PL_regkind[OP(progi->regstclass)]!=TRIE) {
 887   /* minlen == 0 is possible if regstclass is \b or \B,
 888   and the fixed substr is ''$.
 889   Since minlen is already taken into account, s+1 is before strend;
 890   accidentally, minlen >= 1 guaranties no false positives at s + 1
 891   even for \b or \B.  But (minlen? 1 : 0) below assumes that
 892   regstclass does not come from lookahead...  */
 893   /* If regstclass takes bytelength more than 1: If charlength==1, OK.
 894   This leaves EXACTF only, which is dealt with in find_byclass().  */
 895   const U8* const str = (U8*)STRING(progi->regstclass);
 896   const int cl_l = (PL_regkind[OP(progi->regstclass)] == EXACT
 897      ? CHR_DIST(str+STR_LEN(progi->regstclass), str)
 898      : 1);
 899   char * endpos;
 900   if (prog->anchored_substr || prog->anchored_utf8 || ml_anch)
 901    endpos= HOP3c(s, (prog->minlen ? cl_l : 0), strend);
 902   else if (prog->float_substr || prog->float_utf8)
 903    endpos= HOP3c(HOP3c(check_at, -start_shift, strbeg), cl_l, strend);
 904   else
 905    endpos= strend;
 906
 907   DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "start_shift: %"IVdf" check_at: %"IVdf" s: %"IVdf" endpos: %"IVdf"\n",
 908          (IV)start_shift, (IV)(check_at - strbeg), (IV)(s - strbeg), (IV)(endpos - strbeg)));
 909
 910   t = s;
 911   s = find_byclass(prog, progi->regstclass, s, endpos, NULL);
 912   if (!s) {
 913 #ifdef DEBUGGING
 914    const char *what = NULL;
 915 #endif
 916    if (endpos == strend) {
 917     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 918         "Could not match STCLASS...\n") );
 919     goto fail;
 920    }
 921    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 922         "This position contradicts STCLASS...\n") );
 923    if ((prog->extflags & RXf_ANCH) && !ml_anch)
 924     goto fail;
 925    /* Contradict one of substrings */
 926    if (prog->anchored_substr || prog->anchored_utf8) {
 927     if ((do_utf8 ? prog->anchored_utf8 : prog->anchored_substr) == check) {
 928      DEBUG_EXECUTE_r( what = "anchored" );
 929     hop_and_restart:
 930      s = HOP3c(t, 1, strend);
 931      if (s + start_shift + end_shift > strend) {
 932       /* XXXX Should be taken into account earlier? */
 933       DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 934            "Could not match STCLASS...\n") );
 935       goto fail;
 936      }
 937      if (!check)
 938       goto giveup;
 939      DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 940         "Looking for %s substr starting at offset %ld...\n",
 941         what, (long)(s + start_shift - i_strpos)) );
 942      goto restart;
 943     }
 944     /* Have both, check_string is floating */
 945     if (t + start_shift >= check_at) /* Contradicts floating=check */
 946      goto retry_floating_check;
 947     /* Recheck anchored substring, but not floating... */
 948     s = check_at;
 949     if (!check)
 950      goto giveup;
 951     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 952       "Looking for anchored substr starting at offset %ld...\n",
 953       (long)(other_last - i_strpos)) );
 954     goto do_other_anchored;
 955    }
 956    /* Another way we could have checked stclass at the
 957    current position only: */
 958    if (ml_anch) {
 959     s = t = t + 1;
 960     if (!check)
 961      goto giveup;
 962     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
 963       "Looking for /%s^%s/m starting at offset %ld...\n",
 964       PL_colors[0], PL_colors[1], (long)(t - i_strpos)) );
 965     goto try_at_offset;
 966    }
 967    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr)) /* Could have been deleted */
 968     goto fail;
 969    /* Check is floating subtring. */
 970   retry_floating_check:
 971    t = check_at - start_shift;
 972    DEBUG_EXECUTE_r( what = "floating" );
 973    goto hop_and_restart;
 974   }
 975   if (t != s) {
 976    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 977       "By STCLASS: moving %ld --> %ld\n",
 978         (long)(t - i_strpos), (long)(s - i_strpos))
 979     );
 980   }
 981   else {
 982    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
 983         "Does not contradict STCLASS...\n");
 984     );
 985   }
 986  }
 987   giveup:
 988  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%s%s:%s match at offset %ld\n",
 989       PL_colors[4], (check ? "Guessed" : "Giving up"),
 990       PL_colors[5], (long)(s - i_strpos)) );
 991  return s;
 992
 993   fail_finish:    /* Substring not found */
 994  if (prog->check_substr || prog->check_utf8)  /* could be removed already */
 995   BmUSEFUL(do_utf8 ? prog->check_utf8 : prog->check_substr) += 5; /* hooray */
 996   fail:
 997  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch rejected by optimizer%s\n",
 998       PL_colors[4], PL_colors[5]));
 999  return NULL;
1000 }
1001
1002 #define DECL_TRIE_TYPE(scan) \
1003  const enum { trie_plain, trie_utf8, trie_utf8_fold, trie_latin_utf8_fold } \
1004      trie_type = (scan->flags != EXACT) \
1005        ? (do_utf8 ? trie_utf8_fold : (UTF ? trie_latin_utf8_fold : trie_plain)) \
1006        : (do_utf8 ? trie_utf8 : trie_plain)
1007
1008 #define REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc, uscan, len,  \
1009 uvc, charid, foldlen, foldbuf, uniflags) STMT_START {                       \
1010  UV uvc_unfolded = 0;          \
1011  switch (trie_type) {                                                    \
1012  case trie_utf8_fold:                                                    \
1013   if ( foldlen>0 ) {                                                  \
1014    uvc_unfolded = uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags ); \
1015    foldlen -= len;                                                 \
1016    uscan += len;                                                   \
1017    len=0;                                                          \
1018   } else {                                                            \
1019    uvc_unfolded = uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags ); \
1020    uvc = to_uni_fold( uvc, foldbuf, &foldlen );                    \
1021    foldlen -= UNISKIP( uvc );                                      \
1022    uscan = foldbuf + UNISKIP( uvc );                               \
1023   }                                                                   \
1024   break;                                                              \
1025  case trie_latin_utf8_fold:                                              \
1026   if ( foldlen>0 ) {                                                  \
1027    uvc = utf8n_to_uvuni( uscan, UTF8_MAXLEN, &len, uniflags );     \
1028    foldlen -= len;                                                 \
1029    uscan += len;                                                   \
1030    len=0;                                                          \
1031   } else {                                                            \
1032    len = 1;                                                        \
1033    uvc = to_uni_fold( *(U8*)uc, foldbuf, &foldlen );               \
1034    foldlen -= UNISKIP( uvc );                                      \
1035    uscan = foldbuf + UNISKIP( uvc );                               \
1036   }                                                                   \
1037   break;                                                              \
1038  case trie_utf8:                                                         \
1039   uvc = utf8n_to_uvuni( (U8*)uc, UTF8_MAXLEN, &len, uniflags );       \
1040   break;                                                              \
1041  case trie_plain:                                                        \
1042   uvc = (UV)*uc;                                                      \
1043   len = 1;                                                            \
1044  }                                                                       \
1045                    \
1046  if (uvc < 256) {                                                        \
1047   charid = trie->charmap[ uvc ];                                      \
1048  }                                                                       \
1049  else {                                                                  \
1050   charid = 0;                                                         \
1051   if (widecharmap) {                                                  \
1052    SV** const svpp = hv_fetch(widecharmap,                         \
1053       (char*)&uvc, sizeof(UV), 0);                        \
1054    if (svpp)                                                       \
1055     charid = (U16)SvIV(*svpp);                                  \
1056   }                                                                   \
1057  }                                                                       \
1058  if (!charid && trie_type == trie_utf8_fold && !UTF) {      \
1059   charid = trie->charmap[uvc_unfolded];            \
1060  }                 \
1061 } STMT_END
1062
1063 #define REXEC_FBC_EXACTISH_CHECK(CoNd)                 \
1064 {                                                      \
1065  char *my_strend= (char *)strend;                   \
1066  if ( (CoNd)                                        \
1067   && (ln == len ||                              \
1068    !ibcmp_utf8(s, &my_strend, 0,  do_utf8,   \
1069       m, NULL, ln, (bool)UTF))       \
1070   && (!reginfo || regtry(reginfo, &s)) )        \
1071   goto got_it;                                   \
1072  else {                                             \
1073   U8 foldbuf[UTF8_MAXBYTES_CASE+1];             \
1074   uvchr_to_utf8(tmpbuf, c);                     \
1075   f = to_utf8_fold(tmpbuf, foldbuf, &foldlen);  \
1076   if ( f != c                                   \
1077    && (f == c1 || f == c2)                  \
1078    && (ln == len ||                         \
1079     !ibcmp_utf8(s, &my_strend, 0,  do_utf8,\
1080        m, NULL, ln, (bool)UTF)) \
1081    && (!reginfo || regtry(reginfo, &s)) )   \
1082    goto got_it;                             \
1083  }                                                  \
1084 }                                                      \
1085 s += len
1086
1087 #define REXEC_FBC_EXACTISH_SCAN(CoNd)                     \
1088 STMT_START {                                              \
1089  while (s <= e) {                                      \
1090   if ( (CoNd)                                       \
1091    && (ln == 1 || !(OP(c) == EXACTF             \
1092        ? ibcmp(s, m, ln)           \
1093        : ibcmp_locale(s, m, ln)))  \
1094    && (!reginfo || regtry(reginfo, &s)) )        \
1095    goto got_it;                                  \
1096   s++;                                              \
1097  }                                                     \
1098 } STMT_END
1099
1100 #define REXEC_FBC_UTF8_SCAN(CoDe)                     \
1101 STMT_START {                                          \
1102  while (s + (uskip = UTF8SKIP(s)) <= strend) {     \
1103   CoDe                                          \
1104   s += uskip;                                   \
1105  }                                                 \
1106 } STMT_END
1107
1108 #define REXEC_FBC_SCAN(CoDe)                          \
1109 STMT_START {                                          \
1110  while (s < strend) {                              \
1111   CoDe                                          \
1112   s++;                                          \
1113  }                                                 \
1114 } STMT_END
1115
1116 #define REXEC_FBC_UTF8_CLASS_SCAN(CoNd)               \
1117 REXEC_FBC_UTF8_SCAN(                                  \
1118  if (CoNd) {                                       \
1119   if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1120    goto got_it;                              \
1121   else                                          \
1122    tmp = doevery;                            \
1123  }                                                 \
1124  else                                              \
1125   tmp = 1;                                      \
1126 )
1127
1128 #define REXEC_FBC_CLASS_SCAN(CoNd)                    \
1129 REXEC_FBC_SCAN(                                       \
1130  if (CoNd) {                                       \
1131   if (tmp && (!reginfo || regtry(reginfo, &s)))  \
1132    goto got_it;                              \
1133   else                                          \
1134    tmp = doevery;                            \
1135  }                                                 \
1136  else                                              \
1137   tmp = 1;                                      \
1138 )
1139
1140 #define REXEC_FBC_TRYIT               \
1141 if ((!reginfo || regtry(reginfo, &s))) \
1142  goto got_it
1143
1144 #define REXEC_FBC_CSCAN(CoNdUtF8,CoNd)                         \
1145  if (do_utf8) {                                             \
1146   REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1147  }                                                          \
1148  else {                                                     \
1149   REXEC_FBC_CLASS_SCAN(CoNd);                            \
1150  }                                                          \
1151  break
1152
1153 #define REXEC_FBC_CSCAN_PRELOAD(UtFpReLoAd,CoNdUtF8,CoNd)      \
1154  if (do_utf8) {                                             \
1155   UtFpReLoAd;                                            \
1156   REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1157  }                                                          \
1158  else {                                                     \
1159   REXEC_FBC_CLASS_SCAN(CoNd);                            \
1160  }                                                          \
1161  break
1162
1163 #define REXEC_FBC_CSCAN_TAINT(CoNdUtF8,CoNd)                   \
1164  PL_reg_flags |= RF_tainted;                                \
1165  if (do_utf8) {                                             \
1166   REXEC_FBC_UTF8_CLASS_SCAN(CoNdUtF8);                   \
1167  }                                                          \
1168  else {                                                     \
1169   REXEC_FBC_CLASS_SCAN(CoNd);                            \
1170  }                                                          \
1171  break
1172
1173 #define DUMP_EXEC_POS(li,s,doutf8) \
1174  dump_exec_pos(li,s,(PL_regeol),(PL_bostr),(PL_reg_starttry),doutf8)
1175
1176 /* We know what class REx starts with.  Try to find this position... */
1177 /* if reginfo is NULL, its a dryrun */
1178 /* annoyingly all the vars in this routine have different names from their counterparts
1179    in regmatch. /grrr */
1180
1181 STATIC char *
1182 S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
1183  const char *strend, regmatch_info *reginfo)
1184 {
1185   dVAR;
1186   const I32 doevery = (prog->intflags & PREGf_SKIP) == 0;
1187   char *m;
1188   STRLEN ln;
1189   STRLEN lnc;
1190   register STRLEN uskip;
1191   unsigned int c1;
1192   unsigned int c2;
1193   char *e;
1194   register I32 tmp = 1; /* Scratch variable? */
1195   register const bool do_utf8 = PL_reg_match_utf8;
1196   RXi_GET_DECL(prog,progi);
1197
1198   PERL_ARGS_ASSERT_FIND_BYCLASS;
1199
1200   /* We know what class it must start with. */
1201   switch (OP(c)) {
1202   case ANYOF:
1203    if (do_utf8) {
1204     REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
1205       !UTF8_IS_INVARIANT((U8)s[0]) ?
1206       reginclass(prog, c, (U8*)s, 0, do_utf8) :
1207       REGINCLASS(prog, c, (U8*)s));
1208    }
1209    else {
1210     while (s < strend) {
1211      STRLEN skip = 1;
1212
1213      if (REGINCLASS(prog, c, (U8*)s) ||
1214       (ANYOF_FOLD_SHARP_S(c, s, strend) &&
1215       /* The assignment of 2 is intentional:
1216        * for the folded sharp s, the skip is 2. */
1217       (skip = SHARP_S_SKIP))) {
1218       if (tmp && (!reginfo || regtry(reginfo, &s)))
1219         goto got_it;
1220       else
1221         tmp = doevery;
1222      }
1223      else
1224       tmp = 1;
1225      s += skip;
1226     }
1227    }
1228    break;
1229   case CANY:
1230    REXEC_FBC_SCAN(
1231     if (tmp && (!reginfo || regtry(reginfo, &s)))
1232      goto got_it;
1233     else
1234      tmp = doevery;
1235    );
1236    break;
1237   case EXACTF:
1238    m   = STRING(c);
1239    ln  = STR_LEN(c); /* length to match in octets/bytes */
1240    lnc = (I32) ln; /* length to match in characters */
1241    if (UTF) {
1242     STRLEN ulen1, ulen2;
1243     U8 *sm = (U8 *) m;
1244     U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
1245     U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
1246     /* used by commented-out code below */
1247     /*const U32 uniflags = UTF8_ALLOW_DEFAULT;*/
1248
1249     /* XXX: Since the node will be case folded at compile
1250     time this logic is a little odd, although im not
1251     sure that its actually wrong. --dmq */
1252
1253     c1 = to_utf8_lower((U8*)m, tmpbuf1, &ulen1);
1254     c2 = to_utf8_upper((U8*)m, tmpbuf2, &ulen2);
1255
1256     /* XXX: This is kinda strange. to_utf8_XYZ returns the
1257     codepoint of the first character in the converted
1258     form, yet originally we did the extra step.
1259     No tests fail by commenting this code out however
1260     so Ive left it out. -- dmq.
1261
1262     c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXBYTES_CASE,
1263          0, uniflags);
1264     c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXBYTES_CASE,
1265          0, uniflags);
1266     */
1267
1268     lnc = 0;
1269     while (sm < ((U8 *) m + ln)) {
1270      lnc++;
1271      sm += UTF8SKIP(sm);
1272     }
1273    }
1274    else {
1275     c1 = *(U8*)m;
1276     c2 = PL_fold[c1];
1277    }
1278    goto do_exactf;
1279   case EXACTFL:
1280    m   = STRING(c);
1281    ln  = STR_LEN(c);
1282    lnc = (I32) ln;
1283    c1 = *(U8*)m;
1284    c2 = PL_fold_locale[c1];
1285   do_exactf:
1286    e = HOP3c(strend, -((I32)lnc), s);
1287
1288    if (!reginfo && e < s)
1289     e = s;   /* Due to minlen logic of intuit() */
1290
1291    /* The idea in the EXACTF* cases is to first find the
1292    * first character of the EXACTF* node and then, if
1293    * necessary, case-insensitively compare the full
1294    * text of the node.  The c1 and c2 are the first
1295    * characters (though in Unicode it gets a bit
1296    * more complicated because there are more cases
1297    * than just upper and lower: one needs to use
1298    * the so-called folding case for case-insensitive
1299    * matching (called "loose matching" in Unicode).
1300    * ibcmp_utf8() will do just that. */
1301
1302    if (do_utf8 || UTF) {
1303     UV c, f;
1304     U8 tmpbuf [UTF8_MAXBYTES+1];
1305     STRLEN len = 1;
1306     STRLEN foldlen;
1307     const U32 uniflags = UTF8_ALLOW_DEFAULT;
1308     if (c1 == c2) {
1309      /* Upper and lower of 1st char are equal -
1310      * probably not a "letter". */
1311      while (s <= e) {
1312       if (do_utf8) {
1313        c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1314           uniflags);
1315       } else {
1316        c = *((U8*)s);
1317       }
1318       REXEC_FBC_EXACTISH_CHECK(c == c1);
1319      }
1320     }
1321     else {
1322      while (s <= e) {
1323       if (do_utf8) {
1324        c = utf8n_to_uvchr((U8*)s, UTF8_MAXBYTES, &len,
1325           uniflags);
1326       } else {
1327        c = *((U8*)s);
1328       }
1329
1330       /* Handle some of the three Greek sigmas cases.
1331       * Note that not all the possible combinations
1332       * are handled here: some of them are handled
1333       * by the standard folding rules, and some of
1334       * them (the character class or ANYOF cases)
1335       * are handled during compiletime in
1336       * regexec.c:S_regclass(). */
1337       if (c == (UV)UNICODE_GREEK_CAPITAL_LETTER_SIGMA ||
1338        c == (UV)UNICODE_GREEK_SMALL_LETTER_FINAL_SIGMA)
1339        c = (UV)UNICODE_GREEK_SMALL_LETTER_SIGMA;
1340
1341       REXEC_FBC_EXACTISH_CHECK(c == c1 || c == c2);
1342      }
1343     }
1344    }
1345    else {
1346     /* Neither pattern nor string are UTF8 */
1347     if (c1 == c2)
1348      REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1);
1349     else
1350      REXEC_FBC_EXACTISH_SCAN(*(U8*)s == c1 || *(U8*)s == c2);
1351    }
1352    break;
1353   case BOUNDL:
1354    PL_reg_flags |= RF_tainted;
1355    /* FALL THROUGH */
1356   case BOUND:
1357    if (do_utf8) {
1358     if (s == PL_bostr)
1359      tmp = '\n';
1360     else {
1361      U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1362      tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1363     }
1364     tmp = ((OP(c) == BOUND ?
1365       isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1366     LOAD_UTF8_CHARCLASS_ALNUM();
1367     REXEC_FBC_UTF8_SCAN(
1368      if (tmp == !(OP(c) == BOUND ?
1369         (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1370         isALNUM_LC_utf8((U8*)s)))
1371      {
1372       tmp = !tmp;
1373       REXEC_FBC_TRYIT;
1374     }
1375     );
1376    }
1377    else {
1378     tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1379     tmp = ((OP(c) == BOUND ? isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1380     REXEC_FBC_SCAN(
1381      if (tmp ==
1382       !(OP(c) == BOUND ? isALNUM(*s) : isALNUM_LC(*s))) {
1383       tmp = !tmp;
1384       REXEC_FBC_TRYIT;
1385     }
1386     );
1387    }
1388    if ((!prog->minlen && tmp) && (!reginfo || regtry(reginfo, &s)))
1389     goto got_it;
1390    break;
1391   case NBOUNDL:
1392    PL_reg_flags |= RF_tainted;
1393    /* FALL THROUGH */
1394   case NBOUND:
1395    if (do_utf8) {
1396     if (s == PL_bostr)
1397      tmp = '\n';
1398     else {
1399      U8 * const r = reghop3((U8*)s, -1, (U8*)PL_bostr);
1400      tmp = utf8n_to_uvchr(r, UTF8SKIP(r), 0, UTF8_ALLOW_DEFAULT);
1401     }
1402     tmp = ((OP(c) == NBOUND ?
1403       isALNUM_uni(tmp) : isALNUM_LC_uvchr(UNI_TO_NATIVE(tmp))) != 0);
1404     LOAD_UTF8_CHARCLASS_ALNUM();
1405     REXEC_FBC_UTF8_SCAN(
1406      if (tmp == !(OP(c) == NBOUND ?
1407         (bool)swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8) :
1408         isALNUM_LC_utf8((U8*)s)))
1409       tmp = !tmp;
1410      else REXEC_FBC_TRYIT;
1411     );
1412    }
1413    else {
1414     tmp = (s != PL_bostr) ? UCHARAT(s - 1) : '\n';
1415     tmp = ((OP(c) == NBOUND ?
1416       isALNUM(tmp) : isALNUM_LC(tmp)) != 0);
1417     REXEC_FBC_SCAN(
1418      if (tmp ==
1419       !(OP(c) == NBOUND ? isALNUM(*s) : isALNUM_LC(*s)))
1420       tmp = !tmp;
1421      else REXEC_FBC_TRYIT;
1422     );
1423    }
1424    if ((!prog->minlen && !tmp) && (!reginfo || regtry(reginfo, &s)))
1425     goto got_it;
1426    break;
1427   case ALNUM:
1428    REXEC_FBC_CSCAN_PRELOAD(
1429     LOAD_UTF8_CHARCLASS_ALNUM(),
1430     swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1431     isALNUM(*s)
1432    );
1433   case ALNUML:
1434    REXEC_FBC_CSCAN_TAINT(
1435     isALNUM_LC_utf8((U8*)s),
1436     isALNUM_LC(*s)
1437    );
1438   case NALNUM:
1439    REXEC_FBC_CSCAN_PRELOAD(
1440     LOAD_UTF8_CHARCLASS_ALNUM(),
1441     !swash_fetch(PL_utf8_alnum, (U8*)s, do_utf8),
1442     !isALNUM(*s)
1443    );
1444   case NALNUML:
1445    REXEC_FBC_CSCAN_TAINT(
1446     !isALNUM_LC_utf8((U8*)s),
1447     !isALNUM_LC(*s)
1448    );
1449   case SPACE:
1450    REXEC_FBC_CSCAN_PRELOAD(
1451     LOAD_UTF8_CHARCLASS_SPACE(),
1452     *s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8),
1453     isSPACE(*s)
1454    );
1455   case SPACEL:
1456    REXEC_FBC_CSCAN_TAINT(
1457     *s == ' ' || isSPACE_LC_utf8((U8*)s),
1458     isSPACE_LC(*s)
1459    );
1460   case NSPACE:
1461    REXEC_FBC_CSCAN_PRELOAD(
1462     LOAD_UTF8_CHARCLASS_SPACE(),
1463     !(*s == ' ' || swash_fetch(PL_utf8_space,(U8*)s, do_utf8)),
1464     !isSPACE(*s)
1465    );
1466   case NSPACEL:
1467    REXEC_FBC_CSCAN_TAINT(
1468     !(*s == ' ' || isSPACE_LC_utf8((U8*)s)),
1469     !isSPACE_LC(*s)
1470    );
1471   case DIGIT:
1472    REXEC_FBC_CSCAN_PRELOAD(
1473     LOAD_UTF8_CHARCLASS_DIGIT(),
1474     swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1475     isDIGIT(*s)
1476    );
1477   case DIGITL:
1478    REXEC_FBC_CSCAN_TAINT(
1479     isDIGIT_LC_utf8((U8*)s),
1480     isDIGIT_LC(*s)
1481    );
1482   case NDIGIT:
1483    REXEC_FBC_CSCAN_PRELOAD(
1484     LOAD_UTF8_CHARCLASS_DIGIT(),
1485     !swash_fetch(PL_utf8_digit,(U8*)s, do_utf8),
1486     !isDIGIT(*s)
1487    );
1488   case NDIGITL:
1489    REXEC_FBC_CSCAN_TAINT(
1490     !isDIGIT_LC_utf8((U8*)s),
1491     !isDIGIT_LC(*s)
1492    );
1493   case LNBREAK:
1494    REXEC_FBC_CSCAN(
1495     is_LNBREAK_utf8(s),
1496     is_LNBREAK_latin1(s)
1497    );
1498   case VERTWS:
1499    REXEC_FBC_CSCAN(
1500     is_VERTWS_utf8(s),
1501     is_VERTWS_latin1(s)
1502    );
1503   case NVERTWS:
1504    REXEC_FBC_CSCAN(
1505     !is_VERTWS_utf8(s),
1506     !is_VERTWS_latin1(s)
1507    );
1508   case HORIZWS:
1509    REXEC_FBC_CSCAN(
1510     is_HORIZWS_utf8(s),
1511     is_HORIZWS_latin1(s)
1512    );
1513   case NHORIZWS:
1514    REXEC_FBC_CSCAN(
1515     !is_HORIZWS_utf8(s),
1516     !is_HORIZWS_latin1(s)
1517    );
1518   case AHOCORASICKC:
1519   case AHOCORASICK:
1520    {
1521     DECL_TRIE_TYPE(c);
1522     /* what trie are we using right now */
1523     reg_ac_data *aho
1524      = (reg_ac_data*)progi->data->data[ ARG( c ) ];
1525     reg_trie_data *trie
1526      = (reg_trie_data*)progi->data->data[ aho->trie ];
1527     HV *widecharmap = MUTABLE_HV(progi->data->data[ aho->trie + 1 ]);
1528
1529     const char *last_start = strend - trie->minlen;
1530 #ifdef DEBUGGING
1531     const char *real_start = s;
1532 #endif
1533     STRLEN maxlen = trie->maxlen;
1534     SV *sv_points;
1535     U8 **points; /* map of where we were in the input string
1536         when reading a given char. For ASCII this
1537         is unnecessary overhead as the relationship
1538         is always 1:1, but for Unicode, especially
1539         case folded Unicode this is not true. */
1540     U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
1541     U8 *bitmap=NULL;
1542
1543
1544     GET_RE_DEBUG_FLAGS_DECL;
1545
1546     /* We can't just allocate points here. We need to wrap it in
1547     * an SV so it gets freed properly if there is a croak while
1548     * running the match */
1549     ENTER;
1550     SAVETMPS;
1551     sv_points=newSV(maxlen * sizeof(U8 *));
1552     SvCUR_set(sv_points,
1553      maxlen * sizeof(U8 *));
1554     SvPOK_on(sv_points);
1555     sv_2mortal(sv_points);
1556     points=(U8**)SvPV_nolen(sv_points );
1557     if ( trie_type != trie_utf8_fold
1558      && (trie->bitmap || OP(c)==AHOCORASICKC) )
1559     {
1560      if (trie->bitmap)
1561       bitmap=(U8*)trie->bitmap;
1562      else
1563       bitmap=(U8*)ANYOF_BITMAP(c);
1564     }
1565     /* this is the Aho-Corasick algorithm modified a touch
1566     to include special handling for long "unknown char"
1567     sequences. The basic idea being that we use AC as long
1568     as we are dealing with a possible matching char, when
1569     we encounter an unknown char (and we have not encountered
1570     an accepting state) we scan forward until we find a legal
1571     starting char.
1572     AC matching is basically that of trie matching, except
1573     that when we encounter a failing transition, we fall back
1574     to the current states "fail state", and try the current char
1575     again, a process we repeat until we reach the root state,
1576     state 1, or a legal transition. If we fail on the root state
1577     then we can either terminate if we have reached an accepting
1578     state previously, or restart the entire process from the beginning
1579     if we have not.
1580
1581     */
1582     while (s <= last_start) {
1583      const U32 uniflags = UTF8_ALLOW_DEFAULT;
1584      U8 *uc = (U8*)s;
1585      U16 charid = 0;
1586      U32 base = 1;
1587      U32 state = 1;
1588      UV uvc = 0;
1589      STRLEN len = 0;
1590      STRLEN foldlen = 0;
1591      U8 *uscan = (U8*)NULL;
1592      U8 *leftmost = NULL;
1593 #ifdef DEBUGGING
1594      U32 accepted_word= 0;
1595 #endif
1596      U32 pointpos = 0;
1597
1598      while ( state && uc <= (U8*)strend ) {
1599       int failed=0;
1600       U32 word = aho->states[ state ].wordnum;
1601
1602       if( state==1 ) {
1603        if ( bitmap ) {
1604         DEBUG_TRIE_EXECUTE_r(
1605          if ( uc <= (U8*)last_start && !BITMAP_TEST(bitmap,*uc) ) {
1606           dump_exec_pos( (char *)uc, c, strend, real_start,
1607            (char *)uc, do_utf8 );
1608           PerlIO_printf( Perl_debug_log,
1609            " Scanning for legal start char...\n");
1610          }
1611         );
1612         while ( uc <= (U8*)last_start  && !BITMAP_TEST(bitmap,*uc) ) {
1613          uc++;
1614         }
1615         s= (char *)uc;
1616        }
1617        if (uc >(U8*)last_start) break;
1618       }
1619
1620       if ( word ) {
1621        U8 *lpos= points[ (pointpos - trie->wordlen[word-1] ) % maxlen ];
1622        if (!leftmost || lpos < leftmost) {
1623         DEBUG_r(accepted_word=word);
1624         leftmost= lpos;
1625        }
1626        if (base==0) break;
1627
1628       }
1629       points[pointpos++ % maxlen]= uc;
1630       REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
1631            uscan, len, uvc, charid, foldlen,
1632            foldbuf, uniflags);
1633       DEBUG_TRIE_EXECUTE_r({
1634        dump_exec_pos( (char *)uc, c, strend, real_start,
1635         s,   do_utf8 );
1636        PerlIO_printf(Perl_debug_log,
1637         " Charid:%3u CP:%4"UVxf" ",
1638         charid, uvc);
1639       });
1640
1641       do {
1642 #ifdef DEBUGGING
1643        word = aho->states[ state ].wordnum;
1644 #endif
1645        base = aho->states[ state ].trans.base;
1646
1647        DEBUG_TRIE_EXECUTE_r({
1648         if (failed)
1649          dump_exec_pos( (char *)uc, c, strend, real_start,
1650           s,   do_utf8 );
1651         PerlIO_printf( Perl_debug_log,
1652          "%sState: %4"UVxf", word=%"UVxf,
1653          failed ? " Fail transition to " : "",
1654          (UV)state, (UV)word);
1655        });
1656        if ( base ) {
1657         U32 tmp;
1658         if (charid &&
1659          (base + charid > trie->uniquecharcount )
1660          && (base + charid - 1 - trie->uniquecharcount
1661            < trie->lasttrans)
1662          && trie->trans[base + charid - 1 -
1663            trie->uniquecharcount].check == state
1664          && (tmp=trie->trans[base + charid - 1 -
1665           trie->uniquecharcount ].next))
1666         {
1667          DEBUG_TRIE_EXECUTE_r(
1668           PerlIO_printf( Perl_debug_log," - legal\n"));
1669          state = tmp;
1670          break;
1671         }
1672         else {
1673          DEBUG_TRIE_EXECUTE_r(
1674           PerlIO_printf( Perl_debug_log," - fail\n"));
1675          failed = 1;
1676          state = aho->fail[state];
1677         }
1678        }
1679        else {
1680         /* we must be accepting here */
1681         DEBUG_TRIE_EXECUTE_r(
1682           PerlIO_printf( Perl_debug_log," - accepting\n"));
1683         failed = 1;
1684         break;
1685        }
1686       } while(state);
1687       uc += len;
1688       if (failed) {
1689        if (leftmost)
1690         break;
1691        if (!state) state = 1;
1692       }
1693      }
1694      if ( aho->states[ state ].wordnum ) {
1695       U8 *lpos = points[ (pointpos - trie->wordlen[aho->states[ state ].wordnum-1]) % maxlen ];
1696       if (!leftmost || lpos < leftmost) {
1697        DEBUG_r(accepted_word=aho->states[ state ].wordnum);
1698        leftmost = lpos;
1699       }
1700      }
1701      if (leftmost) {
1702       s = (char*)leftmost;
1703       DEBUG_TRIE_EXECUTE_r({
1704        PerlIO_printf(
1705         Perl_debug_log,"Matches word #%"UVxf" at position %"IVdf". Trying full pattern...\n",
1706         (UV)accepted_word, (IV)(s - real_start)
1707        );
1708       });
1709       if (!reginfo || regtry(reginfo, &s)) {
1710        FREETMPS;
1711        LEAVE;
1712        goto got_it;
1713       }
1714       s = HOPc(s,1);
1715       DEBUG_TRIE_EXECUTE_r({
1716        PerlIO_printf( Perl_debug_log,"Pattern failed. Looking for new start point...\n");
1717       });
1718      } else {
1719       DEBUG_TRIE_EXECUTE_r(
1720        PerlIO_printf( Perl_debug_log,"No match.\n"));
1721       break;
1722      }
1723     }
1724     FREETMPS;
1725     LEAVE;
1726    }
1727    break;
1728   default:
1729    Perl_croak(aTHX_ "panic: unknown regstclass %d", (int)OP(c));
1730    break;
1731   }
1732   return 0;
1733  got_it:
1734   return s;
1735 }
1736
1737 static void
1738 S_swap_match_buff (pTHX_ regexp *prog)
1739 {
1740  regexp_paren_pair *t;
1741
1742  PERL_ARGS_ASSERT_SWAP_MATCH_BUFF;
1743
1744  if (!prog->swap) {
1745  /* We have to be careful. If the previous successful match
1746  was from this regex we don't want a subsequent paritally
1747  successful match to clobber the old results.
1748  So when we detect this possibility we add a swap buffer
1749  to the re, and switch the buffer each match. If we fail
1750  we switch it back, otherwise we leave it swapped.
1751  */
1752   Newxz(prog->swap, (prog->nparens + 1), regexp_paren_pair);
1753  }
1754  t = prog->swap;
1755  prog->swap = prog->offs;
1756  prog->offs = t;
1757 }
1758
1759
1760 /*
1761  - regexec_flags - match a regexp against a string
1762  */
1763 I32
1764 Perl_regexec_flags(pTHX_ REGEXP * const prog, char *stringarg, register char *strend,
1765    char *strbeg, I32 minend, SV *sv, void *data, U32 flags)
1766 /* strend: pointer to null at end of string */
1767 /* strbeg: real beginning of string */
1768 /* minend: end of match must be >=minend after stringarg. */
1769 /* data: May be used for some additional optimizations.
1770   Currently its only used, with a U32 cast, for transmitting
1771   the ganch offset when doing a /g match. This will change */
1772 /* nosave: For optimizations. */
1773 {
1774  dVAR;
1775  /*register*/ char *s;
1776  register regnode *c;
1777  /*register*/ char *startpos = stringarg;
1778  I32 minlen;  /* must match at least this many chars */
1779  I32 dontbother = 0; /* how many characters not to try at end */
1780  I32 end_shift = 0;   /* Same for the end. */  /* CC */
1781  I32 scream_pos = -1;  /* Internal iterator of scream. */
1782  char *scream_olds = NULL;
1783  const bool do_utf8 = (bool)DO_UTF8(sv);
1784  I32 multiline;
1785  RXi_GET_DECL(prog,progi);
1786  regmatch_info reginfo;  /* create some info to pass to regtry etc */
1787  bool swap_on_fail = 0;
1788  GET_RE_DEBUG_FLAGS_DECL;
1789
1790  PERL_ARGS_ASSERT_REGEXEC_FLAGS;
1791  PERL_UNUSED_ARG(data);
1792
1793  /* Be paranoid... */
1794  if (prog == NULL || startpos == NULL) {
1795   Perl_croak(aTHX_ "NULL regexp parameter");
1796   return 0;
1797  }
1798
1799  multiline = prog->extflags & RXf_PMf_MULTILINE;
1800  reginfo.prog = prog;
1801
1802  RX_MATCH_UTF8_set(prog, do_utf8);
1803  DEBUG_EXECUTE_r(
1804   debug_start_match(prog, do_utf8, startpos, strend,
1805   "Matching");
1806  );
1807
1808  minlen = prog->minlen;
1809
1810  if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
1811   DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
1812        "String too short [regexec_flags]...\n"));
1813   goto phooey;
1814  }
1815
1816
1817  /* Check validity of program. */
1818  if (UCHARAT(progi->program) != REG_MAGIC) {
1819   Perl_croak(aTHX_ "corrupted regexp program");
1820  }
1821
1822  PL_reg_flags = 0;
1823  PL_reg_eval_set = 0;
1824  PL_reg_maxiter = 0;
1825
1826  if (RX_UTF8(prog))
1827   PL_reg_flags |= RF_utf8;
1828
1829  /* Mark beginning of line for ^ and lookbehind. */
1830  reginfo.bol = startpos; /* XXX not used ??? */
1831  PL_bostr  = strbeg;
1832  reginfo.sv = sv;
1833
1834  /* Mark end of line for $ (and such) */
1835  PL_regeol = strend;
1836
1837  /* see how far we have to get to not match where we matched before */
1838  reginfo.till = startpos+minend;
1839
1840  /* If there is a "must appear" string, look for it. */
1841  s = startpos;
1842
1843  if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */
1844   MAGIC *mg;
1845
1846   if (flags & REXEC_IGNOREPOS) /* Means: check only at start */
1847    reginfo.ganch = startpos + prog->gofs;
1848   else if (sv && SvTYPE(sv) >= SVt_PVMG
1849     && SvMAGIC(sv)
1850     && (mg = mg_find(sv, PERL_MAGIC_regex_global))
1851     && mg->mg_len >= 0) {
1852    reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */
1853    if (prog->extflags & RXf_ANCH_GPOS) {
1854     if (s > reginfo.ganch)
1855      goto phooey;
1856     s = reginfo.ganch - prog->gofs;
1857    }
1858   }
1859   else if (data) {
1860    reginfo.ganch = strbeg + PTR2UV(data);
1861   } else    /* pos() not defined */
1862    reginfo.ganch = strbeg;
1863  }
1864  if (PL_curpm && (PM_GETRE(PL_curpm) == prog)) {
1865   swap_on_fail = 1;
1866   swap_match_buff(prog); /* do we need a save destructor here for
1867         eval dies? */
1868  }
1869  if (!(flags & REXEC_CHECKED) && (prog->check_substr != NULL || prog->check_utf8 != NULL)) {
1870   re_scream_pos_data d;
1871
1872   d.scream_olds = &scream_olds;
1873   d.scream_pos = &scream_pos;
1874   s = re_intuit_start(prog, sv, s, strend, flags, &d);
1875   if (!s) {
1876    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Not present...\n"));
1877    goto phooey; /* not present */
1878   }
1879  }
1880
1881
1882
1883  /* Simplest case:  anchored match need be tried only once. */
1884  /*  [unless only anchor is BOL and multiline is set] */
1885  if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) {
1886   if (s == startpos && regtry(&reginfo, &startpos))
1887    goto got_it;
1888   else if (multiline || (prog->intflags & PREGf_IMPLICIT)
1889     || (prog->extflags & RXf_ANCH_MBOL)) /* XXXX SBOL? */
1890   {
1891    char *end;
1892
1893    if (minlen)
1894     dontbother = minlen - 1;
1895    end = HOP3c(strend, -dontbother, strbeg) - 1;
1896    /* for multiline we only have to try after newlines */
1897    if (prog->check_substr || prog->check_utf8) {
1898     if (s == startpos)
1899      goto after_try;
1900     while (1) {
1901      if (regtry(&reginfo, &s))
1902       goto got_it;
1903     after_try:
1904      if (s > end)
1905       goto phooey;
1906      if (prog->extflags & RXf_USE_INTUIT) {
1907       s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL);
1908       if (!s)
1909        goto phooey;
1910      }
1911      else
1912       s++;
1913     }
1914    } else {
1915     if (s > startpos)
1916      s--;
1917     while (s < end) {
1918      if (*s++ == '\n') { /* don't need PL_utf8skip here */
1919       if (regtry(&reginfo, &s))
1920        goto got_it;
1921      }
1922     }
1923    }
1924   }
1925   goto phooey;
1926  } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK))
1927  {
1928   /* the warning about reginfo.ganch being used without intialization
1929   is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN
1930   and we only enter this block when the same bit is set. */
1931   char *tmp_s = reginfo.ganch - prog->gofs;
1932   if (regtry(&reginfo, &tmp_s))
1933    goto got_it;
1934   goto phooey;
1935  }
1936
1937  /* Messy cases:  unanchored match. */
1938  if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) {
1939   /* we have /x+whatever/ */
1940   /* it must be a one character string (XXXX Except UTF?) */
1941   char ch;
1942 #ifdef DEBUGGING
1943   int did_match = 0;
1944 #endif
1945   if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1946    do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1947   ch = SvPVX_const(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr)[0];
1948
1949   if (do_utf8) {
1950    REXEC_FBC_SCAN(
1951     if (*s == ch) {
1952      DEBUG_EXECUTE_r( did_match = 1 );
1953      if (regtry(&reginfo, &s)) goto got_it;
1954      s += UTF8SKIP(s);
1955      while (s < strend && *s == ch)
1956       s += UTF8SKIP(s);
1957     }
1958    );
1959   }
1960   else {
1961    REXEC_FBC_SCAN(
1962     if (*s == ch) {
1963      DEBUG_EXECUTE_r( did_match = 1 );
1964      if (regtry(&reginfo, &s)) goto got_it;
1965      s++;
1966      while (s < strend && *s == ch)
1967       s++;
1968     }
1969    );
1970   }
1971   DEBUG_EXECUTE_r(if (!did_match)
1972     PerlIO_printf(Perl_debug_log,
1973         "Did not find anchored character...\n")
1974    );
1975  }
1976  else if (prog->anchored_substr != NULL
1977    || prog->anchored_utf8 != NULL
1978    || ((prog->float_substr != NULL || prog->float_utf8 != NULL)
1979     && prog->float_max_offset < strend - s)) {
1980   SV *must;
1981   I32 back_max;
1982   I32 back_min;
1983   char *last;
1984   char *last1;  /* Last position checked before */
1985 #ifdef DEBUGGING
1986   int did_match = 0;
1987 #endif
1988   if (prog->anchored_substr || prog->anchored_utf8) {
1989    if (!(do_utf8 ? prog->anchored_utf8 : prog->anchored_substr))
1990     do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1991    must = do_utf8 ? prog->anchored_utf8 : prog->anchored_substr;
1992    back_max = back_min = prog->anchored_offset;
1993   } else {
1994    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
1995     do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
1996    must = do_utf8 ? prog->float_utf8 : prog->float_substr;
1997    back_max = prog->float_max_offset;
1998    back_min = prog->float_min_offset;
1999   }
2000
2001
2002   if (must == &PL_sv_undef)
2003    /* could not downgrade utf8 check substring, so must fail */
2004    goto phooey;
2005
2006   if (back_min<0) {
2007    last = strend;
2008   } else {
2009    last = HOP3c(strend, /* Cannot start after this */
2010     -(I32)(CHR_SVLEN(must)
2011       - (SvTAIL(must) != 0) + back_min), strbeg);
2012   }
2013   if (s > PL_bostr)
2014    last1 = HOPc(s, -1);
2015   else
2016    last1 = s - 1; /* bogus */
2017
2018   /* XXXX check_substr already used to find "s", can optimize if
2019   check_substr==must. */
2020   scream_pos = -1;
2021   dontbother = end_shift;
2022   strend = HOPc(strend, -dontbother);
2023   while ( (s <= last) &&
2024     ((flags & REXEC_SCREAM)
2025     ? (s = screaminstr(sv, must, HOP3c(s, back_min, (back_min<0 ? strbeg : strend)) - strbeg,
2026          end_shift, &scream_pos, 0))
2027     : (s = fbm_instr((unsigned char*)HOP3(s, back_min, (back_min<0 ? strbeg : strend)),
2028         (unsigned char*)strend, must,
2029         multiline ? FBMrf_MULTILINE : 0))) ) {
2030    /* we may be pointing at the wrong string */
2031    if ((flags & REXEC_SCREAM) && RXp_MATCH_COPIED(prog))
2032     s = strbeg + (s - SvPVX_const(sv));
2033    DEBUG_EXECUTE_r( did_match = 1 );
2034    if (HOPc(s, -back_max) > last1) {
2035     last1 = HOPc(s, -back_min);
2036     s = HOPc(s, -back_max);
2037    }
2038    else {
2039     char * const t = (last1 >= PL_bostr) ? HOPc(last1, 1) : last1 + 1;
2040
2041     last1 = HOPc(s, -back_min);
2042     s = t;
2043    }
2044    if (do_utf8) {
2045     while (s <= last1) {
2046      if (regtry(&reginfo, &s))
2047       goto got_it;
2048      s += UTF8SKIP(s);
2049     }
2050    }
2051    else {
2052     while (s <= last1) {
2053      if (regtry(&reginfo, &s))
2054       goto got_it;
2055      s++;
2056     }
2057    }
2058   }
2059   DEBUG_EXECUTE_r(if (!did_match) {
2060    RE_PV_QUOTED_DECL(quoted, do_utf8, PERL_DEBUG_PAD_ZERO(0),
2061     SvPVX_const(must), RE_SV_DUMPLEN(must), 30);
2062    PerlIO_printf(Perl_debug_log, "Did not find %s substr %s%s...\n",
2063        ((must == prog->anchored_substr || must == prog->anchored_utf8)
2064        ? "anchored" : "floating"),
2065     quoted, RE_SV_TAIL(must));
2066   });
2067   goto phooey;
2068  }
2069  else if ( (c = progi->regstclass) ) {
2070   if (minlen) {
2071    const OPCODE op = OP(progi->regstclass);
2072    /* don't bother with what can't match */
2073    if (PL_regkind[op] != EXACT && op != CANY && PL_regkind[op] != TRIE)
2074     strend = HOPc(strend, -(minlen - 1));
2075   }
2076   DEBUG_EXECUTE_r({
2077    SV * const prop = sv_newmortal();
2078    regprop(prog, prop, c);
2079    {
2080     RE_PV_QUOTED_DECL(quoted,do_utf8,PERL_DEBUG_PAD_ZERO(1),
2081      s,strend-s,60);
2082     PerlIO_printf(Perl_debug_log,
2083      "Matching stclass %.*s against %s (%d chars)\n",
2084      (int)SvCUR(prop), SvPVX_const(prop),
2085      quoted, (int)(strend - s));
2086    }
2087   });
2088   if (find_byclass(prog, c, s, strend, &reginfo))
2089    goto got_it;
2090   DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "Contradicts stclass... [regexec_flags]\n"));
2091  }
2092  else {
2093   dontbother = 0;
2094   if (prog->float_substr != NULL || prog->float_utf8 != NULL) {
2095    /* Trim the end. */
2096    char *last;
2097    SV* float_real;
2098
2099    if (!(do_utf8 ? prog->float_utf8 : prog->float_substr))
2100     do_utf8 ? to_utf8_substr(prog) : to_byte_substr(prog);
2101    float_real = do_utf8 ? prog->float_utf8 : prog->float_substr;
2102
2103    if (flags & REXEC_SCREAM) {
2104     last = screaminstr(sv, float_real, s - strbeg,
2105         end_shift, &scream_pos, 1); /* last one */
2106     if (!last)
2107      last = scream_olds; /* Only one occurrence. */
2108     /* we may be pointing at the wrong string */
2109     else if (RXp_MATCH_COPIED(prog))
2110      s = strbeg + (s - SvPVX_const(sv));
2111    }
2112    else {
2113     STRLEN len;
2114     const char * const little = SvPV_const(float_real, len);
2115
2116     if (SvTAIL(float_real)) {
2117      if (memEQ(strend - len + 1, little, len - 1))
2118       last = strend - len + 1;
2119      else if (!multiline)
2120       last = memEQ(strend - len, little, len)
2121        ? strend - len : NULL;
2122      else
2123       goto find_last;
2124     } else {
2125     find_last:
2126      if (len)
2127       last = rninstr(s, strend, little, little + len);
2128      else
2129       last = strend; /* matching "$" */
2130     }
2131    }
2132    if (last == NULL) {
2133     DEBUG_EXECUTE_r(
2134      PerlIO_printf(Perl_debug_log,
2135       "%sCan't trim the tail, match fails (should not happen)%s\n",
2136       PL_colors[4], PL_colors[5]));
2137     goto phooey; /* Should not happen! */
2138    }
2139    dontbother = strend - last + prog->float_min_offset;
2140   }
2141   if (minlen && (dontbother < minlen))
2142    dontbother = minlen - 1;
2143   strend -= dontbother;      /* this one's always in bytes! */
2144   /* We don't know much -- general case. */
2145   if (do_utf8) {
2146    for (;;) {
2147     if (regtry(&reginfo, &s))
2148      goto got_it;
2149     if (s >= strend)
2150      break;
2151     s += UTF8SKIP(s);
2152    };
2153   }
2154   else {
2155    do {
2156     if (regtry(&reginfo, &s))
2157      goto got_it;
2158    } while (s++ < strend);
2159   }
2160  }
2161
2162  /* Failure. */
2163  goto phooey;
2164
2165 got_it:
2166  RX_MATCH_TAINTED_set(prog, PL_reg_flags & RF_tainted);
2167
2168  if (PL_reg_eval_set)
2169   restore_pos(aTHX_ prog);
2170  if (RXp_PAREN_NAMES(prog))
2171   (void)hv_iterinit(RXp_PAREN_NAMES(prog));
2172
2173  /* make sure $`, $&, $', and $digit will work later */
2174  if ( !(flags & REXEC_NOT_FIRST) ) {
2175   RX_MATCH_COPY_FREE(prog);
2176   if (flags & REXEC_COPY_STR) {
2177    const I32 i = PL_regeol - startpos + (stringarg - strbeg);
2178 #ifdef PERL_OLD_COPY_ON_WRITE
2179    if ((SvIsCOW(sv)
2180     || (SvFLAGS(sv) & CAN_COW_MASK) == CAN_COW_FLAGS)) {
2181     if (DEBUG_C_TEST) {
2182      PerlIO_printf(Perl_debug_log,
2183         "Copy on write: regexp capture, type %d\n",
2184         (int) SvTYPE(sv));
2185     }
2186     prog->saved_copy = sv_setsv_cow(prog->saved_copy, sv);
2187     prog->subbeg = (char *)SvPVX_const(prog->saved_copy);
2188     assert (SvPOKp(prog->saved_copy));
2189    } else
2190 #endif
2191    {
2192     RX_MATCH_COPIED_on(prog);
2193     s = savepvn(strbeg, i);
2194     prog->subbeg = s;
2195    }
2196    prog->sublen = i;
2197   }
2198   else {
2199    prog->subbeg = strbeg;
2200    prog->sublen = PL_regeol - strbeg; /* strend may have been modified */
2201   }
2202  }
2203
2204  return 1;
2205
2206 phooey:
2207  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n",
2208       PL_colors[4], PL_colors[5]));
2209  if (PL_reg_eval_set)
2210   restore_pos(aTHX_ prog);
2211  if (swap_on_fail)
2212   /* we failed :-( roll it back */
2213   swap_match_buff(prog);
2214
2215  return 0;
2216 }
2217
2218
2219 /*
2220  - regtry - try match at specific point
2221  */
2222 STATIC I32   /* 0 failure, 1 success */
2223 S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
2224 {
2225  dVAR;
2226  CHECKPOINT lastcp;
2227  regexp *prog = reginfo->prog;
2228  RXi_GET_DECL(prog,progi);
2229  GET_RE_DEBUG_FLAGS_DECL;
2230
2231  PERL_ARGS_ASSERT_REGTRY;
2232
2233  reginfo->cutpoint=NULL;
2234
2235  if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) {
2236   MAGIC *mg;
2237
2238   PL_reg_eval_set = RS_init;
2239   DEBUG_EXECUTE_r(DEBUG_s(
2240    PerlIO_printf(Perl_debug_log, "  setting stack tmpbase at %"IVdf"\n",
2241       (IV)(PL_stack_sp - PL_stack_base));
2242    ));
2243   SAVESTACK_CXPOS();
2244   cxstack[cxstack_ix].blk_oldsp = PL_stack_sp - PL_stack_base;
2245   /* Otherwise OP_NEXTSTATE will free whatever on stack now.  */
2246   SAVETMPS;
2247   /* Apparently this is not needed, judging by wantarray. */
2248   /* SAVEI8(cxstack[cxstack_ix].blk_gimme);
2249   cxstack[cxstack_ix].blk_gimme = G_SCALAR; */
2250
2251   if (reginfo->sv) {
2252    /* Make $_ available to executed code. */
2253    if (reginfo->sv != DEFSV) {
2254     SAVE_DEFSV;
2255     DEFSV_set(reginfo->sv);
2256    }
2257
2258    if (!(SvTYPE(reginfo->sv) >= SVt_PVMG && SvMAGIC(reginfo->sv)
2259     && (mg = mg_find(reginfo->sv, PERL_MAGIC_regex_global)))) {
2260     /* prepare for quick setting of pos */
2261 #ifdef PERL_OLD_COPY_ON_WRITE
2262     if (SvIsCOW(reginfo->sv))
2263      sv_force_normal_flags(reginfo->sv, 0);
2264 #endif
2265     mg = sv_magicext(reginfo->sv, NULL, PERL_MAGIC_regex_global,
2266         &PL_vtbl_mglob, NULL, 0);
2267     mg->mg_len = -1;
2268    }
2269    PL_reg_magic    = mg;
2270    PL_reg_oldpos   = mg->mg_len;
2271    SAVEDESTRUCTOR_X(restore_pos, prog);
2272   }
2273   if (!PL_reg_curpm) {
2274    Newxz(PL_reg_curpm, 1, PMOP);
2275 #ifdef USE_ITHREADS
2276    {
2277     SV* const repointer = newSViv(0);
2278     /* this regexp is also owned by the new PL_reg_curpm, which
2279     will try to free it.  */
2280     av_push(PL_regex_padav,repointer);
2281     PL_reg_curpm->op_pmoffset = av_len(PL_regex_padav);
2282     PL_regex_pad = AvARRAY(PL_regex_padav);
2283    }
2284 #endif
2285   }
2286 #ifdef USE_ITHREADS
2287   /* It seems that non-ithreads works both with and without this code.
2288   So for efficiency reasons it seems best not to have the code
2289   compiled when it is not needed.  */
2290   /* This is safe against NULLs: */
2291   ReREFCNT_dec(PM_GETRE(PL_reg_curpm));
2292   /* PM_reg_curpm owns a reference to this regexp.  */
2293   ReREFCNT_inc(prog);
2294 #endif
2295   PM_SETRE(PL_reg_curpm, prog);
2296   PL_reg_oldcurpm = PL_curpm;
2297   PL_curpm = PL_reg_curpm;
2298   if (RXp_MATCH_COPIED(prog)) {
2299    /*  Here is a serious problem: we cannot rewrite subbeg,
2300     since it may be needed if this match fails.  Thus
2301     $` inside (?{}) could fail... */
2302    PL_reg_oldsaved = prog->subbeg;
2303    PL_reg_oldsavedlen = prog->sublen;
2304 #ifdef PERL_OLD_COPY_ON_WRITE
2305    PL_nrs = prog->saved_copy;
2306 #endif
2307    RXp_MATCH_COPIED_off(prog);
2308   }
2309   else
2310    PL_reg_oldsaved = NULL;
2311   prog->subbeg = PL_bostr;
2312   prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
2313  }
2314  DEBUG_EXECUTE_r(PL_reg_starttry = *startpos);
2315  prog->offs[0].start = *startpos - PL_bostr;
2316  PL_reginput = *startpos;
2317  PL_reglastparen = &prog->lastparen;
2318  PL_reglastcloseparen = &prog->lastcloseparen;
2319  prog->lastparen = 0;
2320  prog->lastcloseparen = 0;
2321  PL_regsize = 0;
2322  PL_regoffs = prog->offs;
2323  if (PL_reg_start_tmpl <= prog->nparens) {
2324   PL_reg_start_tmpl = prog->nparens*3/2 + 3;
2325   if(PL_reg_start_tmp)
2326    Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2327   else
2328    Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
2329  }
2330
2331  /* XXXX What this code is doing here?!!!  There should be no need
2332  to do this again and again, PL_reglastparen should take care of
2333  this!  --ilya*/
2334
2335  /* Tests pat.t#187 and split.t#{13,14} seem to depend on this code.
2336  * Actually, the code in regcppop() (which Ilya may be meaning by
2337  * PL_reglastparen), is not needed at all by the test suite
2338  * (op/regexp, op/pat, op/split), but that code is needed otherwise
2339  * this erroneously leaves $1 defined: "1" =~ /^(?:(\d)x)?\d$/
2340  * Meanwhile, this code *is* needed for the
2341  * above-mentioned test suite tests to succeed.  The common theme
2342  * on those tests seems to be returning null fields from matches.
2343  * --jhi updated by dapm */
2344 #if 1
2345  if (prog->nparens) {
2346   regexp_paren_pair *pp = PL_regoffs;
2347   register I32 i;
2348   for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
2349    ++pp;
2350    pp->start = -1;
2351    pp->end = -1;
2352   }
2353  }
2354 #endif
2355  REGCP_SET(lastcp);
2356  if (regmatch(reginfo, progi->program + 1)) {
2357   PL_regoffs[0].end = PL_reginput - PL_bostr;
2358   return 1;
2359  }
2360  if (reginfo->cutpoint)
2361   *startpos= reginfo->cutpoint;
2362  REGCP_UNWIND(lastcp);
2363  return 0;
2364 }
2365
2366
2367 #define sayYES goto yes
2368 #define sayNO goto no
2369 #define sayNO_SILENT goto no_silent
2370
2371 /* we dont use STMT_START/END here because it leads to
2372    "unreachable code" warnings, which are bogus, but distracting. */
2373 #define CACHEsayNO \
2374  if (ST.cache_mask) \
2375  PL_reg_poscache[ST.cache_offset] |= ST.cache_mask; \
2376  sayNO
2377
2378 /* this is used to determine how far from the left messages like
2379    'failed...' are printed. It should be set such that messages
2380    are inline with the regop output that created them.
2381 */
2382 #define REPORT_CODE_OFF 32
2383
2384
2385 /* Make sure there is a test for this +1 options in re_tests */
2386 #define TRIE_INITAL_ACCEPT_BUFFLEN 4;
2387
2388 #define CHRTEST_UNINIT -1001 /* c1/c2 haven't been calculated yet */
2389 #define CHRTEST_VOID   -1000 /* the c1/c2 "next char" test should be skipped */
2390
2391 #define SLAB_FIRST(s) (&(s)->states[0])
2392 #define SLAB_LAST(s)  (&(s)->states[PERL_REGMATCH_SLAB_SLOTS-1])
2393
2394 /* grab a new slab and return the first slot in it */
2395
2396 STATIC regmatch_state *
2397 S_push_slab(pTHX)
2398 {
2399 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2400  dMY_CXT;
2401 #endif
2402  regmatch_slab *s = PL_regmatch_slab->next;
2403  if (!s) {
2404   Newx(s, 1, regmatch_slab);
2405   s->prev = PL_regmatch_slab;
2406   s->next = NULL;
2407   PL_regmatch_slab->next = s;
2408  }
2409  PL_regmatch_slab = s;
2410  return SLAB_FIRST(s);
2411 }
2412
2413
2414 /* push a new state then goto it */
2415
2416 #define PUSH_STATE_GOTO(state, node) \
2417  scan = node; \
2418  st->resume_state = state; \
2419  goto push_state;
2420
2421 /* push a new state with success backtracking, then goto it */
2422
2423 #define PUSH_YES_STATE_GOTO(state, node) \
2424  scan = node; \
2425  st->resume_state = state; \
2426  goto push_yes_state;
2427
2428
2429
2430 /*
2431
2432 regmatch() - main matching routine
2433
2434 This is basically one big switch statement in a loop. We execute an op,
2435 set 'next' to point the next op, and continue. If we come to a point which
2436 we may need to backtrack to on failure such as (A|B|C), we push a
2437 backtrack state onto the backtrack stack. On failure, we pop the top
2438 state, and re-enter the loop at the state indicated. If there are no more
2439 states to pop, we return failure.
2440
2441 Sometimes we also need to backtrack on success; for example /A+/, where
2442 after successfully matching one A, we need to go back and try to
2443 match another one; similarly for lookahead assertions: if the assertion
2444 completes successfully, we backtrack to the state just before the assertion
2445 and then carry on.  In these cases, the pushed state is marked as
2446 'backtrack on success too'. This marking is in fact done by a chain of
2447 pointers, each pointing to the previous 'yes' state. On success, we pop to
2448 the nearest yes state, discarding any intermediate failure-only states.
2449 Sometimes a yes state is pushed just to force some cleanup code to be
2450 called at the end of a successful match or submatch; e.g. (??{$re}) uses
2451 it to free the inner regex.
2452
2453 Note that failure backtracking rewinds the cursor position, while
2454 success backtracking leaves it alone.
2455
2456 A pattern is complete when the END op is executed, while a subpattern
2457 such as (?=foo) is complete when the SUCCESS op is executed. Both of these
2458 ops trigger the "pop to last yes state if any, otherwise return true"
2459 behaviour.
2460
2461 A common convention in this function is to use A and B to refer to the two
2462 subpatterns (or to the first nodes thereof) in patterns like /A*B/: so A is
2463 the subpattern to be matched possibly multiple times, while B is the entire
2464 rest of the pattern. Variable and state names reflect this convention.
2465
2466 The states in the main switch are the union of ops and failure/success of
2467 substates associated with with that op.  For example, IFMATCH is the op
2468 that does lookahead assertions /(?=A)B/ and so the IFMATCH state means
2469 'execute IFMATCH'; while IFMATCH_A is a state saying that we have just
2470 successfully matched A and IFMATCH_A_fail is a state saying that we have
2471 just failed to match A. Resume states always come in pairs. The backtrack
2472 state we push is marked as 'IFMATCH_A', but when that is popped, we resume
2473 at IFMATCH_A or IFMATCH_A_fail, depending on whether we are backtracking
2474 on success or failure.
2475
2476 The struct that holds a backtracking state is actually a big union, with
2477 one variant for each major type of op. The variable st points to the
2478 top-most backtrack struct. To make the code clearer, within each
2479 block of code we #define ST to alias the relevant union.
2480
2481 Here's a concrete example of a (vastly oversimplified) IFMATCH
2482 implementation:
2483
2484  switch (state) {
2485  ....
2486
2487 #define ST st->u.ifmatch
2488
2489  case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2490   ST.foo = ...; // some state we wish to save
2491   ...
2492   // push a yes backtrack state with a resume value of
2493   // IFMATCH_A/IFMATCH_A_fail, then continue execution at the
2494   // first node of A:
2495   PUSH_YES_STATE_GOTO(IFMATCH_A, A);
2496   // NOTREACHED
2497
2498  case IFMATCH_A: // we have successfully executed A; now continue with B
2499   next = B;
2500   bar = ST.foo; // do something with the preserved value
2501   break;
2502
2503  case IFMATCH_A_fail: // A failed, so the assertion failed
2504   ...;   // do some housekeeping, then ...
2505   sayNO; // propagate the failure
2506
2507 #undef ST
2508
2509  ...
2510  }
2511
2512 For any old-timers reading this who are familiar with the old recursive
2513 approach, the code above is equivalent to:
2514
2515  case IFMATCH: // we are executing the IFMATCH op, (?=A)B
2516  {
2517   int foo = ...
2518   ...
2519   if (regmatch(A)) {
2520    next = B;
2521    bar = foo;
2522    break;
2523   }
2524   ...;   // do some housekeeping, then ...
2525   sayNO; // propagate the failure
2526  }
2527
2528 The topmost backtrack state, pointed to by st, is usually free. If you
2529 want to claim it, populate any ST.foo fields in it with values you wish to
2530 save, then do one of
2531
2532   PUSH_STATE_GOTO(resume_state, node);
2533   PUSH_YES_STATE_GOTO(resume_state, node);
2534
2535 which sets that backtrack state's resume value to 'resume_state', pushes a
2536 new free entry to the top of the backtrack stack, then goes to 'node'.
2537 On backtracking, the free slot is popped, and the saved state becomes the
2538 new free state. An ST.foo field in this new top state can be temporarily
2539 accessed to retrieve values, but once the main loop is re-entered, it
2540 becomes available for reuse.
2541
2542 Note that the depth of the backtrack stack constantly increases during the
2543 left-to-right execution of the pattern, rather than going up and down with
2544 the pattern nesting. For example the stack is at its maximum at Z at the
2545 end of the pattern, rather than at X in the following:
2546
2547  /(((X)+)+)+....(Y)+....Z/
2548
2549 The only exceptions to this are lookahead/behind assertions and the cut,
2550 (?>A), which pop all the backtrack states associated with A before
2551 continuing.
2552
2553 Bascktrack state structs are allocated in slabs of about 4K in size.
2554 PL_regmatch_state and st always point to the currently active state,
2555 and PL_regmatch_slab points to the slab currently containing
2556 PL_regmatch_state.  The first time regmatch() is called, the first slab is
2557 allocated, and is never freed until interpreter destruction. When the slab
2558 is full, a new one is allocated and chained to the end. At exit from
2559 regmatch(), slabs allocated since entry are freed.
2560
2561 */
2562
2563
2564 #define DEBUG_STATE_pp(pp)        \
2565  DEBUG_STATE_r({         \
2566   DUMP_EXEC_POS(locinput, scan, do_utf8);      \
2567   PerlIO_printf(Perl_debug_log,       \
2568    "    %*s"pp" %s%s%s%s%s\n",       \
2569    depth*2, "",        \
2570    PL_reg_name[st->resume_state],                     \
2571    ((st==yes_state||st==mark_state) ? "[" : ""),   \
2572    ((st==yes_state) ? "Y" : ""),                   \
2573    ((st==mark_state) ? "M" : ""),                  \
2574    ((st==yes_state||st==mark_state) ? "]" : "")    \
2575   );                                                  \
2576  });
2577
2578
2579 #define REG_NODE_NUM(x) ((x) ? (int)((x)-prog) : -1)
2580
2581 #ifdef DEBUGGING
2582
2583 STATIC void
2584 S_debug_start_match(pTHX_ const REGEXP *prog, const bool do_utf8,
2585  const char *start, const char *end, const char *blurb)
2586 {
2587  const bool utf8_pat = RX_UTF8(prog) ? 1 : 0;
2588
2589  PERL_ARGS_ASSERT_DEBUG_START_MATCH;
2590
2591  if (!PL_colorset)
2592    reginitcolors();
2593  {
2594   RE_PV_QUOTED_DECL(s0, utf8_pat, PERL_DEBUG_PAD_ZERO(0),
2595    RX_PRECOMP_const(prog), RX_PRELEN(prog), 60);
2596
2597   RE_PV_QUOTED_DECL(s1, do_utf8, PERL_DEBUG_PAD_ZERO(1),
2598    start, end - start, 60);
2599
2600   PerlIO_printf(Perl_debug_log,
2601    "%s%s REx%s %s against %s\n",
2602      PL_colors[4], blurb, PL_colors[5], s0, s1);
2603
2604   if (do_utf8||utf8_pat)
2605    PerlIO_printf(Perl_debug_log, "UTF-8 %s%s%s...\n",
2606     utf8_pat ? "pattern" : "",
2607     utf8_pat && do_utf8 ? " and " : "",
2608     do_utf8 ? "string" : ""
2609    );
2610  }
2611 }
2612
2613 STATIC void
2614 S_dump_exec_pos(pTHX_ const char *locinput,
2615      const regnode *scan,
2616      const char *loc_regeol,
2617      const char *loc_bostr,
2618      const char *loc_reg_starttry,
2619      const bool do_utf8)
2620 {
2621  const int docolor = *PL_colors[0] || *PL_colors[2] || *PL_colors[4];
2622  const int taill = (docolor ? 10 : 7); /* 3 chars for "> <" */
2623  int l = (loc_regeol - locinput) > taill ? taill : (loc_regeol - locinput);
2624  /* The part of the string before starttry has one color
2625  (pref0_len chars), between starttry and current
2626  position another one (pref_len - pref0_len chars),
2627  after the current position the third one.
2628  We assume that pref0_len <= pref_len, otherwise we
2629  decrease pref0_len.  */
2630  int pref_len = (locinput - loc_bostr) > (5 + taill) - l
2631   ? (5 + taill) - l : locinput - loc_bostr;
2632  int pref0_len;
2633
2634  PERL_ARGS_ASSERT_DUMP_EXEC_POS;
2635
2636  while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput - pref_len)))
2637   pref_len++;
2638  pref0_len = pref_len  - (locinput - loc_reg_starttry);
2639  if (l + pref_len < (5 + taill) && l < loc_regeol - locinput)
2640   l = ( loc_regeol - locinput > (5 + taill) - pref_len
2641    ? (5 + taill) - pref_len : loc_regeol - locinput);
2642  while (do_utf8 && UTF8_IS_CONTINUATION(*(U8*)(locinput + l)))
2643   l--;
2644  if (pref0_len < 0)
2645   pref0_len = 0;
2646  if (pref0_len > pref_len)
2647   pref0_len = pref_len;
2648  {
2649   const int is_uni = (do_utf8 && OP(scan) != CANY) ? 1 : 0;
2650
2651   RE_PV_COLOR_DECL(s0,len0,is_uni,PERL_DEBUG_PAD(0),
2652    (locinput - pref_len),pref0_len, 60, 4, 5);
2653
2654   RE_PV_COLOR_DECL(s1,len1,is_uni,PERL_DEBUG_PAD(1),
2655      (locinput - pref_len + pref0_len),
2656      pref_len - pref0_len, 60, 2, 3);
2657
2658   RE_PV_COLOR_DECL(s2,len2,is_uni,PERL_DEBUG_PAD(2),
2659      locinput, loc_regeol - locinput, 10, 0, 1);
2660
2661   const STRLEN tlen=len0+len1+len2;
2662   PerlIO_printf(Perl_debug_log,
2663      "%4"IVdf" <%.*s%.*s%s%.*s>%*s|",
2664      (IV)(locinput - loc_bostr),
2665      len0, s0,
2666      len1, s1,
2667      (docolor ? "" : "> <"),
2668      len2, s2,
2669      (int)(tlen > 19 ? 0 :  19 - tlen),
2670      "");
2671  }
2672 }
2673
2674 #endif
2675
2676 /* reg_check_named_buff_matched()
2677  * Checks to see if a named buffer has matched. The data array of
2678  * buffer numbers corresponding to the buffer is expected to reside
2679  * in the regexp->data->data array in the slot stored in the ARG() of
2680  * node involved. Note that this routine doesn't actually care about the
2681  * name, that information is not preserved from compilation to execution.
2682  * Returns the index of the leftmost defined buffer with the given name
2683  * or 0 if non of the buffers matched.
2684  */
2685 STATIC I32
2686 S_reg_check_named_buff_matched(pTHX_ const regexp *rex, const regnode *scan)
2687 {
2688  I32 n;
2689  RXi_GET_DECL(rex,rexi);
2690  SV *sv_dat= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
2691  I32 *nums=(I32*)SvPVX(sv_dat);
2692
2693  PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED;
2694
2695  for ( n=0; n<SvIVX(sv_dat); n++ ) {
2696   if ((I32)*PL_reglastparen >= nums[n] &&
2697    PL_regoffs[nums[n]].end != -1)
2698   {
2699    return nums[n];
2700   }
2701  }
2702  return 0;
2703 }
2704
2705
2706 /* free all slabs above current one  - called during LEAVE_SCOPE */
2707
2708 STATIC void
2709 S_clear_backtrack_stack(pTHX_ void *p)
2710 {
2711  regmatch_slab *s = PL_regmatch_slab->next;
2712  PERL_UNUSED_ARG(p);
2713
2714  if (!s)
2715   return;
2716  PL_regmatch_slab->next = NULL;
2717  while (s) {
2718   regmatch_slab * const osl = s;
2719   s = s->next;
2720   Safefree(osl);
2721  }
2722 }
2723
2724
2725 #define SETREX(Re1,Re2) \
2726  if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \
2727  Re1 = (Re2)
2728
2729 STATIC I32   /* 0 failure, 1 success */
2730 S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
2731 {
2732 #if PERL_VERSION < 9 && !defined(PERL_CORE)
2733  dMY_CXT;
2734 #endif
2735  dVAR;
2736  register const bool do_utf8 = PL_reg_match_utf8;
2737  const U32 uniflags = UTF8_ALLOW_DEFAULT;
2738  regexp *rex = reginfo->prog;
2739  RXi_GET_DECL(rex,rexi);
2740  I32 oldsave;
2741  /* the current state. This is a cached copy of PL_regmatch_state */
2742  register regmatch_state *st;
2743  /* cache heavy used fields of st in registers */
2744  register regnode *scan;
2745  register regnode *next;
2746  register U32 n = 0; /* general value; init to avoid compiler warning */
2747  register I32 ln = 0; /* len or last;  init to avoid compiler warning */
2748  register char *locinput = PL_reginput;
2749  register I32 nextchr;   /* is always set to UCHARAT(locinput) */
2750
2751  bool result = 0;     /* return value of S_regmatch */
2752  int depth = 0;     /* depth of backtrack stack */
2753  U32 nochange_depth = 0; /* depth of GOSUB recursion with nochange */
2754  const U32 max_nochange_depth =
2755   (3 * rex->nparens > MAX_RECURSE_EVAL_NOCHANGE_DEPTH) ?
2756   3 * rex->nparens : MAX_RECURSE_EVAL_NOCHANGE_DEPTH;
2757  regmatch_state *yes_state = NULL; /* state to pop to on success of
2758                subpattern */
2759  /* mark_state piggy backs on the yes_state logic so that when we unwind
2760  the stack on success we can update the mark_state as we go */
2761  regmatch_state *mark_state = NULL; /* last mark state we have seen */
2762  regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
2763  struct regmatch_state  *cur_curlyx = NULL; /* most recent curlyx */
2764  U32 state_num;
2765  bool no_final = 0;      /* prevent failure from backtracking? */
2766  bool do_cutgroup = 0;   /* no_final only until next branch/trie entry */
2767  char *startpoint = PL_reginput;
2768  SV *popmark = NULL;     /* are we looking for a mark? */
2769  SV *sv_commit = NULL;   /* last mark name seen in failure */
2770  SV *sv_yes_mark = NULL; /* last mark name we have seen
2771        during a successfull match */
2772  U32 lastopen = 0;       /* last open we saw */
2773  bool has_cutgroup = RX_HAS_CUTGROUP(rex) ? 1 : 0;
2774  SV* const oreplsv = GvSV(PL_replgv);
2775  /* these three flags are set by various ops to signal information to
2776  * the very next op. They have a useful lifetime of exactly one loop
2777  * iteration, and are not preserved or restored by state pushes/pops
2778  */
2779  bool sw = 0;     /* the condition value in (?(cond)a|b) */
2780  bool minmod = 0;     /* the next "{n,m}" is a "{n,m}?" */
2781  int logical = 0;     /* the following EVAL is:
2782         0: (?{...})
2783         1: (?(?{...})X|Y)
2784         2: (??{...})
2785        or the following IFMATCH/UNLESSM is:
2786         false: plain (?=foo)
2787         true:  used as a condition: (?(?=foo))
2788        */
2789 #ifdef DEBUGGING
2790  GET_RE_DEBUG_FLAGS_DECL;
2791 #endif
2792
2793  PERL_ARGS_ASSERT_REGMATCH;
2794
2795  DEBUG_OPTIMISE_r( DEBUG_EXECUTE_r({
2796    PerlIO_printf(Perl_debug_log,"regmatch start\n");
2797  }));
2798  /* on first ever call to regmatch, allocate first slab */
2799  if (!PL_regmatch_slab) {
2800   Newx(PL_regmatch_slab, 1, regmatch_slab);
2801   PL_regmatch_slab->prev = NULL;
2802   PL_regmatch_slab->next = NULL;
2803   PL_regmatch_state = SLAB_FIRST(PL_regmatch_slab);
2804  }
2805
2806  oldsave = PL_savestack_ix;
2807  SAVEDESTRUCTOR_X(S_clear_backtrack_stack, NULL);
2808  SAVEVPTR(PL_regmatch_slab);
2809  SAVEVPTR(PL_regmatch_state);
2810
2811  /* grab next free state slot */
2812  st = ++PL_regmatch_state;
2813  if (st >  SLAB_LAST(PL_regmatch_slab))
2814   st = PL_regmatch_state = S_push_slab(aTHX);
2815
2816  /* Note that nextchr is a byte even in UTF */
2817  nextchr = UCHARAT(locinput);
2818  scan = prog;
2819  while (scan != NULL) {
2820
2821   DEBUG_EXECUTE_r( {
2822    SV * const prop = sv_newmortal();
2823    regnode *rnext=regnext(scan);
2824    DUMP_EXEC_POS( locinput, scan, do_utf8 );
2825    regprop(rex, prop, scan);
2826
2827    PerlIO_printf(Perl_debug_log,
2828      "%3"IVdf":%*s%s(%"IVdf")\n",
2829      (IV)(scan - rexi->program), depth*2, "",
2830      SvPVX_const(prop),
2831      (PL_regkind[OP(scan)] == END || !rnext) ?
2832       0 : (IV)(rnext - rexi->program));
2833   });
2834
2835   next = scan + NEXT_OFF(scan);
2836   if (next == scan)
2837    next = NULL;
2838   state_num = OP(scan);
2839
2840   REH_CALL_EXEC_NODE_HOOK(rex, scan, reginfo, st);
2841  reenter_switch:
2842
2843   assert(PL_reglastparen == &rex->lastparen);
2844   assert(PL_reglastcloseparen == &rex->lastcloseparen);
2845   assert(PL_regoffs == rex->offs);
2846
2847   switch (state_num) {
2848   case BOL:
2849    if (locinput == PL_bostr)
2850    {
2851     /* reginfo->till = reginfo->bol; */
2852     break;
2853    }
2854    sayNO;
2855   case MBOL:
2856    if (locinput == PL_bostr ||
2857     ((nextchr || locinput < PL_regeol) && locinput[-1] == '\n'))
2858    {
2859     break;
2860    }
2861    sayNO;
2862   case SBOL:
2863    if (locinput == PL_bostr)
2864     break;
2865    sayNO;
2866   case GPOS:
2867    if (locinput == reginfo->ganch)
2868     break;
2869    sayNO;
2870
2871   case KEEPS:
2872    /* update the startpoint */
2873    st->u.keeper.val = PL_regoffs[0].start;
2874    PL_reginput = locinput;
2875    PL_regoffs[0].start = locinput - PL_bostr;
2876    PUSH_STATE_GOTO(KEEPS_next, next);
2877    /*NOT-REACHED*/
2878   case KEEPS_next_fail:
2879    /* rollback the start point change */
2880    PL_regoffs[0].start = st->u.keeper.val;
2881    sayNO_SILENT;
2882    /*NOT-REACHED*/
2883   case EOL:
2884     goto seol;
2885   case MEOL:
2886    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2887     sayNO;
2888    break;
2889   case SEOL:
2890   seol:
2891    if ((nextchr || locinput < PL_regeol) && nextchr != '\n')
2892     sayNO;
2893    if (PL_regeol - locinput > 1)
2894     sayNO;
2895    break;
2896   case EOS:
2897    if (PL_regeol != locinput)
2898     sayNO;
2899    break;
2900   case SANY:
2901    if (!nextchr && locinput >= PL_regeol)
2902     sayNO;
2903    if (do_utf8) {
2904     locinput += PL_utf8skip[nextchr];
2905     if (locinput > PL_regeol)
2906      sayNO;
2907     nextchr = UCHARAT(locinput);
2908    }
2909    else
2910     nextchr = UCHARAT(++locinput);
2911    break;
2912   case CANY:
2913    if (!nextchr && locinput >= PL_regeol)
2914     sayNO;
2915    nextchr = UCHARAT(++locinput);
2916    break;
2917   case REG_ANY:
2918    if ((!nextchr && locinput >= PL_regeol) || nextchr == '\n')
2919     sayNO;
2920    if (do_utf8) {
2921     locinput += PL_utf8skip[nextchr];
2922     if (locinput > PL_regeol)
2923      sayNO;
2924     nextchr = UCHARAT(locinput);
2925    }
2926    else
2927     nextchr = UCHARAT(++locinput);
2928    break;
2929
2930 #undef  ST
2931 #define ST st->u.trie
2932   case TRIEC:
2933    /* In this case the charclass data is available inline so
2934    we can fail fast without a lot of extra overhead.
2935    */
2936    if (scan->flags == EXACT || !do_utf8) {
2937     if(!ANYOF_BITMAP_TEST(scan, *locinput)) {
2938      DEBUG_EXECUTE_r(
2939       PerlIO_printf(Perl_debug_log,
2940          "%*s  %sfailed to match trie start class...%s\n",
2941          REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2942      );
2943      sayNO_SILENT;
2944      /* NOTREACHED */
2945     }
2946    }
2947    /* FALL THROUGH */
2948   case TRIE:
2949    {
2950     /* what type of TRIE am I? (utf8 makes this contextual) */
2951     DECL_TRIE_TYPE(scan);
2952
2953     /* what trie are we using right now */
2954     reg_trie_data * const trie
2955      = (reg_trie_data*)rexi->data->data[ ARG( scan ) ];
2956     HV * widecharmap = MUTABLE_HV(rexi->data->data[ ARG( scan ) + 1 ]);
2957     U32 state = trie->startstate;
2958
2959     if (trie->bitmap && trie_type != trie_utf8_fold &&
2960      !TRIE_BITMAP_TEST(trie,*locinput)
2961     ) {
2962      if (trie->states[ state ].wordnum) {
2963       DEBUG_EXECUTE_r(
2964        PerlIO_printf(Perl_debug_log,
2965           "%*s  %smatched empty string...%s\n",
2966           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2967       );
2968       break;
2969      } else {
2970       DEBUG_EXECUTE_r(
2971        PerlIO_printf(Perl_debug_log,
2972           "%*s  %sfailed to match trie start class...%s\n",
2973           REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5])
2974       );
2975       sayNO_SILENT;
2976     }
2977     }
2978
2979    {
2980     U8 *uc = ( U8* )locinput;
2981
2982     STRLEN len = 0;
2983     STRLEN foldlen = 0;
2984     U8 *uscan = (U8*)NULL;
2985     STRLEN bufflen=0;
2986     SV *sv_accept_buff = NULL;
2987     U8 foldbuf[ UTF8_MAXBYTES_CASE + 1 ];
2988
2989      ST.accepted = 0; /* how many accepting states we have seen */
2990     ST.B = next;
2991     ST.jump = trie->jump;
2992     ST.me = scan;
2993     /*
2994     traverse the TRIE keeping track of all accepting states
2995     we transition through until we get to a failing node.
2996     */
2997
2998     while ( state && uc <= (U8*)PL_regeol ) {
2999      U32 base = trie->states[ state ].trans.base;
3000      UV uvc = 0;
3001      U16 charid;
3002      /* We use charid to hold the wordnum as we don't use it
3003      for charid until after we have done the wordnum logic.
3004      We define an alias just so that the wordnum logic reads
3005      more naturally. */
3006
3007 #define got_wordnum charid
3008      got_wordnum = trie->states[ state ].wordnum;
3009
3010      if ( got_wordnum ) {
3011       if ( ! ST.accepted ) {
3012        ENTER;
3013        SAVETMPS; /* XXX is this necessary? dmq */
3014        bufflen = TRIE_INITAL_ACCEPT_BUFFLEN;
3015        sv_accept_buff=newSV(bufflen *
3016            sizeof(reg_trie_accepted) - 1);
3017        SvCUR_set(sv_accept_buff, 0);
3018        SvPOK_on(sv_accept_buff);
3019        sv_2mortal(sv_accept_buff);
3020        SAVETMPS;
3021        ST.accept_buff =
3022         (reg_trie_accepted*)SvPV_nolen(sv_accept_buff );
3023       }
3024       do {
3025        if (ST.accepted >= bufflen) {
3026         bufflen *= 2;
3027         ST.accept_buff =(reg_trie_accepted*)
3028          SvGROW(sv_accept_buff,
3029            bufflen * sizeof(reg_trie_accepted));
3030        }
3031        SvCUR_set(sv_accept_buff,SvCUR(sv_accept_buff)
3032         + sizeof(reg_trie_accepted));
3033
3034
3035        ST.accept_buff[ST.accepted].wordnum = got_wordnum;
3036        ST.accept_buff[ST.accepted].endpos = uc;
3037        ++ST.accepted;
3038       } while (trie->nextword && (got_wordnum= trie->nextword[got_wordnum]));
3039      }
3040 #undef got_wordnum
3041
3042      DEBUG_TRIE_EXECUTE_r({
3043         DUMP_EXEC_POS( (char *)uc, scan, do_utf8 );
3044         PerlIO_printf( Perl_debug_log,
3045          "%*s  %sState: %4"UVxf" Accepted: %4"UVxf" ",
3046          2+depth * 2, "", PL_colors[4],
3047          (UV)state, (UV)ST.accepted );
3048      });
3049
3050      if ( base ) {
3051       REXEC_TRIE_READ_CHAR(trie_type, trie, widecharmap, uc,
3052            uscan, len, uvc, charid, foldlen,
3053            foldbuf, uniflags);
3054
3055       if (charid &&
3056        (base + charid > trie->uniquecharcount )
3057        && (base + charid - 1 - trie->uniquecharcount
3058          < trie->lasttrans)
3059        && trie->trans[base + charid - 1 -
3060          trie->uniquecharcount].check == state)
3061       {
3062        state = trie->trans[base + charid - 1 -
3063         trie->uniquecharcount ].next;
3064       }
3065       else {
3066        state = 0;
3067       }
3068       uc += len;
3069
3070      }
3071      else {
3072       state = 0;
3073      }
3074      DEBUG_TRIE_EXECUTE_r(
3075       PerlIO_printf( Perl_debug_log,
3076        "Charid:%3x CP:%4"UVxf" After State: %4"UVxf"%s\n",
3077        charid, uvc, (UV)state, PL_colors[5] );
3078      );
3079     }
3080     if (!ST.accepted )
3081     sayNO;
3082
3083     DEBUG_EXECUTE_r(
3084      PerlIO_printf( Perl_debug_log,
3085       "%*s  %sgot %"IVdf" possible matches%s\n",
3086       REPORT_CODE_OFF + depth * 2, "",
3087       PL_colors[4], (IV)ST.accepted, PL_colors[5] );
3088     );
3089    }}
3090    goto trie_first_try; /* jump into the fail handler */
3091    /* NOTREACHED */
3092   case TRIE_next_fail: /* we failed - try next alterative */
3093    if ( ST.jump) {
3094     REGCP_UNWIND(ST.cp);
3095     for (n = *PL_reglastparen; n > ST.lastparen; n--)
3096      PL_regoffs[n].end = -1;
3097     *PL_reglastparen = n;
3098    }
3099   trie_first_try:
3100    if (do_cutgroup) {
3101     do_cutgroup = 0;
3102     no_final = 0;
3103    }
3104
3105    if ( ST.jump) {
3106     ST.lastparen = *PL_reglastparen;
3107     REGCP_SET(ST.cp);
3108    }
3109    if ( ST.accepted == 1 ) {
3110     /* only one choice left - just continue */
3111     DEBUG_EXECUTE_r({
3112      AV *const trie_words
3113       = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3114      SV ** const tmp = av_fetch( trie_words,
3115       ST.accept_buff[ 0 ].wordnum-1, 0 );
3116      SV *sv= tmp ? sv_newmortal() : NULL;
3117
3118      PerlIO_printf( Perl_debug_log,
3119       "%*s  %sonly one match left: #%d <%s>%s\n",
3120       REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3121       ST.accept_buff[ 0 ].wordnum,
3122       tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3123         PL_colors[0], PL_colors[1],
3124         (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3125        )
3126       : "not compiled under -Dr",
3127       PL_colors[5] );
3128     });
3129     PL_reginput = (char *)ST.accept_buff[ 0 ].endpos;
3130     /* in this case we free tmps/leave before we call regmatch
3131     as we wont be using accept_buff again. */
3132
3133     locinput = PL_reginput;
3134     nextchr = UCHARAT(locinput);
3135      if ( !ST.jump || !ST.jump[ST.accept_buff[0].wordnum])
3136       scan = ST.B;
3137      else
3138       scan = ST.me + ST.jump[ST.accept_buff[0].wordnum];
3139     if (!has_cutgroup) {
3140      FREETMPS;
3141      LEAVE;
3142     } else {
3143      ST.accepted--;
3144      PUSH_YES_STATE_GOTO(TRIE_next, scan);
3145     }
3146
3147     continue; /* execute rest of RE */
3148    }
3149
3150    if ( !ST.accepted-- ) {
3151     DEBUG_EXECUTE_r({
3152      PerlIO_printf( Perl_debug_log,
3153       "%*s  %sTRIE failed...%s\n",
3154       REPORT_CODE_OFF+depth*2, "",
3155       PL_colors[4],
3156       PL_colors[5] );
3157     });
3158     FREETMPS;
3159     LEAVE;
3160     sayNO_SILENT;
3161     /*NOTREACHED*/
3162    }
3163
3164    /*
3165    There are at least two accepting states left.  Presumably
3166    the number of accepting states is going to be low,
3167    typically two. So we simply scan through to find the one
3168    with lowest wordnum.  Once we find it, we swap the last
3169    state into its place and decrement the size. We then try to
3170    match the rest of the pattern at the point where the word
3171    ends. If we succeed, control just continues along the
3172    regex; if we fail we return here to try the next accepting
3173    state
3174    */
3175
3176    {
3177     U32 best = 0;
3178     U32 cur;
3179     for( cur = 1 ; cur <= ST.accepted ; cur++ ) {
3180      DEBUG_TRIE_EXECUTE_r(
3181       PerlIO_printf( Perl_debug_log,
3182        "%*s  %sgot %"IVdf" (%d) as best, looking at %"IVdf" (%d)%s\n",
3183        REPORT_CODE_OFF + depth * 2, "", PL_colors[4],
3184        (IV)best, ST.accept_buff[ best ].wordnum, (IV)cur,
3185        ST.accept_buff[ cur ].wordnum, PL_colors[5] );
3186      );
3187
3188      if (ST.accept_buff[cur].wordnum <
3189        ST.accept_buff[best].wordnum)
3190       best = cur;
3191     }
3192
3193     DEBUG_EXECUTE_r({
3194      AV *const trie_words
3195       = MUTABLE_AV(rexi->data->data[ARG(ST.me)+TRIE_WORDS_OFFSET]);
3196      SV ** const tmp = av_fetch( trie_words,
3197       ST.accept_buff[ best ].wordnum - 1, 0 );
3198      regnode *nextop=(!ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) ?
3199          ST.B :
3200          ST.me + ST.jump[ST.accept_buff[best].wordnum];
3201      SV *sv= tmp ? sv_newmortal() : NULL;
3202
3203      PerlIO_printf( Perl_debug_log,
3204       "%*s  %strying alternation #%d <%s> at node #%d %s\n",
3205       REPORT_CODE_OFF+depth*2, "", PL_colors[4],
3206       ST.accept_buff[best].wordnum,
3207       tmp ? pv_pretty(sv, SvPV_nolen_const(*tmp), SvCUR(*tmp), 0,
3208         PL_colors[0], PL_colors[1],
3209         (SvUTF8(*tmp) ? PERL_PV_ESCAPE_UNI : 0)
3210        ) : "not compiled under -Dr",
3211        REG_NODE_NUM(nextop),
3212       PL_colors[5] );
3213     });
3214
3215     if ( best<ST.accepted ) {
3216      reg_trie_accepted tmp = ST.accept_buff[ best ];
3217      ST.accept_buff[ best ] = ST.accept_buff[ ST.accepted ];
3218      ST.accept_buff[ ST.accepted ] = tmp;
3219      best = ST.accepted;
3220     }
3221     PL_reginput = (char *)ST.accept_buff[ best ].endpos;
3222     if ( !ST.jump || !ST.jump[ST.accept_buff[best].wordnum]) {
3223      scan = ST.B;
3224     } else {
3225      scan = ST.me + ST.jump[ST.accept_buff[best].wordnum];
3226     }
3227     PUSH_YES_STATE_GOTO(TRIE_next, scan);
3228     /* NOTREACHED */
3229    }
3230    /* NOTREACHED */
3231   case TRIE_next:
3232    /* we dont want to throw this away, see bug 57042*/
3233    if (oreplsv != GvSV(PL_replgv))
3234     sv_setsv(oreplsv, GvSV(PL_replgv));
3235    FREETMPS;
3236    LEAVE;
3237    sayYES;
3238 #undef  ST
3239
3240   case EXACT: {
3241    char *s = STRING(scan);
3242    ln = STR_LEN(scan);
3243    if (do_utf8 != UTF) {
3244     /* The target and the pattern have differing utf8ness. */
3245     char *l = locinput;
3246     const char * const e = s + ln;
3247
3248     if (do_utf8) {
3249      /* The target is utf8, the pattern is not utf8. */
3250      while (s < e) {
3251       STRLEN ulen;
3252       if (l >= PL_regeol)
3253        sayNO;
3254       if (NATIVE_TO_UNI(*(U8*)s) !=
3255        utf8n_to_uvuni((U8*)l, UTF8_MAXBYTES, &ulen,
3256            uniflags))
3257        sayNO;
3258       l += ulen;
3259       s ++;
3260      }
3261     }
3262     else {
3263      /* The target is not utf8, the pattern is utf8. */
3264      while (s < e) {
3265       STRLEN ulen;
3266       if (l >= PL_regeol)
3267        sayNO;
3268       if (NATIVE_TO_UNI(*((U8*)l)) !=
3269        utf8n_to_uvuni((U8*)s, UTF8_MAXBYTES, &ulen,
3270           uniflags))
3271        sayNO;
3272       s += ulen;
3273       l ++;
3274      }
3275     }
3276     locinput = l;
3277     nextchr = UCHARAT(locinput);
3278     break;
3279    }
3280    /* The target and the pattern have the same utf8ness. */
3281    /* Inline the first character, for speed. */
3282    if (UCHARAT(s) != nextchr)
3283     sayNO;
3284    if (PL_regeol - locinput < ln)
3285     sayNO;
3286    if (ln > 1 && memNE(s, locinput, ln))
3287     sayNO;
3288    locinput += ln;
3289    nextchr = UCHARAT(locinput);
3290    break;
3291    }
3292   case EXACTFL:
3293    PL_reg_flags |= RF_tainted;
3294    /* FALL THROUGH */
3295   case EXACTF: {
3296    char * const s = STRING(scan);
3297    ln = STR_LEN(scan);
3298
3299    if (do_utf8 || UTF) {
3300    /* Either target or the pattern are utf8. */
3301     const char * const l = locinput;
3302     char *e = PL_regeol;
3303
3304     if (ibcmp_utf8(s, 0,  ln, (bool)UTF,
3305        l, &e, 0,  do_utf8)) {
3306      /* One more case for the sharp s:
3307      * pack("U0U*", 0xDF) =~ /ss/i,
3308      * the 0xC3 0x9F are the UTF-8
3309      * byte sequence for the U+00DF. */
3310
3311      if (!(do_utf8 &&
3312       toLOWER(s[0]) == 's' &&
3313       ln >= 2 &&
3314       toLOWER(s[1]) == 's' &&
3315       (U8)l[0] == 0xC3 &&
3316       e - l >= 2 &&
3317       (U8)l[1] == 0x9F))
3318       sayNO;
3319     }
3320     locinput = e;
3321     nextchr = UCHARAT(locinput);
3322     break;
3323    }
3324
3325    /* Neither the target and the pattern are utf8. */
3326
3327    /* Inline the first character, for speed. */
3328    if (UCHARAT(s) != nextchr &&
3329     UCHARAT(s) != ((OP(scan) == EXACTF)
3330        ? PL_fold : PL_fold_locale)[nextchr])
3331     sayNO;
3332    if (PL_regeol - locinput < ln)
3333     sayNO;
3334    if (ln > 1 && (OP(scan) == EXACTF
3335       ? ibcmp(s, locinput, ln)
3336       : ibcmp_locale(s, locinput, ln)))
3337     sayNO;
3338    locinput += ln;
3339    nextchr = UCHARAT(locinput);
3340    break;
3341    }
3342   case ANYOF:
3343    if (do_utf8) {
3344     STRLEN inclasslen = PL_regeol - locinput;
3345
3346     if (!reginclass(rex, scan, (U8*)locinput, &inclasslen, do_utf8))
3347      goto anyof_fail;
3348     if (locinput >= PL_regeol)
3349      sayNO;
3350     locinput += inclasslen ? inclasslen : UTF8SKIP(locinput);
3351     nextchr = UCHARAT(locinput);
3352     break;
3353    }
3354    else {
3355     if (nextchr < 0)
3356      nextchr = UCHARAT(locinput);
3357     if (!REGINCLASS(rex, scan, (U8*)locinput))
3358      goto anyof_fail;
3359     if (!nextchr && locinput >= PL_regeol)
3360      sayNO;
3361     nextchr = UCHARAT(++locinput);
3362     break;
3363    }
3364   anyof_fail:
3365    /* If we might have the case of the German sharp s
3366    * in a casefolding Unicode character class. */
3367
3368    if (ANYOF_FOLD_SHARP_S(scan, locinput, PL_regeol)) {
3369     locinput += SHARP_S_SKIP;
3370     nextchr = UCHARAT(locinput);
3371    }
3372    else
3373     sayNO;
3374    break;
3375   case ALNUML:
3376    PL_reg_flags |= RF_tainted;
3377    /* FALL THROUGH */
3378   case ALNUM:
3379    if (!nextchr)
3380     sayNO;
3381    if (do_utf8) {
3382     LOAD_UTF8_CHARCLASS_ALNUM();
3383     if (!(OP(scan) == ALNUM
3384      ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3385      : isALNUM_LC_utf8((U8*)locinput)))
3386     {
3387      sayNO;
3388     }
3389     locinput += PL_utf8skip[nextchr];
3390     nextchr = UCHARAT(locinput);
3391     break;
3392    }
3393    if (!(OP(scan) == ALNUM
3394     ? isALNUM(nextchr) : isALNUM_LC(nextchr)))
3395     sayNO;
3396    nextchr = UCHARAT(++locinput);
3397    break;
3398   case NALNUML:
3399    PL_reg_flags |= RF_tainted;
3400    /* FALL THROUGH */
3401   case NALNUM:
3402    if (!nextchr && locinput >= PL_regeol)
3403     sayNO;
3404    if (do_utf8) {
3405     LOAD_UTF8_CHARCLASS_ALNUM();
3406     if (OP(scan) == NALNUM
3407      ? (bool)swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8)
3408      : isALNUM_LC_utf8((U8*)locinput))
3409     {
3410      sayNO;
3411     }
3412     locinput += PL_utf8skip[nextchr];
3413     nextchr = UCHARAT(locinput);
3414     break;
3415    }
3416    if (OP(scan) == NALNUM
3417     ? isALNUM(nextchr) : isALNUM_LC(nextchr))
3418     sayNO;
3419    nextchr = UCHARAT(++locinput);
3420    break;
3421   case BOUNDL:
3422   case NBOUNDL:
3423    PL_reg_flags |= RF_tainted;
3424    /* FALL THROUGH */
3425   case BOUND:
3426   case NBOUND:
3427    /* was last char in word? */
3428    if (do_utf8) {
3429     if (locinput == PL_bostr)
3430      ln = '\n';
3431     else {
3432      const U8 * const r = reghop3((U8*)locinput, -1, (U8*)PL_bostr);
3433
3434      ln = utf8n_to_uvchr(r, UTF8SKIP(r), 0, uniflags);
3435     }
3436     if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3437      ln = isALNUM_uni(ln);
3438      LOAD_UTF8_CHARCLASS_ALNUM();
3439      n = swash_fetch(PL_utf8_alnum, (U8*)locinput, do_utf8);
3440     }
3441     else {
3442      ln = isALNUM_LC_uvchr(UNI_TO_NATIVE(ln));
3443      n = isALNUM_LC_utf8((U8*)locinput);
3444     }
3445    }
3446    else {
3447     ln = (locinput != PL_bostr) ?
3448      UCHARAT(locinput - 1) : '\n';
3449     if (OP(scan) == BOUND || OP(scan) == NBOUND) {
3450      ln = isALNUM(ln);
3451      n = isALNUM(nextchr);
3452     }
3453     else {
3454      ln = isALNUM_LC(ln);
3455      n = isALNUM_LC(nextchr);
3456     }
3457    }
3458    if (((!ln) == (!n)) == (OP(scan) == BOUND ||
3459          OP(scan) == BOUNDL))
3460      sayNO;
3461    break;
3462   case SPACEL:
3463    PL_reg_flags |= RF_tainted;
3464    /* FALL THROUGH */
3465   case SPACE:
3466    if (!nextchr)
3467     sayNO;
3468    if (do_utf8) {
3469     if (UTF8_IS_CONTINUED(nextchr)) {
3470      LOAD_UTF8_CHARCLASS_SPACE();
3471      if (!(OP(scan) == SPACE
3472       ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3473       : isSPACE_LC_utf8((U8*)locinput)))
3474      {
3475       sayNO;
3476      }
3477      locinput += PL_utf8skip[nextchr];
3478      nextchr = UCHARAT(locinput);
3479      break;
3480     }
3481     if (!(OP(scan) == SPACE
3482      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3483      sayNO;
3484     nextchr = UCHARAT(++locinput);
3485    }
3486    else {
3487     if (!(OP(scan) == SPACE
3488      ? isSPACE(nextchr) : isSPACE_LC(nextchr)))
3489      sayNO;
3490     nextchr = UCHARAT(++locinput);
3491    }
3492    break;
3493   case NSPACEL:
3494    PL_reg_flags |= RF_tainted;
3495    /* FALL THROUGH */
3496   case NSPACE:
3497    if (!nextchr && locinput >= PL_regeol)
3498     sayNO;
3499    if (do_utf8) {
3500     LOAD_UTF8_CHARCLASS_SPACE();
3501     if (OP(scan) == NSPACE
3502      ? (bool)swash_fetch(PL_utf8_space, (U8*)locinput, do_utf8)
3503      : isSPACE_LC_utf8((U8*)locinput))
3504     {
3505      sayNO;
3506     }
3507     locinput += PL_utf8skip[nextchr];
3508     nextchr = UCHARAT(locinput);
3509     break;
3510    }
3511    if (OP(scan) == NSPACE
3512     ? isSPACE(nextchr) : isSPACE_LC(nextchr))
3513     sayNO;
3514    nextchr = UCHARAT(++locinput);
3515    break;
3516   case DIGITL:
3517    PL_reg_flags |= RF_tainted;
3518    /* FALL THROUGH */
3519   case DIGIT:
3520    if (!nextchr)
3521     sayNO;
3522    if (do_utf8) {
3523     LOAD_UTF8_CHARCLASS_DIGIT();
3524     if (!(OP(scan) == DIGIT
3525      ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3526      : isDIGIT_LC_utf8((U8*)locinput)))
3527     {
3528      sayNO;
3529     }
3530     locinput += PL_utf8skip[nextchr];
3531     nextchr = UCHARAT(locinput);
3532     break;
3533    }
3534    if (!(OP(scan) == DIGIT
3535     ? isDIGIT(nextchr) : isDIGIT_LC(nextchr)))
3536     sayNO;
3537    nextchr = UCHARAT(++locinput);
3538    break;
3539   case NDIGITL:
3540    PL_reg_flags |= RF_tainted;
3541    /* FALL THROUGH */
3542   case NDIGIT:
3543    if (!nextchr && locinput >= PL_regeol)
3544     sayNO;
3545    if (do_utf8) {
3546     LOAD_UTF8_CHARCLASS_DIGIT();
3547     if (OP(scan) == NDIGIT
3548      ? (bool)swash_fetch(PL_utf8_digit, (U8*)locinput, do_utf8)
3549      : isDIGIT_LC_utf8((U8*)locinput))
3550     {
3551      sayNO;
3552     }
3553     locinput += PL_utf8skip[nextchr];
3554     nextchr = UCHARAT(locinput);
3555     break;
3556    }
3557    if (OP(scan) == NDIGIT
3558     ? isDIGIT(nextchr) : isDIGIT_LC(nextchr))
3559     sayNO;
3560    nextchr = UCHARAT(++locinput);
3561    break;
3562   case CLUMP:
3563    if (locinput >= PL_regeol)
3564     sayNO;
3565    if  (do_utf8) {
3566     LOAD_UTF8_CHARCLASS_MARK();
3567     if (swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3568      sayNO;
3569     locinput += PL_utf8skip[nextchr];
3570     while (locinput < PL_regeol &&
3571      swash_fetch(PL_utf8_mark,(U8*)locinput, do_utf8))
3572      locinput += UTF8SKIP(locinput);
3573     if (locinput > PL_regeol)
3574      sayNO;
3575    }
3576    else
3577    locinput++;
3578    nextchr = UCHARAT(locinput);
3579    break;
3580
3581   case NREFFL:
3582   {
3583    char *s;
3584    char type;
3585    PL_reg_flags |= RF_tainted;
3586    /* FALL THROUGH */
3587   case NREF:
3588   case NREFF:
3589    type = OP(scan);
3590    n = reg_check_named_buff_matched(rex,scan);
3591
3592    if ( n ) {
3593     type = REF + ( type - NREF );
3594     goto do_ref;
3595    } else {
3596     sayNO;
3597    }
3598    /* unreached */
3599   case REFFL:
3600    PL_reg_flags |= RF_tainted;
3601    /* FALL THROUGH */
3602   case REF:
3603   case REFF:
3604    n = ARG(scan);  /* which paren pair */
3605    type = OP(scan);
3606   do_ref:
3607    ln = PL_regoffs[n].start;
3608    PL_reg_leftiter = PL_reg_maxiter;  /* Void cache */
3609    if (*PL_reglastparen < n || ln == -1)
3610     sayNO;   /* Do not match unless seen CLOSEn. */
3611    if (ln == PL_regoffs[n].end)
3612     break;
3613
3614    s = PL_bostr + ln;
3615    if (do_utf8 && type != REF) { /* REF can do byte comparison */
3616     char *l = locinput;
3617     const char *e = PL_bostr + PL_regoffs[n].end;
3618     /*
3619     * Note that we can't do the "other character" lookup trick as
3620     * in the 8-bit case (no pun intended) because in Unicode we
3621     * have to map both upper and title case to lower case.
3622     */
3623     if (type == REFF) {
3624      while (s < e) {
3625       STRLEN ulen1, ulen2;
3626       U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
3627       U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
3628
3629       if (l >= PL_regeol)
3630        sayNO;
3631       toLOWER_utf8((U8*)s, tmpbuf1, &ulen1);
3632       toLOWER_utf8((U8*)l, tmpbuf2, &ulen2);
3633       if (ulen1 != ulen2 || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
3634        sayNO;
3635       s += ulen1;
3636       l += ulen2;
3637      }
3638     }
3639     locinput = l;
3640     nextchr = UCHARAT(locinput);
3641     break;
3642    }
3643
3644    /* Inline the first character, for speed. */
3645    if (UCHARAT(s) != nextchr &&
3646     (type == REF ||
3647     (UCHARAT(s) != (type == REFF
3648         ? PL_fold : PL_fold_locale)[nextchr])))
3649     sayNO;
3650    ln = PL_regoffs[n].end - ln;
3651    if (locinput + ln > PL_regeol)
3652     sayNO;
3653    if (ln > 1 && (type == REF
3654       ? memNE(s, locinput, ln)
3655       : (type == REFF
3656        ? ibcmp(s, locinput, ln)
3657        : ibcmp_locale(s, locinput, ln))))
3658     sayNO;
3659    locinput += ln;
3660    nextchr = UCHARAT(locinput);
3661    break;
3662   }
3663   case NOTHING:
3664   case TAIL:
3665    break;
3666   case BACK:
3667    break;
3668
3669 #undef  ST
3670 #define ST st->u.eval
3671   {
3672    SV *ret;
3673    regexp *re;
3674    regexp_internal *rei;
3675    regnode *startpoint;
3676
3677   case GOSTART:
3678   case GOSUB: /*    /(...(?1))/   /(...(?&foo))/   */
3679    if (cur_eval && cur_eval->locinput==locinput) {
3680     if (cur_eval->u.eval.close_paren == (U32)ARG(scan))
3681      Perl_croak(aTHX_ "Infinite recursion in regex");
3682     if ( ++nochange_depth > max_nochange_depth )
3683      Perl_croak(aTHX_
3684       "Pattern subroutine nesting without pos change"
3685       " exceeded limit in regex");
3686    } else {
3687     nochange_depth = 0;
3688    }
3689    re = rex;
3690    rei = rexi;
3691    (void)ReREFCNT_inc(rex);
3692    if (OP(scan)==GOSUB) {
3693     startpoint = scan + ARG2L(scan);
3694     ST.close_paren = ARG(scan);
3695    } else {
3696     startpoint = rei->program+1;
3697     ST.close_paren = 0;
3698    }
3699    goto eval_recurse_doit;
3700    /* NOTREACHED */
3701   case EVAL:  /*   /(?{A})B/   /(??{A})B/  and /(?(?{A})X|Y)B/   */
3702    if (cur_eval && cur_eval->locinput==locinput) {
3703     if ( ++nochange_depth > max_nochange_depth )
3704      Perl_croak(aTHX_ "EVAL without pos change exceeded limit in regex");
3705    } else {
3706     nochange_depth = 0;
3707    }
3708    {
3709     /* execute the code in the {...} */
3710     dSP;
3711     SV ** const before = SP;
3712     OP_4tree * const oop = PL_op;
3713     COP * const ocurcop = PL_curcop;
3714     PAD *old_comppad;
3715
3716     n = ARG(scan);
3717     PL_op = (OP_4tree*)rexi->data->data[n];
3718     DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
3719      "  re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
3720     PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
3721     PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
3722
3723     if (sv_yes_mark) {
3724      SV *sv_mrk = get_sv("REGMARK", 1);
3725      sv_setsv(sv_mrk, sv_yes_mark);
3726     }
3727
3728     CALLRUNOPS(aTHX);   /* Scalar context. */
3729     SPAGAIN;
3730     if (SP == before)
3731      ret = &PL_sv_undef;   /* protect against empty (?{}) blocks. */
3732     else {
3733      ret = POPs;
3734      PUTBACK;
3735     }
3736
3737     PL_op = oop;
3738     PAD_RESTORE_LOCAL(old_comppad);
3739     PL_curcop = ocurcop;
3740     if (!logical) {
3741      /* /(?{...})/ */
3742      sv_setsv(save_scalar(PL_replgv), ret);
3743      break;
3744     }
3745    }
3746    if (logical == 2) { /* Postponed subexpression: /(??{...})/ */
3747     logical = 0;
3748     {
3749      /* extract RE object from returned value; compiling if
3750      * necessary */
3751
3752      MAGIC *mg = NULL;
3753      const SV *sv;
3754      if(SvROK(ret) && SvSMAGICAL(sv = SvRV(ret)))
3755       mg = mg_find(sv, PERL_MAGIC_qr);
3756      else if (SvSMAGICAL(ret)) {
3757       if (SvGMAGICAL(ret))
3758        sv_unmagic(ret, PERL_MAGIC_qr);
3759       else
3760        mg = mg_find(ret, PERL_MAGIC_qr);
3761      }
3762
3763      if (mg) {
3764       re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/
3765      }
3766      else {
3767       U32 pm_flags = 0;
3768       const I32 osize = PL_regsize;
3769
3770       if (DO_UTF8(ret)) pm_flags |= RXf_UTF8;
3771       re = CALLREGCOMP(ret, pm_flags);
3772       if (!(SvFLAGS(ret)
3773        & (SVs_TEMP | SVs_PADTMP | SVf_READONLY
3774         | SVs_GMG)))
3775        sv_magic(ret,MUTABLE_SV(ReREFCNT_inc(re)),
3776           PERL_MAGIC_qr,0,0);
3777       PL_regsize = osize;
3778      }
3779     }
3780     RXp_MATCH_COPIED_off(re);
3781     re->subbeg = rex->subbeg;
3782     re->sublen = rex->sublen;
3783     rei = RXi_GET(re);
3784     DEBUG_EXECUTE_r(
3785      debug_start_match(re, do_utf8, locinput, PL_regeol,
3786       "Matching embedded");
3787     );
3788     startpoint = rei->program + 1;
3789      ST.close_paren = 0; /* only used for GOSUB */
3790      /* borrowed from regtry */
3791     if (PL_reg_start_tmpl <= re->nparens) {
3792      PL_reg_start_tmpl = re->nparens*3/2 + 3;
3793      if(PL_reg_start_tmp)
3794       Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3795      else
3796       Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*);
3797     }
3798
3799   eval_recurse_doit: /* Share code with GOSUB below this line */
3800     /* run the pattern returned from (??{...}) */
3801     ST.cp = regcppush(0); /* Save *all* the positions. */
3802     REGCP_SET(ST.lastcp);
3803
3804     PL_regoffs = re->offs; /* essentially NOOP on GOSUB */
3805
3806     /* see regtry, specifically PL_reglast(?:close)?paren is a pointer! (i dont know why) :dmq */
3807     PL_reglastparen = &re->lastparen;
3808     PL_reglastcloseparen = &re->lastcloseparen;
3809     re->lastparen = 0;
3810     re->lastcloseparen = 0;
3811
3812     PL_reginput = locinput;
3813     PL_regsize = 0;
3814
3815     /* XXXX This is too dramatic a measure... */
3816     PL_reg_maxiter = 0;
3817
3818     ST.toggle_reg_flags = PL_reg_flags;
3819     if (RX_UTF8(re))
3820      PL_reg_flags |= RF_utf8;
3821     else
3822      PL_reg_flags &= ~RF_utf8;
3823     ST.toggle_reg_flags ^= PL_reg_flags; /* diff of old and new */
3824
3825     ST.prev_rex = rex;
3826     ST.prev_curlyx = cur_curlyx;
3827     SETREX(rex,re);
3828     rexi = rei;
3829     cur_curlyx = NULL;
3830     ST.B = next;
3831     ST.prev_eval = cur_eval;
3832     cur_eval = st;
3833     /* now continue from first node in postoned RE */
3834     PUSH_YES_STATE_GOTO(EVAL_AB, startpoint);
3835     /* NOTREACHED */
3836    }
3837    /* logical is 1,   /(?(?{...})X|Y)/ */
3838    sw = (bool)SvTRUE(ret);
3839    logical = 0;
3840    break;
3841   }
3842
3843   case EVAL_AB: /* cleanup after a successful (??{A})B */
3844    /* note: this is called twice; first after popping B, then A */
3845    PL_reg_flags ^= ST.toggle_reg_flags;
3846    ReREFCNT_dec(rex);
3847    SETREX(rex,ST.prev_rex);
3848    rexi = RXi_GET(rex);
3849    regcpblow(ST.cp);
3850    cur_eval = ST.prev_eval;
3851    cur_curlyx = ST.prev_curlyx;
3852
3853    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3854    PL_reglastparen = &rex->lastparen;
3855    PL_reglastcloseparen = &rex->lastcloseparen;
3856    /* also update PL_regoffs */
3857    PL_regoffs = rex->offs;
3858
3859    /* XXXX This is too dramatic a measure... */
3860    PL_reg_maxiter = 0;
3861    if ( nochange_depth )
3862     nochange_depth--;
3863    sayYES;
3864
3865
3866   case EVAL_AB_fail: /* unsuccessfully ran A or B in (??{A})B */
3867    /* note: this is called twice; first after popping B, then A */
3868    PL_reg_flags ^= ST.toggle_reg_flags;
3869    ReREFCNT_dec(rex);
3870    SETREX(rex,ST.prev_rex);
3871    rexi = RXi_GET(rex);
3872    /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
3873    PL_reglastparen = &rex->lastparen;
3874    PL_reglastcloseparen = &rex->lastcloseparen;
3875
3876    PL_reginput = locinput;
3877    REGCP_UNWIND(ST.lastcp);
3878    regcppop(rex);
3879    cur_eval = ST.prev_eval;
3880    cur_curlyx = ST.prev_curlyx;
3881    /* XXXX This is too dramatic a measure... */
3882    PL_reg_maxiter = 0;
3883    if ( nochange_depth )
3884     nochange_depth--;
3885    sayNO_SILENT;
3886 #undef ST
3887
3888   case OPEN:
3889    n = ARG(scan);  /* which paren pair */
3890    PL_reg_start_tmp[n] = locinput;
3891    if (n > PL_regsize)
3892     PL_regsize = n;
3893    lastopen = n;
3894    break;
3895   case CLOSE:
3896    n = ARG(scan);  /* which paren pair */
3897    PL_regoffs[n].start = PL_reg_start_tmp[n] - PL_bostr;
3898    PL_regoffs[n].end = locinput - PL_bostr;
3899    /*if (n > PL_regsize)
3900     PL_regsize = n;*/
3901    if (n > *PL_reglastparen)
3902     *PL_reglastparen = n;
3903    *PL_reglastcloseparen = n;
3904    if (cur_eval && cur_eval->u.eval.close_paren == n) {
3905     goto fake_end;
3906    }
3907    break;
3908   case ACCEPT:
3909    if (ARG(scan)){
3910     regnode *cursor;
3911     for (cursor=scan;
3912      cursor && OP(cursor)!=END;
3913      cursor=regnext(cursor))
3914     {
3915      if ( OP(cursor)==CLOSE ){
3916       n = ARG(cursor);
3917       if ( n <= lastopen ) {
3918        PL_regoffs[n].start
3919         = PL_reg_start_tmp[n] - PL_bostr;
3920        PL_regoffs[n].end = locinput - PL_bostr;
3921        /*if (n > PL_regsize)
3922        PL_regsize = n;*/
3923        if (n > *PL_reglastparen)
3924         *PL_reglastparen = n;
3925        *PL_reglastcloseparen = n;
3926        if ( n == ARG(scan) || (cur_eval &&
3927         cur_eval->u.eval.close_paren == n))
3928         break;
3929       }
3930      }
3931     }
3932    }
3933    goto fake_end;
3934    /*NOTREACHED*/
3935   case GROUPP:
3936    n = ARG(scan);  /* which paren pair */
3937    sw = (bool)(*PL_reglastparen >= n && PL_regoffs[n].end != -1);
3938    break;
3939   case NGROUPP:
3940    /* reg_check_named_buff_matched returns 0 for no match */
3941    sw = (bool)(0 < reg_check_named_buff_matched(rex,scan));
3942    break;
3943   case INSUBP:
3944    n = ARG(scan);
3945    sw = (cur_eval && (!n || cur_eval->u.eval.close_paren == n));
3946    break;
3947   case DEFINEP:
3948    sw = 0;
3949    break;
3950   case IFTHEN:
3951    PL_reg_leftiter = PL_reg_maxiter;  /* Void cache */
3952    if (sw)
3953     next = NEXTOPER(NEXTOPER(scan));
3954    else {
3955     next = scan + ARG(scan);
3956     if (OP(next) == IFTHEN) /* Fake one. */
3957      next = NEXTOPER(NEXTOPER(next));
3958    }
3959    break;
3960   case LOGICAL:
3961    logical = scan->flags;
3962    break;
3963
3964 /*******************************************************************
3965
3966 The CURLYX/WHILEM pair of ops handle the most generic case of the /A*B/
3967 pattern, where A and B are subpatterns. (For simple A, CURLYM or
3968 STAR/PLUS/CURLY/CURLYN are used instead.)
3969
3970 A*B is compiled as <CURLYX><A><WHILEM><B>
3971
3972 On entry to the subpattern, CURLYX is called. This pushes a CURLYX
3973 state, which contains the current count, initialised to -1. It also sets
3974 cur_curlyx to point to this state, with any previous value saved in the
3975 state block.
3976
3977 CURLYX then jumps straight to the WHILEM op, rather than executing A,
3978 since the pattern may possibly match zero times (i.e. it's a while {} loop
3979 rather than a do {} while loop).
3980
3981 Each entry to WHILEM represents a successful match of A. The count in the
3982 CURLYX block is incremented, another WHILEM state is pushed, and execution
3983 passes to A or B depending on greediness and the current count.
3984
3985 For example, if matching against the string a1a2a3b (where the aN are
3986 substrings that match /A/), then the match progresses as follows: (the
3987 pushed states are interspersed with the bits of strings matched so far):
3988
3989  <CURLYX cnt=-1>
3990  <CURLYX cnt=0><WHILEM>
3991  <CURLYX cnt=1><WHILEM> a1 <WHILEM>
3992  <CURLYX cnt=2><WHILEM> a1 <WHILEM> a2 <WHILEM>
3993  <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM>
3994  <CURLYX cnt=3><WHILEM> a1 <WHILEM> a2 <WHILEM> a3 <WHILEM> b
3995
3996 (Contrast this with something like CURLYM, which maintains only a single
3997 backtrack state:
3998
3999  <CURLYM cnt=0> a1
4000  a1 <CURLYM cnt=1> a2
4001  a1 a2 <CURLYM cnt=2> a3
4002  a1 a2 a3 <CURLYM cnt=3> b
4003 )
4004
4005 Each WHILEM state block marks a point to backtrack to upon partial failure
4006 of A or B, and also contains some minor state data related to that
4007 iteration.  The CURLYX block, pointed to by cur_curlyx, contains the
4008 overall state, such as the count, and pointers to the A and B ops.
4009
4010 This is complicated slightly by nested CURLYX/WHILEM's. Since cur_curlyx
4011 must always point to the *current* CURLYX block, the rules are:
4012
4013 When executing CURLYX, save the old cur_curlyx in the CURLYX state block,
4014 and set cur_curlyx to point the new block.
4015
4016 When popping the CURLYX block after a successful or unsuccessful match,
4017 restore the previous cur_curlyx.
4018
4019 When WHILEM is about to execute B, save the current cur_curlyx, and set it
4020 to the outer one saved in the CURLYX block.
4021
4022 When popping the WHILEM block after a successful or unsuccessful B match,
4023 restore the previous cur_curlyx.
4024
4025 Here's an example for the pattern (AI* BI)*BO
4026 I and O refer to inner and outer, C and W refer to CURLYX and WHILEM:
4027
4028 cur_
4029 curlyx backtrack stack
4030 ------ ---------------
4031 NULL
4032 CO     <CO prev=NULL> <WO>
4033 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4034 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4035 NULL   <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi <WO prev=CO> bo
4036
4037 At this point the pattern succeeds, and we work back down the stack to
4038 clean up, restoring as we go:
4039
4040 CO     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai <WI prev=CI> bi
4041 CI     <CO prev=NULL> <WO> <CI prev=CO> <WI> ai
4042 CO     <CO prev=NULL> <WO>
4043 NULL
4044
4045 *******************************************************************/
4046
4047 #define ST st->u.curlyx
4048
4049   case CURLYX:    /* start of /A*B/  (for complex A) */
4050   {
4051    /* No need to save/restore up to this paren */
4052    I32 parenfloor = scan->flags;
4053
4054    assert(next); /* keep Coverity happy */
4055    if (OP(PREVOPER(next)) == NOTHING) /* LONGJMP */
4056     next += ARG(next);
4057
4058    /* XXXX Probably it is better to teach regpush to support
4059    parenfloor > PL_regsize... */
4060    if (parenfloor > (I32)*PL_reglastparen)
4061     parenfloor = *PL_reglastparen; /* Pessimization... */
4062
4063    ST.prev_curlyx= cur_curlyx;
4064    cur_curlyx = st;
4065    ST.cp = PL_savestack_ix;
4066
4067    /* these fields contain the state of the current curly.
4068    * they are accessed by subsequent WHILEMs */
4069    ST.parenfloor = parenfloor;
4070    ST.min = ARG1(scan);
4071    ST.max = ARG2(scan);
4072    ST.A = NEXTOPER(scan) + EXTRA_STEP_2ARGS;
4073    ST.B = next;
4074    ST.minmod = minmod;
4075    minmod = 0;
4076    ST.count = -1; /* this will be updated by WHILEM */
4077    ST.lastloc = NULL;  /* this will be updated by WHILEM */
4078
4079    PL_reginput = locinput;
4080    PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
4081    /* NOTREACHED */
4082   }
4083
4084   case CURLYX_end: /* just finished matching all of A*B */
4085    cur_curlyx = ST.prev_curlyx;
4086    sayYES;
4087    /* NOTREACHED */
4088
4089   case CURLYX_end_fail: /* just failed to match all of A*B */
4090    regcpblow(ST.cp);
4091    cur_curlyx = ST.prev_curlyx;
4092    sayNO;
4093    /* NOTREACHED */
4094
4095
4096 #undef ST
4097 #define ST st->u.whilem
4098
4099   case WHILEM:     /* just matched an A in /A*B/  (for complex A) */
4100   {
4101    /* see the discussion above about CURLYX/WHILEM */
4102    I32 n;
4103    assert(cur_curlyx); /* keep Coverity happy */
4104    n = ++cur_curlyx->u.curlyx.count; /* how many A's matched */
4105    ST.save_lastloc = cur_curlyx->u.curlyx.lastloc;
4106    ST.cache_offset = 0;
4107    ST.cache_mask = 0;
4108
4109    PL_reginput = locinput;
4110
4111    DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4112     "%*s  whilem: matched %ld out of %ld..%ld\n",
4113     REPORT_CODE_OFF+depth*2, "", (long)n,
4114     (long)cur_curlyx->u.curlyx.min,
4115     (long)cur_curlyx->u.curlyx.max)
4116    );
4117
4118    /* First just match a string of min A's. */
4119
4120    if (n < cur_curlyx->u.curlyx.min) {
4121     cur_curlyx->u.curlyx.lastloc = locinput;
4122     PUSH_STATE_GOTO(WHILEM_A_pre, cur_curlyx->u.curlyx.A);
4123     /* NOTREACHED */
4124    }
4125
4126    /* If degenerate A matches "", assume A done. */
4127
4128    if (locinput == cur_curlyx->u.curlyx.lastloc) {
4129     DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4130     "%*s  whilem: empty match detected, trying continuation...\n",
4131     REPORT_CODE_OFF+depth*2, "")
4132     );
4133     goto do_whilem_B_max;
4134    }
4135
4136    /* super-linear cache processing */
4137
4138    if (scan->flags) {
4139
4140     if (!PL_reg_maxiter) {
4141      /* start the countdown: Postpone detection until we
4142      * know the match is not *that* much linear. */
4143      PL_reg_maxiter = (PL_regeol - PL_bostr + 1) * (scan->flags>>4);
4144      /* possible overflow for long strings and many CURLYX's */
4145      if (PL_reg_maxiter < 0)
4146       PL_reg_maxiter = I32_MAX;
4147      PL_reg_leftiter = PL_reg_maxiter;
4148     }
4149
4150     if (PL_reg_leftiter-- == 0) {
4151      /* initialise cache */
4152      const I32 size = (PL_reg_maxiter + 7)/8;
4153      if (PL_reg_poscache) {
4154       if ((I32)PL_reg_poscache_size < size) {
4155        Renew(PL_reg_poscache, size, char);
4156        PL_reg_poscache_size = size;
4157       }
4158       Zero(PL_reg_poscache, size, char);
4159      }
4160      else {
4161       PL_reg_poscache_size = size;
4162       Newxz(PL_reg_poscache, size, char);
4163      }
4164      DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4165  "%swhilem: Detected a super-linear match, switching on caching%s...\n",
4166        PL_colors[4], PL_colors[5])
4167      );
4168     }
4169
4170     if (PL_reg_leftiter < 0) {
4171      /* have we already failed at this position? */
4172      I32 offset, mask;
4173      offset  = (scan->flags & 0xf) - 1
4174         + (locinput - PL_bostr)  * (scan->flags>>4);
4175      mask    = 1 << (offset % 8);
4176      offset /= 8;
4177      if (PL_reg_poscache[offset] & mask) {
4178       DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
4179        "%*s  whilem: (cache) already tried at this position...\n",
4180        REPORT_CODE_OFF+depth*2, "")
4181       );
4182       sayNO; /* cache records failure */
4183      }
4184      ST.cache_offset = offset;
4185      ST.cache_mask   = mask;
4186     }
4187    }
4188
4189    /* Prefer B over A for minimal matching. */
4190
4191    if (cur_curlyx->u.curlyx.minmod) {
4192     ST.save_curlyx = cur_curlyx;
4193     cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4194     ST.cp = regcppush(ST.save_curlyx->u.curlyx.parenfloor);
4195     REGCP_SET(ST.lastcp);
4196     PUSH_YES_STATE_GOTO(WHILEM_B_min, ST.save_curlyx->u.curlyx.B);
4197     /* NOTREACHED */
4198    }
4199
4200    /* Prefer A over B for maximal matching. */
4201
4202    if (n < cur_curlyx->u.curlyx.max) { /* More greed allowed? */
4203     ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4204     cur_curlyx->u.curlyx.lastloc = locinput;
4205     REGCP_SET(ST.lastcp);
4206     PUSH_STATE_GOTO(WHILEM_A_max, cur_curlyx->u.curlyx.A);
4207     /* NOTREACHED */
4208    }
4209    goto do_whilem_B_max;
4210   }
4211   /* NOTREACHED */
4212
4213   case WHILEM_B_min: /* just matched B in a minimal match */
4214   case WHILEM_B_max: /* just matched B in a maximal match */
4215    cur_curlyx = ST.save_curlyx;
4216    sayYES;
4217    /* NOTREACHED */
4218
4219   case WHILEM_B_max_fail: /* just failed to match B in a maximal match */
4220    cur_curlyx = ST.save_curlyx;
4221    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4222    cur_curlyx->u.curlyx.count--;
4223    CACHEsayNO;
4224    /* NOTREACHED */
4225
4226   case WHILEM_A_min_fail: /* just failed to match A in a minimal match */
4227    REGCP_UNWIND(ST.lastcp);
4228    regcppop(rex);
4229    /* FALL THROUGH */
4230   case WHILEM_A_pre_fail: /* just failed to match even minimal A */
4231    cur_curlyx->u.curlyx.lastloc = ST.save_lastloc;
4232    cur_curlyx->u.curlyx.count--;
4233    CACHEsayNO;
4234    /* NOTREACHED */
4235
4236   case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
4237    REGCP_UNWIND(ST.lastcp);
4238    regcppop(rex); /* Restore some previous $<digit>s? */
4239    PL_reginput = locinput;
4240    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4241     "%*s  whilem: failed, trying continuation...\n",
4242     REPORT_CODE_OFF+depth*2, "")
4243    );
4244   do_whilem_B_max:
4245    if (cur_curlyx->u.curlyx.count >= REG_INFTY
4246     && ckWARN(WARN_REGEXP)
4247     && !(PL_reg_flags & RF_warned))
4248    {
4249     PL_reg_flags |= RF_warned;
4250     Perl_warner(aTHX_ packWARN(WARN_REGEXP), "%s limit (%d) exceeded",
4251      "Complex regular subexpression recursion",
4252      REG_INFTY - 1);
4253    }
4254
4255    /* now try B */
4256    ST.save_curlyx = cur_curlyx;
4257    cur_curlyx = cur_curlyx->u.curlyx.prev_curlyx;
4258    PUSH_YES_STATE_GOTO(WHILEM_B_max, ST.save_curlyx->u.curlyx.B);
4259    /* NOTREACHED */
4260
4261   case WHILEM_B_min_fail: /* just failed to match B in a minimal match */
4262    cur_curlyx = ST.save_curlyx;
4263    REGCP_UNWIND(ST.lastcp);
4264    regcppop(rex);
4265
4266    if (cur_curlyx->u.curlyx.count >= cur_curlyx->u.curlyx.max) {
4267     /* Maximum greed exceeded */
4268     if (cur_curlyx->u.curlyx.count >= REG_INFTY
4269      && ckWARN(WARN_REGEXP)
4270      && !(PL_reg_flags & RF_warned))
4271     {
4272      PL_reg_flags |= RF_warned;
4273      Perl_warner(aTHX_ packWARN(WARN_REGEXP),
4274       "%s limit (%d) exceeded",
4275       "Complex regular subexpression recursion",
4276       REG_INFTY - 1);
4277     }
4278     cur_curlyx->u.curlyx.count--;
4279     CACHEsayNO;
4280    }
4281
4282    DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4283     "%*s  trying longer...\n", REPORT_CODE_OFF+depth*2, "")
4284    );
4285    /* Try grabbing another A and see if it helps. */
4286    PL_reginput = locinput;
4287    cur_curlyx->u.curlyx.lastloc = locinput;
4288    ST.cp = regcppush(cur_curlyx->u.curlyx.parenfloor);
4289    REGCP_SET(ST.lastcp);
4290    PUSH_STATE_GOTO(WHILEM_A_min, ST.save_curlyx->u.curlyx.A);
4291    /* NOTREACHED */
4292
4293 #undef  ST
4294 #define ST st->u.branch
4295
4296   case BRANCHJ:     /*  /(...|A|...)/ with long next pointer */
4297    next = scan + ARG(scan);
4298    if (next == scan)
4299     next = NULL;
4300    scan = NEXTOPER(scan);
4301    /* FALL THROUGH */
4302
4303   case BRANCH:     /*  /(...|A|...)/ */
4304    scan = NEXTOPER(scan); /* scan now points to inner node */
4305    ST.lastparen = *PL_reglastparen;
4306    ST.next_branch = next;
4307    REGCP_SET(ST.cp);
4308    PL_reginput = locinput;
4309
4310    /* Now go into the branch */
4311    if (has_cutgroup) {
4312     PUSH_YES_STATE_GOTO(BRANCH_next, scan);
4313    } else {
4314     PUSH_STATE_GOTO(BRANCH_next, scan);
4315    }
4316    /* NOTREACHED */
4317   case CUTGROUP:
4318    PL_reginput = locinput;
4319    sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
4320     MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
4321    PUSH_STATE_GOTO(CUTGROUP_next,next);
4322    /* NOTREACHED */
4323   case CUTGROUP_next_fail:
4324    do_cutgroup = 1;
4325    no_final = 1;
4326    if (st->u.mark.mark_name)
4327     sv_commit = st->u.mark.mark_name;
4328    sayNO;
4329    /* NOTREACHED */
4330   case BRANCH_next:
4331    sayYES;
4332    /* NOTREACHED */
4333   case BRANCH_next_fail: /* that branch failed; try the next, if any */
4334    if (do_cutgroup) {
4335     do_cutgroup = 0;
4336     no_final = 0;
4337    }
4338    REGCP_UNWIND(ST.cp);
4339    for (n = *PL_reglastparen; n > ST.lastparen; n--)
4340     PL_regoffs[n].end = -1;
4341    *PL_reglastparen = n;
4342    /*dmq: *PL_reglastcloseparen = n; */
4343    scan = ST.next_branch;
4344    /* no more branches? */
4345    if (!scan || (OP(scan) != BRANCH && OP(scan) != BRANCHJ)) {
4346     DEBUG_EXECUTE_r({
4347      PerlIO_printf( Perl_debug_log,
4348       "%*s  %sBRANCH failed...%s\n",
4349       REPORT_CODE_OFF+depth*2, "",
4350       PL_colors[4],
4351       PL_colors[5] );
4352     });
4353     sayNO_SILENT;
4354    }
4355    continue; /* execute next BRANCH[J] op */
4356    /* NOTREACHED */
4357
4358   case MINMOD:
4359    minmod = 1;
4360    break;
4361
4362 #undef  ST
4363 #define ST st->u.curlym
4364
4365   case CURLYM: /* /A{m,n}B/ where A is fixed-length */
4366
4367    /* This is an optimisation of CURLYX that enables us to push
4368    * only a single backtracking state, no matter how many matches
4369    * there are in {m,n}. It relies on the pattern being constant
4370    * length, with no parens to influence future backrefs
4371    */
4372
4373    ST.me = scan;
4374    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4375
4376    /* if paren positive, emulate an OPEN/CLOSE around A */
4377    if (ST.me->flags) {
4378     U32 paren = ST.me->flags;
4379     if (paren > PL_regsize)
4380      PL_regsize = paren;
4381     if (paren > *PL_reglastparen)
4382      *PL_reglastparen = paren;
4383     scan += NEXT_OFF(scan); /* Skip former OPEN. */
4384    }
4385    ST.A = scan;
4386    ST.B = next;
4387    ST.alen = 0;
4388    ST.count = 0;
4389    ST.minmod = minmod;
4390    minmod = 0;
4391    ST.c1 = CHRTEST_UNINIT;
4392    REGCP_SET(ST.cp);
4393
4394    if (!(ST.minmod ? ARG1(ST.me) : ARG2(ST.me))) /* min/max */
4395     goto curlym_do_B;
4396
4397   curlym_do_A: /* execute the A in /A{m,n}B/  */
4398    PL_reginput = locinput;
4399    PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
4400    /* NOTREACHED */
4401
4402   case CURLYM_A: /* we've just matched an A */
4403    locinput = st->locinput;
4404    nextchr = UCHARAT(locinput);
4405
4406    ST.count++;
4407    /* after first match, determine A's length: u.curlym.alen */
4408    if (ST.count == 1) {
4409     if (PL_reg_match_utf8) {
4410      char *s = locinput;
4411      while (s < PL_reginput) {
4412       ST.alen++;
4413       s += UTF8SKIP(s);
4414      }
4415     }
4416     else {
4417      ST.alen = PL_reginput - locinput;
4418     }
4419     if (ST.alen == 0)
4420      ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
4421    }
4422    DEBUG_EXECUTE_r(
4423     PerlIO_printf(Perl_debug_log,
4424       "%*s  CURLYM now matched %"IVdf" times, len=%"IVdf"...\n",
4425       (int)(REPORT_CODE_OFF+(depth*2)), "",
4426       (IV) ST.count, (IV)ST.alen)
4427    );
4428
4429    locinput = PL_reginput;
4430
4431    if (cur_eval && cur_eval->u.eval.close_paren &&
4432     cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4433     goto fake_end;
4434
4435    {
4436     I32 max = (ST.minmod ? ARG1(ST.me) : ARG2(ST.me));
4437     if ( max == REG_INFTY || ST.count < max )
4438      goto curlym_do_A; /* try to match another A */
4439    }
4440    goto curlym_do_B; /* try to match B */
4441
4442   case CURLYM_A_fail: /* just failed to match an A */
4443    REGCP_UNWIND(ST.cp);
4444
4445    if (ST.minmod || ST.count < ARG1(ST.me) /* min*/
4446     || (cur_eval && cur_eval->u.eval.close_paren &&
4447      cur_eval->u.eval.close_paren == (U32)ST.me->flags))
4448     sayNO;
4449
4450   curlym_do_B: /* execute the B in /A{m,n}B/  */
4451    PL_reginput = locinput;
4452    if (ST.c1 == CHRTEST_UNINIT) {
4453     /* calculate c1 and c2 for possible match of 1st char
4454     * following curly */
4455     ST.c1 = ST.c2 = CHRTEST_VOID;
4456     if (HAS_TEXT(ST.B) || JUMPABLE(ST.B)) {
4457      regnode *text_node = ST.B;
4458      if (! HAS_TEXT(text_node))
4459       FIND_NEXT_IMPT(text_node);
4460      /* this used to be
4461
4462       (HAS_TEXT(text_node) && PL_regkind[OP(text_node)] == EXACT)
4463
4464        But the former is redundant in light of the latter.
4465
4466        if this changes back then the macro for
4467        IS_TEXT and friends need to change.
4468      */
4469      if (PL_regkind[OP(text_node)] == EXACT)
4470      {
4471
4472       ST.c1 = (U8)*STRING(text_node);
4473       ST.c2 =
4474        (IS_TEXTF(text_node))
4475        ? PL_fold[ST.c1]
4476        : (IS_TEXTFL(text_node))
4477         ? PL_fold_locale[ST.c1]
4478         : ST.c1;
4479      }
4480     }
4481    }
4482
4483    DEBUG_EXECUTE_r(
4484     PerlIO_printf(Perl_debug_log,
4485      "%*s  CURLYM trying tail with matches=%"IVdf"...\n",
4486      (int)(REPORT_CODE_OFF+(depth*2)),
4487      "", (IV)ST.count)
4488     );
4489    if (ST.c1 != CHRTEST_VOID
4490      && UCHARAT(PL_reginput) != ST.c1
4491      && UCHARAT(PL_reginput) != ST.c2)
4492    {
4493     /* simulate B failing */
4494     DEBUG_OPTIMISE_r(
4495      PerlIO_printf(Perl_debug_log,
4496       "%*s  CURLYM Fast bail c1=%"IVdf" c2=%"IVdf"\n",
4497       (int)(REPORT_CODE_OFF+(depth*2)),"",
4498       (IV)ST.c1,(IV)ST.c2
4499     ));
4500     state_num = CURLYM_B_fail;
4501     goto reenter_switch;
4502    }
4503
4504    if (ST.me->flags) {
4505     /* mark current A as captured */
4506     I32 paren = ST.me->flags;
4507     if (ST.count) {
4508      PL_regoffs[paren].start
4509       = HOPc(PL_reginput, -ST.alen) - PL_bostr;
4510      PL_regoffs[paren].end = PL_reginput - PL_bostr;
4511      /*dmq: *PL_reglastcloseparen = paren; */
4512     }
4513     else
4514      PL_regoffs[paren].end = -1;
4515     if (cur_eval && cur_eval->u.eval.close_paren &&
4516      cur_eval->u.eval.close_paren == (U32)ST.me->flags)
4517     {
4518      if (ST.count)
4519       goto fake_end;
4520      else
4521       sayNO;
4522     }
4523    }
4524
4525    PUSH_STATE_GOTO(CURLYM_B, ST.B); /* match B */
4526    /* NOTREACHED */
4527
4528   case CURLYM_B_fail: /* just failed to match a B */
4529    REGCP_UNWIND(ST.cp);
4530    if (ST.minmod) {
4531     I32 max = ARG2(ST.me);
4532     if (max != REG_INFTY && ST.count == max)
4533      sayNO;
4534     goto curlym_do_A; /* try to match a further A */
4535    }
4536    /* backtrack one A */
4537    if (ST.count == ARG1(ST.me) /* min */)
4538     sayNO;
4539    ST.count--;
4540    locinput = HOPc(locinput, -ST.alen);
4541    goto curlym_do_B; /* try to match B */
4542
4543 #undef ST
4544 #define ST st->u.curly
4545
4546 #define CURLY_SETPAREN(paren, success) \
4547  if (paren) { \
4548   if (success) { \
4549    PL_regoffs[paren].start = HOPc(locinput, -1) - PL_bostr; \
4550    PL_regoffs[paren].end = locinput - PL_bostr; \
4551    *PL_reglastcloseparen = paren; \
4552   } \
4553   else \
4554    PL_regoffs[paren].end = -1; \
4555  }
4556
4557   case STAR:  /*  /A*B/ where A is width 1 */
4558    ST.paren = 0;
4559    ST.min = 0;
4560    ST.max = REG_INFTY;
4561    scan = NEXTOPER(scan);
4562    goto repeat;
4563   case PLUS:  /*  /A+B/ where A is width 1 */
4564    ST.paren = 0;
4565    ST.min = 1;
4566    ST.max = REG_INFTY;
4567    scan = NEXTOPER(scan);
4568    goto repeat;
4569   case CURLYN:  /*  /(A){m,n}B/ where A is width 1 */
4570    ST.paren = scan->flags; /* Which paren to set */
4571    if (ST.paren > PL_regsize)
4572     PL_regsize = ST.paren;
4573    if (ST.paren > *PL_reglastparen)
4574     *PL_reglastparen = ST.paren;
4575    ST.min = ARG1(scan);  /* min to match */
4576    ST.max = ARG2(scan);  /* max to match */
4577    if (cur_eval && cur_eval->u.eval.close_paren &&
4578     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4579     ST.min=1;
4580     ST.max=1;
4581    }
4582    scan = regnext(NEXTOPER(scan) + NODE_STEP_REGNODE);
4583    goto repeat;
4584   case CURLY:  /*  /A{m,n}B/ where A is width 1 */
4585    ST.paren = 0;
4586    ST.min = ARG1(scan);  /* min to match */
4587    ST.max = ARG2(scan);  /* max to match */
4588    scan = NEXTOPER(scan) + NODE_STEP_REGNODE;
4589   repeat:
4590    /*
4591    * Lookahead to avoid useless match attempts
4592    * when we know what character comes next.
4593    *
4594    * Used to only do .*x and .*?x, but now it allows
4595    * for )'s, ('s and (?{ ... })'s to be in the way
4596    * of the quantifier and the EXACT-like node.  -- japhy
4597    */
4598
4599    if (ST.min > ST.max) /* XXX make this a compile-time check? */
4600     sayNO;
4601    if (HAS_TEXT(next) || JUMPABLE(next)) {
4602     U8 *s;
4603     regnode *text_node = next;
4604
4605     if (! HAS_TEXT(text_node))
4606      FIND_NEXT_IMPT(text_node);
4607
4608     if (! HAS_TEXT(text_node))
4609      ST.c1 = ST.c2 = CHRTEST_VOID;
4610     else {
4611      if ( PL_regkind[OP(text_node)] != EXACT ) {
4612       ST.c1 = ST.c2 = CHRTEST_VOID;
4613       goto assume_ok_easy;
4614      }
4615      else
4616       s = (U8*)STRING(text_node);
4617
4618      /*  Currently we only get here when
4619
4620       PL_rekind[OP(text_node)] == EXACT
4621
4622       if this changes back then the macro for IS_TEXT and
4623       friends need to change. */
4624      if (!UTF) {
4625       ST.c2 = ST.c1 = *s;
4626       if (IS_TEXTF(text_node))
4627        ST.c2 = PL_fold[ST.c1];
4628       else if (IS_TEXTFL(text_node))
4629        ST.c2 = PL_fold_locale[ST.c1];
4630      }
4631      else { /* UTF */
4632       if (IS_TEXTF(text_node)) {
4633        STRLEN ulen1, ulen2;
4634        U8 tmpbuf1[UTF8_MAXBYTES_CASE+1];
4635        U8 tmpbuf2[UTF8_MAXBYTES_CASE+1];
4636
4637        to_utf8_lower((U8*)s, tmpbuf1, &ulen1);
4638        to_utf8_upper((U8*)s, tmpbuf2, &ulen2);
4639 #ifdef EBCDIC
4640        ST.c1 = utf8n_to_uvchr(tmpbuf1, UTF8_MAXLEN, 0,
4641              ckWARN(WARN_UTF8) ?
4642              0 : UTF8_ALLOW_ANY);
4643        ST.c2 = utf8n_to_uvchr(tmpbuf2, UTF8_MAXLEN, 0,
4644              ckWARN(WARN_UTF8) ?
4645              0 : UTF8_ALLOW_ANY);
4646 #else
4647        ST.c1 = utf8n_to_uvuni(tmpbuf1, UTF8_MAXBYTES, 0,
4648              uniflags);
4649        ST.c2 = utf8n_to_uvuni(tmpbuf2, UTF8_MAXBYTES, 0,
4650              uniflags);
4651 #endif
4652       }
4653       else {
4654        ST.c2 = ST.c1 = utf8n_to_uvchr(s, UTF8_MAXBYTES, 0,
4655              uniflags);
4656       }
4657      }
4658     }
4659    }
4660    else
4661     ST.c1 = ST.c2 = CHRTEST_VOID;
4662   assume_ok_easy:
4663
4664    ST.A = scan;
4665    ST.B = next;
4666    PL_reginput = locinput;
4667    if (minmod) {
4668     minmod = 0;
4669     if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
4670      sayNO;
4671     ST.count = ST.min;
4672     locinput = PL_reginput;
4673     REGCP_SET(ST.cp);
4674     if (ST.c1 == CHRTEST_VOID)
4675      goto curly_try_B_min;
4676
4677     ST.oldloc = locinput;
4678
4679     /* set ST.maxpos to the furthest point along the
4680     * string that could possibly match */
4681     if  (ST.max == REG_INFTY) {
4682      ST.maxpos = PL_regeol - 1;
4683      if (do_utf8)
4684       while (UTF8_IS_CONTINUATION(*(U8*)ST.maxpos))
4685        ST.maxpos--;
4686     }
4687     else if (do_utf8) {
4688      int m = ST.max - ST.min;
4689      for (ST.maxpos = locinput;
4690       m >0 && ST.maxpos + UTF8SKIP(ST.maxpos) <= PL_regeol; m--)
4691       ST.maxpos += UTF8SKIP(ST.maxpos);
4692     }
4693     else {
4694      ST.maxpos = locinput + ST.max - ST.min;
4695      if (ST.maxpos >= PL_regeol)
4696       ST.maxpos = PL_regeol - 1;
4697     }
4698     goto curly_try_B_min_known;
4699
4700    }
4701    else {
4702     ST.count = regrepeat(rex, ST.A, ST.max, depth);
4703     locinput = PL_reginput;
4704     if (ST.count < ST.min)
4705      sayNO;
4706     if ((ST.count > ST.min)
4707      && (PL_regkind[OP(ST.B)] == EOL) && (OP(ST.B) != MEOL))
4708     {
4709      /* A{m,n} must come at the end of the string, there's
4710      * no point in backing off ... */
4711      ST.min = ST.count;
4712      /* ...except that $ and \Z can match before *and* after
4713      newline at the end.  Consider "\n\n" =~ /\n+\Z\n/.
4714      We may back off by one in this case. */
4715      if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
4716       ST.min--;
4717     }
4718     REGCP_SET(ST.cp);
4719     goto curly_try_B_max;
4720    }
4721    /* NOTREACHED */
4722
4723
4724   case CURLY_B_min_known_fail:
4725    /* failed to find B in a non-greedy match where c1,c2 valid */
4726    if (ST.paren && ST.count)
4727     PL_regoffs[ST.paren].end = -1;
4728
4729    PL_reginput = locinput; /* Could be reset... */
4730    REGCP_UNWIND(ST.cp);
4731    /* Couldn't or didn't -- move forward. */
4732    ST.oldloc = locinput;
4733    if (do_utf8)
4734     locinput += UTF8SKIP(locinput);
4735    else
4736     locinput++;
4737    ST.count++;
4738   curly_try_B_min_known:
4739    /* find the next place where 'B' could work, then call B */
4740    {
4741     int n;
4742     if (do_utf8) {
4743      n = (ST.oldloc == locinput) ? 0 : 1;
4744      if (ST.c1 == ST.c2) {
4745       STRLEN len;
4746       /* set n to utf8_distance(oldloc, locinput) */
4747       while (locinput <= ST.maxpos &&
4748        utf8n_to_uvchr((U8*)locinput,
4749            UTF8_MAXBYTES, &len,
4750            uniflags) != (UV)ST.c1) {
4751        locinput += len;
4752        n++;
4753       }
4754      }
4755      else {
4756       /* set n to utf8_distance(oldloc, locinput) */
4757       while (locinput <= ST.maxpos) {
4758        STRLEN len;
4759        const UV c = utf8n_to_uvchr((U8*)locinput,
4760             UTF8_MAXBYTES, &len,
4761             uniflags);
4762        if (c == (UV)ST.c1 || c == (UV)ST.c2)
4763         break;
4764        locinput += len;
4765        n++;
4766       }
4767      }
4768     }
4769     else {
4770      if (ST.c1 == ST.c2) {
4771       while (locinput <= ST.maxpos &&
4772        UCHARAT(locinput) != ST.c1)
4773        locinput++;
4774      }
4775      else {
4776       while (locinput <= ST.maxpos
4777        && UCHARAT(locinput) != ST.c1
4778        && UCHARAT(locinput) != ST.c2)
4779        locinput++;
4780      }
4781      n = locinput - ST.oldloc;
4782     }
4783     if (locinput > ST.maxpos)
4784      sayNO;
4785     /* PL_reginput == oldloc now */
4786     if (n) {
4787      ST.count += n;
4788      if (regrepeat(rex, ST.A, n, depth) < n)
4789       sayNO;
4790     }
4791     PL_reginput = locinput;
4792     CURLY_SETPAREN(ST.paren, ST.count);
4793     if (cur_eval && cur_eval->u.eval.close_paren &&
4794      cur_eval->u.eval.close_paren == (U32)ST.paren) {
4795      goto fake_end;
4796     }
4797     PUSH_STATE_GOTO(CURLY_B_min_known, ST.B);
4798    }
4799    /* NOTREACHED */
4800
4801
4802   case CURLY_B_min_fail:
4803    /* failed to find B in a non-greedy match where c1,c2 invalid */
4804    if (ST.paren && ST.count)
4805     PL_regoffs[ST.paren].end = -1;
4806
4807    REGCP_UNWIND(ST.cp);
4808    /* failed -- move forward one */
4809    PL_reginput = locinput;
4810    if (regrepeat(rex, ST.A, 1, depth)) {
4811     ST.count++;
4812     locinput = PL_reginput;
4813     if (ST.count <= ST.max || (ST.max == REG_INFTY &&
4814       ST.count > 0)) /* count overflow ? */
4815     {
4816     curly_try_B_min:
4817      CURLY_SETPAREN(ST.paren, ST.count);
4818      if (cur_eval && cur_eval->u.eval.close_paren &&
4819       cur_eval->u.eval.close_paren == (U32)ST.paren) {
4820       goto fake_end;
4821      }
4822      PUSH_STATE_GOTO(CURLY_B_min, ST.B);
4823     }
4824    }
4825    sayNO;
4826    /* NOTREACHED */
4827
4828
4829   curly_try_B_max:
4830    /* a successful greedy match: now try to match B */
4831    if (cur_eval && cur_eval->u.eval.close_paren &&
4832     cur_eval->u.eval.close_paren == (U32)ST.paren) {
4833     goto fake_end;
4834    }
4835    {
4836     UV c = 0;
4837     if (ST.c1 != CHRTEST_VOID)
4838      c = do_utf8 ? utf8n_to_uvchr((U8*)PL_reginput,
4839           UTF8_MAXBYTES, 0, uniflags)
4840         : (UV) UCHARAT(PL_reginput);
4841     /* If it could work, try it. */
4842     if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
4843      CURLY_SETPAREN(ST.paren, ST.count);
4844      PUSH_STATE_GOTO(CURLY_B_max, ST.B);
4845      /* NOTREACHED */
4846     }
4847    }
4848    /* FALL THROUGH */
4849   case CURLY_B_max_fail:
4850    /* failed to find B in a greedy match */
4851    if (ST.paren && ST.count)
4852     PL_regoffs[ST.paren].end = -1;
4853
4854    REGCP_UNWIND(ST.cp);
4855    /*  back up. */
4856    if (--ST.count < ST.min)
4857     sayNO;
4858    PL_reginput = locinput = HOPc(locinput, -1);
4859    goto curly_try_B_max;
4860
4861 #undef ST
4862
4863   case END:
4864    fake_end:
4865    if (cur_eval) {
4866     /* we've just finished A in /(??{A})B/; now continue with B */
4867     I32 tmpix;
4868     st->u.eval.toggle_reg_flags
4869        = cur_eval->u.eval.toggle_reg_flags;
4870     PL_reg_flags ^= st->u.eval.toggle_reg_flags;
4871
4872     st->u.eval.prev_rex = rex;  /* inner */
4873     SETREX(rex,cur_eval->u.eval.prev_rex);
4874     rexi = RXi_GET(rex);
4875     cur_curlyx = cur_eval->u.eval.prev_curlyx;
4876     ReREFCNT_inc(rex);
4877     st->u.eval.cp = regcppush(0); /* Save *all* the positions. */
4878
4879     /* rex was changed so update the pointer in PL_reglastparen and PL_reglastcloseparen */
4880     PL_reglastparen = &rex->lastparen;
4881     PL_reglastcloseparen = &rex->lastcloseparen;
4882
4883     REGCP_SET(st->u.eval.lastcp);
4884     PL_reginput = locinput;
4885
4886     /* Restore parens of the outer rex without popping the
4887     * savestack */
4888     tmpix = PL_savestack_ix;
4889     PL_savestack_ix = cur_eval->u.eval.lastcp;
4890     regcppop(rex);
4891     PL_savestack_ix = tmpix;
4892
4893     st->u.eval.prev_eval = cur_eval;
4894     cur_eval = cur_eval->u.eval.prev_eval;
4895     DEBUG_EXECUTE_r(
4896      PerlIO_printf(Perl_debug_log, "%*s  EVAL trying tail ... %"UVxf"\n",
4897          REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval)););
4898     if ( nochange_depth )
4899      nochange_depth--;
4900
4901     PUSH_YES_STATE_GOTO(EVAL_AB,
4902       st->u.eval.prev_eval->u.eval.B); /* match B */
4903    }
4904
4905    if (locinput < reginfo->till) {
4906     DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
4907          "%sMatch possible, but length=%ld is smaller than requested=%ld, failing!%s\n",
4908          PL_colors[4],
4909          (long)(locinput - PL_reg_starttry),
4910          (long)(reginfo->till - PL_reg_starttry),
4911          PL_colors[5]));
4912
4913     sayNO_SILENT;  /* Cannot match: too short. */
4914    }
4915    PL_reginput = locinput; /* put where regtry can find it */
4916    sayYES;   /* Success! */
4917
4918   case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
4919    DEBUG_EXECUTE_r(
4920    PerlIO_printf(Perl_debug_log,
4921     "%*s  %ssubpattern success...%s\n",
4922     REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
4923    PL_reginput = locinput; /* put where regtry can find it */
4924    sayYES;   /* Success! */
4925
4926 #undef  ST
4927 #define ST st->u.ifmatch
4928
4929   case SUSPEND: /* (?>A) */
4930    ST.wanted = 1;
4931    PL_reginput = locinput;
4932    goto do_ifmatch;
4933
4934   case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
4935    ST.wanted = 0;
4936    goto ifmatch_trivial_fail_test;
4937
4938   case IFMATCH: /* +ve lookaround: (?=A), or with flags, (?<=A) */
4939    ST.wanted = 1;
4940   ifmatch_trivial_fail_test:
4941    if (scan->flags) {
4942     char * const s = HOPBACKc(locinput, scan->flags);
4943     if (!s) {
4944      /* trivial fail */
4945      if (logical) {
4946       logical = 0;
4947       sw = 1 - (bool)ST.wanted;
4948      }
4949      else if (ST.wanted)
4950       sayNO;
4951      next = scan + ARG(scan);
4952      if (next == scan)
4953       next = NULL;
4954      break;
4955     }
4956     PL_reginput = s;
4957    }
4958    else
4959     PL_reginput = locinput;
4960
4961   do_ifmatch:
4962    ST.me = scan;
4963    ST.logical = logical;
4964    logical = 0; /* XXX: reset state of logical once it has been saved into ST */
4965
4966    /* execute body of (?...A) */
4967    PUSH_YES_STATE_GOTO(IFMATCH_A, NEXTOPER(NEXTOPER(scan)));
4968    /* NOTREACHED */
4969
4970   case IFMATCH_A_fail: /* body of (?...A) failed */
4971    ST.wanted = !ST.wanted;
4972    /* FALL THROUGH */
4973
4974   case IFMATCH_A: /* body of (?...A) succeeded */
4975    if (ST.logical) {
4976     sw = (bool)ST.wanted;
4977    }
4978    else if (!ST.wanted)
4979     sayNO;
4980
4981    if (OP(ST.me) == SUSPEND)
4982     locinput = PL_reginput;
4983    else {
4984     locinput = PL_reginput = st->locinput;
4985     nextchr = UCHARAT(locinput);
4986    }
4987    scan = ST.me + ARG(ST.me);
4988    if (scan == ST.me)
4989     scan = NULL;
4990    continue; /* execute B */
4991
4992 #undef ST
4993
4994   case LONGJMP:
4995    next = scan + ARG(scan);
4996    if (next == scan)
4997     next = NULL;
4998    break;
4999   case COMMIT:
5000    reginfo->cutpoint = PL_regeol;
5001    /* FALLTHROUGH */
5002   case PRUNE:
5003    PL_reginput = locinput;
5004    if (!scan->flags)
5005     sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5006    PUSH_STATE_GOTO(COMMIT_next,next);
5007    /* NOTREACHED */
5008   case COMMIT_next_fail:
5009    no_final = 1;
5010    /* FALLTHROUGH */
5011   case OPFAIL:
5012    sayNO;
5013    /* NOTREACHED */
5014
5015 #define ST st->u.mark
5016   case MARKPOINT:
5017    ST.prev_mark = mark_state;
5018    ST.mark_name = sv_commit = sv_yes_mark
5019     = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5020    mark_state = st;
5021    ST.mark_loc = PL_reginput = locinput;
5022    PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
5023    /* NOTREACHED */
5024   case MARKPOINT_next:
5025    mark_state = ST.prev_mark;
5026    sayYES;
5027    /* NOTREACHED */
5028   case MARKPOINT_next_fail:
5029    if (popmark && sv_eq(ST.mark_name,popmark))
5030    {
5031     if (ST.mark_loc > startpoint)
5032      reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5033     popmark = NULL; /* we found our mark */
5034     sv_commit = ST.mark_name;
5035
5036     DEBUG_EXECUTE_r({
5037       PerlIO_printf(Perl_debug_log,
5038        "%*s  %ssetting cutpoint to mark:%"SVf"...%s\n",
5039        REPORT_CODE_OFF+depth*2, "",
5040        PL_colors[4], SVfARG(sv_commit), PL_colors[5]);
5041     });
5042    }
5043    mark_state = ST.prev_mark;
5044    sv_yes_mark = mark_state ?
5045     mark_state->u.mark.mark_name : NULL;
5046    sayNO;
5047    /* NOTREACHED */
5048   case SKIP:
5049    PL_reginput = locinput;
5050    if (scan->flags) {
5051     /* (*SKIP) : if we fail we cut here*/
5052     ST.mark_name = NULL;
5053     ST.mark_loc = locinput;
5054     PUSH_STATE_GOTO(SKIP_next,next);
5055    } else {
5056     /* (*SKIP:NAME) : if there is a (*MARK:NAME) fail where it was,
5057     otherwise do nothing.  Meaning we need to scan
5058     */
5059     regmatch_state *cur = mark_state;
5060     SV *find = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
5061
5062     while (cur) {
5063      if ( sv_eq( cur->u.mark.mark_name,
5064         find ) )
5065      {
5066       ST.mark_name = find;
5067       PUSH_STATE_GOTO( SKIP_next, next );
5068      }
5069      cur = cur->u.mark.prev_mark;
5070     }
5071    }
5072    /* Didn't find our (*MARK:NAME) so ignore this (*SKIP:NAME) */
5073    break;
5074   case SKIP_next_fail:
5075    if (ST.mark_name) {
5076     /* (*CUT:NAME) - Set up to search for the name as we
5077     collapse the stack*/
5078     popmark = ST.mark_name;
5079    } else {
5080     /* (*CUT) - No name, we cut here.*/
5081     if (ST.mark_loc > startpoint)
5082      reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
5083     /* but we set sv_commit to latest mark_name if there
5084     is one so they can test to see how things lead to this
5085     cut */
5086     if (mark_state)
5087      sv_commit=mark_state->u.mark.mark_name;
5088    }
5089    no_final = 1;
5090    sayNO;
5091    /* NOTREACHED */
5092 #undef ST
5093   case FOLDCHAR:
5094    n = ARG(scan);
5095    if ( n == (U32)what_len_TRICKYFOLD(locinput,do_utf8,ln) ) {
5096     locinput += ln;
5097    } else if ( 0xDF == n && !do_utf8 && !UTF ) {
5098     sayNO;
5099    } else  {
5100     U8 folded[UTF8_MAXBYTES_CASE+1];
5101     STRLEN foldlen;
5102     const char * const l = locinput;
5103     char *e = PL_regeol;
5104     to_uni_fold(n, folded, &foldlen);
5105
5106     if (ibcmp_utf8((const char*) folded, 0,  foldlen, 1,
5107        l, &e, 0,  do_utf8)) {
5108       sayNO;
5109     }
5110     locinput = e;
5111    }
5112    nextchr = UCHARAT(locinput);
5113    break;
5114   case LNBREAK:
5115    if ((n=is_LNBREAK(locinput,do_utf8))) {
5116     locinput += n;
5117     nextchr = UCHARAT(locinput);
5118    } else
5119     sayNO;
5120    break;
5121
5122 #define CASE_CLASS(nAmE)                              \
5123   case nAmE:                                    \
5124    if ((n=is_##nAmE(locinput,do_utf8))) {    \
5125     locinput += n;                        \
5126     nextchr = UCHARAT(locinput);          \
5127    } else                                    \
5128     sayNO;                                \
5129    break;                                    \
5130   case N##nAmE:                                 \
5131    if ((n=is_##nAmE(locinput,do_utf8))) {    \
5132     sayNO;                                \
5133    } else {                                  \
5134     locinput += UTF8SKIP(locinput);       \
5135     nextchr = UCHARAT(locinput);          \
5136    }                                         \
5137    break
5138
5139   CASE_CLASS(VERTWS);
5140   CASE_CLASS(HORIZWS);
5141 #undef CASE_CLASS
5142
5143   default:
5144    PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
5145       PTR2UV(scan), OP(scan));
5146    Perl_croak(aTHX_ "regexp memory corruption");
5147
5148   } /* end switch */
5149
5150   /* switch break jumps here */
5151   scan = next; /* prepare to execute the next op and ... */
5152   continue;    /* ... jump back to the top, reusing st */
5153   /* NOTREACHED */
5154
5155  push_yes_state:
5156   /* push a state that backtracks on success */
5157   st->u.yes.prev_yes_state = yes_state;
5158   yes_state = st;
5159   /* FALL THROUGH */
5160  push_state:
5161   /* push a new regex state, then continue at scan  */
5162   {
5163    regmatch_state *newst;
5164
5165    DEBUG_STACK_r({
5166     regmatch_state *cur = st;
5167     regmatch_state *curyes = yes_state;
5168     int curd = depth;
5169     regmatch_slab *slab = PL_regmatch_slab;
5170     for (;curd > -1;cur--,curd--) {
5171      if (cur < SLAB_FIRST(slab)) {
5172       slab = slab->prev;
5173       cur = SLAB_LAST(slab);
5174      }
5175      PerlIO_printf(Perl_error_log, "%*s#%-3d %-10s %s\n",
5176       REPORT_CODE_OFF + 2 + depth * 2,"",
5177       curd, PL_reg_name[cur->resume_state],
5178       (curyes == cur) ? "yes" : ""
5179      );
5180      if (curyes == cur)
5181       curyes = cur->u.yes.prev_yes_state;
5182     }
5183    } else
5184     DEBUG_STATE_pp("push")
5185    );
5186    depth++;
5187    st->locinput = locinput;
5188    newst = st+1;
5189    if (newst >  SLAB_LAST(PL_regmatch_slab))
5190     newst = S_push_slab(aTHX);
5191    PL_regmatch_state = newst;
5192
5193    locinput = PL_reginput;
5194    nextchr = UCHARAT(locinput);
5195    st = newst;
5196    continue;
5197    /* NOTREACHED */
5198   }
5199  }
5200
5201  /*
5202  * We get here only if there's trouble -- normally "case END" is
5203  * the terminating point.
5204  */
5205  Perl_croak(aTHX_ "corrupted regexp pointers");
5206  /*NOTREACHED*/
5207  sayNO;
5208
5209 yes:
5210  if (yes_state) {
5211   /* we have successfully completed a subexpression, but we must now
5212   * pop to the state marked by yes_state and continue from there */
5213   assert(st != yes_state);
5214 #ifdef DEBUGGING
5215   while (st != yes_state) {
5216    st--;
5217    if (st < SLAB_FIRST(PL_regmatch_slab)) {
5218     PL_regmatch_slab = PL_regmatch_slab->prev;
5219     st = SLAB_LAST(PL_regmatch_slab);
5220    }
5221    DEBUG_STATE_r({
5222     if (no_final) {
5223      DEBUG_STATE_pp("pop (no final)");
5224     } else {
5225      DEBUG_STATE_pp("pop (yes)");
5226     }
5227    });
5228    depth--;
5229   }
5230 #else
5231   while (yes_state < SLAB_FIRST(PL_regmatch_slab)
5232    || yes_state > SLAB_LAST(PL_regmatch_slab))
5233   {
5234    /* not in this slab, pop slab */
5235    depth -= (st - SLAB_FIRST(PL_regmatch_slab) + 1);
5236    PL_regmatch_slab = PL_regmatch_slab->prev;
5237    st = SLAB_LAST(PL_regmatch_slab);
5238   }
5239   depth -= (st - yes_state);
5240 #endif
5241   st = yes_state;
5242   yes_state = st->u.yes.prev_yes_state;
5243   PL_regmatch_state = st;
5244
5245   if (no_final) {
5246    locinput= st->locinput;
5247    nextchr = UCHARAT(locinput);
5248   }
5249   state_num = st->resume_state + no_final;
5250   goto reenter_switch;
5251  }
5252
5253  DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n",
5254       PL_colors[4], PL_colors[5]));
5255
5256  if (PL_reg_eval_set) {
5257   /* each successfully executed (?{...}) block does the equivalent of
5258   *   local $^R = do {...}
5259   * When popping the save stack, all these locals would be undone;
5260   * bypass this by setting the outermost saved $^R to the latest
5261   * value */
5262   if (oreplsv != GvSV(PL_replgv))
5263    sv_setsv(oreplsv, GvSV(PL_replgv));
5264  }
5265  result = 1;
5266  goto final_exit;
5267
5268 no:
5269  DEBUG_EXECUTE_r(
5270   PerlIO_printf(Perl_debug_log,
5271    "%*s  %sfailed...%s\n",
5272    REPORT_CODE_OFF+depth*2, "",
5273    PL_colors[4], PL_colors[5])
5274   );
5275
5276 no_silent:
5277  if (no_final) {
5278   if (yes_state) {
5279    goto yes;
5280   } else {
5281    goto final_exit;
5282   }
5283  }
5284  if (depth) {
5285   /* there's a previous state to backtrack to */
5286   st--;
5287   if (st < SLAB_FIRST(PL_regmatch_slab)) {
5288    PL_regmatch_slab = PL_regmatch_slab->prev;
5289    st = SLAB_LAST(PL_regmatch_slab);
5290   }
5291   PL_regmatch_state = st;
5292   locinput= st->locinput;
5293   nextchr = UCHARAT(locinput);
5294
5295   DEBUG_STATE_pp("pop");
5296   depth--;
5297   if (yes_state == st)
5298    yes_state = st->u.yes.prev_yes_state;
5299
5300   state_num = st->resume_state + 1; /* failure = success + 1 */
5301   goto reenter_switch;
5302  }
5303  result = 0;
5304
5305   final_exit:
5306  if (rex->intflags & PREGf_VERBARG_SEEN) {
5307   SV *sv_err = get_sv("REGERROR", 1);
5308   SV *sv_mrk = get_sv("REGMARK", 1);
5309   if (result) {
5310    sv_commit = &PL_sv_no;
5311    if (!sv_yes_mark)
5312     sv_yes_mark = &PL_sv_yes;
5313   } else {
5314    if (!sv_commit)
5315     sv_commit = &PL_sv_yes;
5316    sv_yes_mark = &PL_sv_no;
5317   }
5318   sv_setsv(sv_err, sv_commit);
5319   sv_setsv(sv_mrk, sv_yes_mark);
5320  }
5321
5322  /* clean up; in particular, free all slabs above current one */
5323  LEAVE_SCOPE(oldsave);
5324
5325  return result;
5326 }
5327
5328 /*
5329  - regrepeat - repeatedly match something simple, report how many
5330  */
5331 /*
5332  * [This routine now assumes that it will only match on things of length 1.
5333  * That was true before, but now we assume scan - reginput is the count,
5334  * rather than incrementing count on every character.  [Er, except utf8.]]
5335  */
5336 STATIC I32
5337 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
5338 {
5339  dVAR;
5340  register char *scan;
5341  register I32 c;
5342  register char *loceol = PL_regeol;
5343  register I32 hardcount = 0;
5344  register bool do_utf8 = PL_reg_match_utf8;
5345 #ifndef DEBUGGING
5346  PERL_UNUSED_ARG(depth);
5347 #endif
5348
5349  PERL_ARGS_ASSERT_REGREPEAT;
5350
5351  scan = PL_reginput;
5352  if (max == REG_INFTY)
5353   max = I32_MAX;
5354  else if (max < loceol - scan)
5355   loceol = scan + max;
5356  switch (OP(p)) {
5357  case REG_ANY:
5358   if (do_utf8) {
5359    loceol = PL_regeol;
5360    while (scan < loceol && hardcount < max && *scan != '\n') {
5361     scan += UTF8SKIP(scan);
5362     hardcount++;
5363    }
5364   } else {
5365    while (scan < loceol && *scan != '\n')
5366     scan++;
5367   }
5368   break;
5369  case SANY:
5370   if (do_utf8) {
5371    loceol = PL_regeol;
5372    while (scan < loceol && hardcount < max) {
5373     scan += UTF8SKIP(scan);
5374     hardcount++;
5375    }
5376   }
5377   else
5378    scan = loceol;
5379   break;
5380  case CANY:
5381   scan = loceol;
5382   break;
5383  case EXACT:  /* length of string is 1 */
5384   c = (U8)*STRING(p);
5385   while (scan < loceol && UCHARAT(scan) == c)
5386    scan++;
5387   break;
5388  case EXACTF: /* length of string is 1 */
5389   c = (U8)*STRING(p);
5390   while (scan < loceol &&
5391    (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold[c]))
5392    scan++;
5393   break;
5394  case EXACTFL: /* length of string is 1 */
5395   PL_reg_flags |= RF_tainted;
5396   c = (U8)*STRING(p);
5397   while (scan < loceol &&
5398    (UCHARAT(scan) == c || UCHARAT(scan) == PL_fold_locale[c]))
5399    scan++;
5400   break;
5401  case ANYOF:
5402   if (do_utf8) {
5403    loceol = PL_regeol;
5404    while (hardcount < max && scan < loceol &&
5405     reginclass(prog, p, (U8*)scan, 0, do_utf8)) {
5406     scan += UTF8SKIP(scan);
5407     hardcount++;
5408    }
5409   } else {
5410    while (scan < loceol && REGINCLASS(prog, p, (U8*)scan))
5411     scan++;
5412   }
5413   break;
5414  case ALNUM:
5415   if (do_utf8) {
5416    loceol = PL_regeol;
5417    LOAD_UTF8_CHARCLASS_ALNUM();
5418    while (hardcount < max && scan < loceol &&
5419     swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5420     scan += UTF8SKIP(scan);
5421     hardcount++;
5422    }
5423   } else {
5424    while (scan < loceol && isALNUM(*scan))
5425     scan++;
5426   }
5427   break;
5428  case ALNUML:
5429   PL_reg_flags |= RF_tainted;
5430   if (do_utf8) {
5431    loceol = PL_regeol;
5432    while (hardcount < max && scan < loceol &&
5433     isALNUM_LC_utf8((U8*)scan)) {
5434     scan += UTF8SKIP(scan);
5435     hardcount++;
5436    }
5437   } else {
5438    while (scan < loceol && isALNUM_LC(*scan))
5439     scan++;
5440   }
5441   break;
5442  case NALNUM:
5443   if (do_utf8) {
5444    loceol = PL_regeol;
5445    LOAD_UTF8_CHARCLASS_ALNUM();
5446    while (hardcount < max && scan < loceol &&
5447     !swash_fetch(PL_utf8_alnum, (U8*)scan, do_utf8)) {
5448     scan += UTF8SKIP(scan);
5449     hardcount++;
5450    }
5451   } else {
5452    while (scan < loceol && !isALNUM(*scan))
5453     scan++;
5454   }
5455   break;
5456  case NALNUML:
5457   PL_reg_flags |= RF_tainted;
5458   if (do_utf8) {
5459    loceol = PL_regeol;
5460    while (hardcount < max && scan < loceol &&
5461     !isALNUM_LC_utf8((U8*)scan)) {
5462     scan += UTF8SKIP(scan);
5463     hardcount++;
5464    }
5465   } else {
5466    while (scan < loceol && !isALNUM_LC(*scan))
5467     scan++;
5468   }
5469   break;
5470  case SPACE:
5471   if (do_utf8) {
5472    loceol = PL_regeol;
5473    LOAD_UTF8_CHARCLASS_SPACE();
5474    while (hardcount < max && scan < loceol &&
5475     (*scan == ' ' ||
5476      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5477     scan += UTF8SKIP(scan);
5478     hardcount++;
5479    }
5480   } else {
5481    while (scan < loceol && isSPACE(*scan))
5482     scan++;
5483   }
5484   break;
5485  case SPACEL:
5486   PL_reg_flags |= RF_tainted;
5487   if (do_utf8) {
5488    loceol = PL_regeol;
5489    while (hardcount < max && scan < loceol &&
5490     (*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5491     scan += UTF8SKIP(scan);
5492     hardcount++;
5493    }
5494   } else {
5495    while (scan < loceol && isSPACE_LC(*scan))
5496     scan++;
5497   }
5498   break;
5499  case NSPACE:
5500   if (do_utf8) {
5501    loceol = PL_regeol;
5502    LOAD_UTF8_CHARCLASS_SPACE();
5503    while (hardcount < max && scan < loceol &&
5504     !(*scan == ' ' ||
5505      swash_fetch(PL_utf8_space,(U8*)scan, do_utf8))) {
5506     scan += UTF8SKIP(scan);
5507     hardcount++;
5508    }
5509   } else {
5510    while (scan < loceol && !isSPACE(*scan))
5511     scan++;
5512   }
5513   break;
5514  case NSPACEL:
5515   PL_reg_flags |= RF_tainted;
5516   if (do_utf8) {
5517    loceol = PL_regeol;
5518    while (hardcount < max && scan < loceol &&
5519     !(*scan == ' ' || isSPACE_LC_utf8((U8*)scan))) {
5520     scan += UTF8SKIP(scan);
5521     hardcount++;
5522    }
5523   } else {
5524    while (scan < loceol && !isSPACE_LC(*scan))
5525     scan++;
5526   }
5527   break;
5528  case DIGIT:
5529   if (do_utf8) {
5530    loceol = PL_regeol;
5531    LOAD_UTF8_CHARCLASS_DIGIT();
5532    while (hardcount < max && scan < loceol &&
5533     swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5534     scan += UTF8SKIP(scan);
5535     hardcount++;
5536    }
5537   } else {
5538    while (scan < loceol && isDIGIT(*scan))
5539     scan++;
5540   }
5541   break;
5542  case NDIGIT:
5543   if (do_utf8) {
5544    loceol = PL_regeol;
5545    LOAD_UTF8_CHARCLASS_DIGIT();
5546    while (hardcount < max && scan < loceol &&
5547     !swash_fetch(PL_utf8_digit, (U8*)scan, do_utf8)) {
5548     scan += UTF8SKIP(scan);
5549     hardcount++;
5550    }
5551   } else {
5552    while (scan < loceol && !isDIGIT(*scan))
5553     scan++;
5554   }
5555  case LNBREAK:
5556   if (do_utf8) {
5557    loceol = PL_regeol;
5558    while (hardcount < max && scan < loceol && (c=is_LNBREAK_utf8(scan))) {
5559     scan += c;
5560     hardcount++;
5561    }
5562   } else {
5563    /*
5564    LNBREAK can match two latin chars, which is ok,
5565    because we have a null terminated string, but we
5566    have to use hardcount in this situation
5567    */
5568    while (scan < loceol && (c=is_LNBREAK_latin1(scan)))  {
5569     scan+=c;
5570     hardcount++;
5571    }
5572   }
5573   break;
5574  case HORIZWS:
5575   if (do_utf8) {
5576    loceol = PL_regeol;
5577    while (hardcount < max && scan < loceol && (c=is_HORIZWS_utf8(scan))) {
5578     scan += c;
5579     hardcount++;
5580    }
5581   } else {
5582    while (scan < loceol && is_HORIZWS_latin1(scan))
5583     scan++;
5584   }
5585   break;
5586  case NHORIZWS:
5587   if (do_utf8) {
5588    loceol = PL_regeol;
5589    while (hardcount < max && scan < loceol && !is_HORIZWS_utf8(scan)) {
5590     scan += UTF8SKIP(scan);
5591     hardcount++;
5592    }
5593   } else {
5594    while (scan < loceol && !is_HORIZWS_latin1(scan))
5595     scan++;
5596
5597   }
5598   break;
5599  case VERTWS:
5600   if (do_utf8) {
5601    loceol = PL_regeol;
5602    while (hardcount < max && scan < loceol && (c=is_VERTWS_utf8(scan))) {
5603     scan += c;
5604     hardcount++;
5605    }
5606   } else {
5607    while (scan < loceol && is_VERTWS_latin1(scan))
5608     scan++;
5609
5610   }
5611   break;
5612  case NVERTWS:
5613   if (do_utf8) {
5614    loceol = PL_regeol;
5615    while (hardcount < max && scan < loceol && !is_VERTWS_utf8(scan)) {
5616     scan += UTF8SKIP(scan);
5617     hardcount++;
5618    }
5619   } else {
5620    while (scan < loceol && !is_VERTWS_latin1(scan))
5621     scan++;
5622
5623   }
5624   break;
5625
5626  default:  /* Called on something of 0 width. */
5627   break;  /* So match right here or not at all. */
5628  }
5629
5630  if (hardcount)
5631   c = hardcount;
5632  else
5633   c = scan - PL_reginput;
5634  PL_reginput = scan;
5635
5636  DEBUG_r({
5637   GET_RE_DEBUG_FLAGS_DECL;
5638   DEBUG_EXECUTE_r({
5639    SV * const prop = sv_newmortal();
5640    regprop(prog, prop, p);
5641    PerlIO_printf(Perl_debug_log,
5642       "%*s  %s can match %"IVdf" times out of %"IVdf"...\n",
5643       REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
5644   });
5645  });
5646
5647  return(c);
5648 }
5649
5650
5651 #if !defined(PERL_IN_XSUB_RE) || defined(PLUGGABLE_RE_EXTENSION)
5652 /*
5653 - regclass_swash - prepare the utf8 swash
5654 */
5655
5656 SV *
5657 Perl_regclass_swash(pTHX_ const regexp *prog, register const regnode* node, bool doinit, SV** listsvp, SV **altsvp)
5658 {
5659  dVAR;
5660  SV *sw  = NULL;
5661  SV *si  = NULL;
5662  SV *alt = NULL;
5663  RXi_GET_DECL(prog,progi);
5664  const struct reg_data * const data = prog ? progi->data : NULL;
5665
5666  PERL_ARGS_ASSERT_REGCLASS_SWASH;
5667
5668  if (data && data->count) {
5669   const U32 n = ARG(node);
5670
5671   if (data->what[n] == 's') {
5672    SV * const rv = MUTABLE_SV(data->data[n]);
5673    AV * const av = MUTABLE_AV(SvRV(rv));
5674    SV **const ary = AvARRAY(av);
5675    SV **a, **b;
5676
5677    /* See the end of regcomp.c:S_regclass() for
5678    * documentation of these array elements. */
5679
5680    si = *ary;
5681    a  = SvROK(ary[1]) ? &ary[1] : NULL;
5682    b  = SvTYPE(ary[2]) == SVt_PVAV ? &ary[2] : NULL;
5683
5684    if (a)
5685     sw = *a;
5686    else if (si && doinit) {
5687     sw = swash_init("utf8", "", si, 1, 0);
5688     (void)av_store(av, 1, sw);
5689    }
5690    if (b)
5691     alt = *b;
5692   }
5693  }
5694
5695  if (listsvp)
5696   *listsvp = si;
5697  if (altsvp)
5698   *altsvp  = alt;
5699
5700  return sw;
5701 }
5702 #endif
5703
5704 /*
5705  - reginclass - determine if a character falls into a character class
5706
5707   The n is the ANYOF regnode, the p is the target string, lenp
5708   is pointer to the maximum length of how far to go in the p
5709   (if the lenp is zero, UTF8SKIP(p) is used),
5710   do_utf8 tells whether the target string is in UTF-8.
5711
5712  */
5713
5714 STATIC bool
5715 S_reginclass(pTHX_ const regexp *prog, register const regnode *n, register const U8* p, STRLEN* lenp, register bool do_utf8)
5716 {
5717  dVAR;
5718  const char flags = ANYOF_FLAGS(n);
5719  bool match = FALSE;
5720  UV c = *p;
5721  STRLEN len = 0;
5722  STRLEN plen;
5723
5724  PERL_ARGS_ASSERT_REGINCLASS;
5725
5726  if (do_utf8 && !UTF8_IS_INVARIANT(c)) {
5727   c = utf8n_to_uvchr(p, UTF8_MAXBYTES, &len,
5728     (UTF8_ALLOW_DEFAULT & UTF8_ALLOW_ANYUV) | UTF8_CHECK_ONLY);
5729     /* see [perl #37836] for UTF8_ALLOW_ANYUV */
5730   if (len == (STRLEN)-1)
5731    Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
5732  }
5733
5734  plen = lenp ? *lenp : UNISKIP(NATIVE_TO_UNI(c));
5735  if (do_utf8 || (flags & ANYOF_UNICODE)) {
5736   if (lenp)
5737    *lenp = 0;
5738   if (do_utf8 && !ANYOF_RUNTIME(n)) {
5739    if (len != (STRLEN)-1 && c < 256 && ANYOF_BITMAP_TEST(n, c))
5740     match = TRUE;
5741   }
5742   if (!match && do_utf8 && (flags & ANYOF_UNICODE_ALL) && c >= 256)
5743    match = TRUE;
5744   if (!match) {
5745    AV *av;
5746    SV * const sw = regclass_swash(prog, n, TRUE, 0, (SV**)&av);
5747
5748    if (sw) {
5749     U8 * utf8_p;
5750     if (do_utf8) {
5751      utf8_p = (U8 *) p;
5752     } else {
5753      STRLEN len = 1;
5754      utf8_p = bytes_to_utf8(p, &len);
5755     }
5756     if (swash_fetch(sw, utf8_p, 1))
5757      match = TRUE;
5758     else if (flags & ANYOF_FOLD) {
5759      if (!match && lenp && av) {
5760       I32 i;
5761       for (i = 0; i <= av_len(av); i++) {
5762        SV* const sv = *av_fetch(av, i, FALSE);
5763        STRLEN len;
5764        const char * const s = SvPV_const(sv, len);
5765        if (len <= plen && memEQ(s, (char*)utf8_p, len)) {
5766         *lenp = len;
5767         match = TRUE;
5768         break;
5769        }
5770       }
5771      }
5772      if (!match) {
5773       U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
5774
5775       STRLEN tmplen;
5776       to_utf8_fold(utf8_p, tmpbuf, &tmplen);
5777       if (swash_fetch(sw, tmpbuf, 1))
5778        match = TRUE;
5779      }
5780     }
5781
5782     /* If we allocated a string above, free it */
5783     if (! do_utf8) Safefree(utf8_p);
5784    }
5785   }
5786   if (match && lenp && *lenp == 0)
5787    *lenp = UNISKIP(NATIVE_TO_UNI(c));
5788  }
5789  if (!match && c < 256) {
5790   if (ANYOF_BITMAP_TEST(n, c))
5791    match = TRUE;
5792   else if (flags & ANYOF_FOLD) {
5793    U8 f;
5794
5795    if (flags & ANYOF_LOCALE) {
5796     PL_reg_flags |= RF_tainted;
5797     f = PL_fold_locale[c];
5798    }
5799    else
5800     f = PL_fold[c];
5801    if (f != c && ANYOF_BITMAP_TEST(n, f))
5802     match = TRUE;
5803   }
5804
5805   if (!match && (flags & ANYOF_CLASS)) {
5806    PL_reg_flags |= RF_tainted;
5807    if (
5808     (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
5809     (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
5810     (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
5811     (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
5812     (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
5813     (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
5814     (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
5815     (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
5816     (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
5817     (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
5818     (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
5819     (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
5820     (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
5821     (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
5822     (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
5823     (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
5824     (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
5825     (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
5826     (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
5827     (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
5828     (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
5829     (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
5830     (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
5831     (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
5832     (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
5833     (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
5834     (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
5835     (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
5836     (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
5837     (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
5838     ) /* How's that for a conditional? */
5839    {
5840     match = TRUE;
5841    }
5842   }
5843  }
5844
5845  return (flags & ANYOF_INVERT) ? !match : match;
5846 }
5847
5848 STATIC U8 *
5849 S_reghop3(U8 *s, I32 off, const U8* lim)
5850 {
5851  dVAR;
5852
5853  PERL_ARGS_ASSERT_REGHOP3;
5854
5855  if (off >= 0) {
5856   while (off-- && s < lim) {
5857    /* XXX could check well-formedness here */
5858    s += UTF8SKIP(s);
5859   }
5860  }
5861  else {
5862   while (off++ && s > lim) {
5863    s--;
5864    if (UTF8_IS_CONTINUED(*s)) {
5865     while (s > lim && UTF8_IS_CONTINUATION(*s))
5866      s--;
5867    }
5868    /* XXX could check well-formedness here */
5869   }
5870  }
5871  return s;
5872 }
5873
5874 #ifdef XXX_dmq
5875 /* there are a bunch of places where we use two reghop3's that should
5876    be replaced with this routine. but since thats not done yet
5877    we ifdef it out - dmq
5878 */
5879 STATIC U8 *
5880 S_reghop4(U8 *s, I32 off, const U8* llim, const U8* rlim)
5881 {
5882  dVAR;
5883
5884  PERL_ARGS_ASSERT_REGHOP4;
5885
5886  if (off >= 0) {
5887   while (off-- && s < rlim) {
5888    /* XXX could check well-formedness here */
5889    s += UTF8SKIP(s);
5890   }
5891  }
5892  else {
5893   while (off++ && s > llim) {
5894    s--;
5895    if (UTF8_IS_CONTINUED(*s)) {
5896     while (s > llim && UTF8_IS_CONTINUATION(*s))
5897      s--;
5898    }
5899    /* XXX could check well-formedness here */
5900   }
5901  }
5902  return s;
5903 }
5904 #endif
5905
5906 STATIC U8 *
5907 S_reghopmaybe3(U8* s, I32 off, const U8* lim)
5908 {
5909  dVAR;
5910
5911  PERL_ARGS_ASSERT_REGHOPMAYBE3;
5912
5913  if (off >= 0) {
5914   while (off-- && s < lim) {
5915    /* XXX could check well-formedness here */
5916    s += UTF8SKIP(s);
5917   }
5918   if (off >= 0)
5919    return NULL;
5920  }
5921  else {
5922   while (off++ && s > lim) {
5923    s--;
5924    if (UTF8_IS_CONTINUED(*s)) {
5925     while (s > lim && UTF8_IS_CONTINUATION(*s))
5926      s--;
5927    }
5928    /* XXX could check well-formedness here */
5929   }
5930   if (off <= 0)
5931    return NULL;
5932  }
5933  return s;
5934 }
5935
5936 static void
5937 restore_pos(pTHX_ void *arg)
5938 {
5939  dVAR;
5940  regexp * const rex = (regexp *)arg;
5941  if (PL_reg_eval_set) {
5942   if (PL_reg_oldsaved) {
5943    rex->subbeg = PL_reg_oldsaved;
5944    rex->sublen = PL_reg_oldsavedlen;
5945 #ifdef PERL_OLD_COPY_ON_WRITE
5946    rex->saved_copy = PL_nrs;
5947 #endif
5948    RXp_MATCH_COPIED_on(rex);
5949   }
5950   PL_reg_magic->mg_len = PL_reg_oldpos;
5951   PL_reg_eval_set = 0;
5952   PL_curpm = PL_reg_oldcurpm;
5953  }
5954 }
5955
5956 STATIC void
5957 S_to_utf8_substr(pTHX_ register regexp *prog)
5958 {
5959  int i = 1;
5960
5961  PERL_ARGS_ASSERT_TO_UTF8_SUBSTR;
5962
5963  do {
5964   if (prog->substrs->data[i].substr
5965    && !prog->substrs->data[i].utf8_substr) {
5966    SV* const sv = newSVsv(prog->substrs->data[i].substr);
5967    prog->substrs->data[i].utf8_substr = sv;
5968    sv_utf8_upgrade(sv);
5969    if (SvVALID(prog->substrs->data[i].substr)) {
5970     const U8 flags = BmFLAGS(prog->substrs->data[i].substr);
5971     if (flags & FBMcf_TAIL) {
5972      /* Trim the trailing \n that fbm_compile added last
5973      time.  */
5974      SvCUR_set(sv, SvCUR(sv) - 1);
5975      /* Whilst this makes the SV technically "invalid" (as its
5976      buffer is no longer followed by "\0") when fbm_compile()
5977      adds the "\n" back, a "\0" is restored.  */
5978     }
5979     fbm_compile(sv, flags);
5980    }
5981    if (prog->substrs->data[i].substr == prog->check_substr)
5982     prog->check_utf8 = sv;
5983   }
5984  } while (i--);
5985 }
5986
5987 STATIC void
5988 S_to_byte_substr(pTHX_ register regexp *prog)
5989 {
5990  dVAR;
5991  int i = 1;
5992
5993  PERL_ARGS_ASSERT_TO_BYTE_SUBSTR;
5994
5995  do {
5996   if (prog->substrs->data[i].utf8_substr
5997    && !prog->substrs->data[i].substr) {
5998    SV* sv = newSVsv(prog->substrs->data[i].utf8_substr);
5999    if (sv_utf8_downgrade(sv, TRUE)) {
6000     if (SvVALID(prog->substrs->data[i].utf8_substr)) {
6001      const U8 flags
6002       = BmFLAGS(prog->substrs->data[i].utf8_substr);
6003      if (flags & FBMcf_TAIL) {
6004       /* Trim the trailing \n that fbm_compile added last
6005       time.  */
6006       SvCUR_set(sv, SvCUR(sv) - 1);
6007      }
6008      fbm_compile(sv, flags);
6009     }
6010    } else {
6011     SvREFCNT_dec(sv);
6012     sv = &PL_sv_undef;
6013    }
6014    prog->substrs->data[i].substr = sv;
6015    if (prog->substrs->data[i].utf8_substr == prog->check_utf8)
6016     prog->check_substr = sv;
6017   }
6018  } while (i--);
6019 }
6020
6021 /*
6022  * Local variables:
6023  * c-indentation-style: bsd
6024  * c-basic-offset: 4
6025  * indent-tabs-mode: t
6026  * End:
6027  *
6028  * ex: set ts=8 sts=4 sw=4 noet:
6029  */