Add support for perl 5.22.0

[perl/modules/re-engine-Hooks.git] / src / 5022000 / orig / regcomp.c
diff --git a/src/5021004/orig/regcomp.c b/src/5022000/orig/regcomp.c

similarity index 86%

rename from src/5021004/orig/regcomp.c

rename to src/5022000/orig/regcomp.c

index be9c184db9d24f30d2a45637173671070ffd2159..712c8ed7d98f84d6e536b9a9a488b11071b19e81 100644 (file)
--- a/src/5021004/orig/regcomp.c
+++ b/src/5022000/orig/regcomp.c
@@ -87,7 +87,6 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #endif
  
  #include "dquote_static.c"
-#include "charclass_invlists.h"
  #include "inline_invlist.c"
  #include "unicode_constants.h"
  
@@ -102,6 +101,31 @@ EXTERN_C const struct regexp_engine my_reg_engine;
  #define        STATIC  static
  #endif
  
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
+/* this is a chain of data about sub patterns we are processing that
+   need to be handled separately/specially in study_chunk. Its so
+   we can simulate recursion without losing state.  */
+struct scan_frame;
+typedef struct scan_frame {
+    regnode *last_regnode;      /* last node to process in this frame */
+    regnode *next_regnode;      /* next node to process when last is reached */
+    U32 prev_recursed_depth;
+    I32 stopparen;              /* what stopparen do we use */
+    U32 is_top_frame;           /* what flags do we use? */
+
+    struct scan_frame *this_prev_frame; /* this previous frame */
+    struct scan_frame *prev_frame;      /* previous frame */
+    struct scan_frame *next_frame;      /* next frame */
+} scan_frame;
+
+/* Certain characters are output as a sequence with the first being a
+ * backslash. */
+#define isBACKSLASHED_PUNCT(c)                                              \
+                    ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
+
  
  struct RExC_state_t {
      U32                flags;                  /* RXf_* are we folding, multilining? */
@@ -149,19 +173,26 @@ struct RExC_state_t {
  
      regnode    **recurse;              /* Recurse regops */
      I32                recurse_count;          /* Number of recurse regops */
-    U8          *study_chunk_recursed;  /* bitmap of which parens we have moved
+    U8          *study_chunk_recursed;  /* bitmap of which subs we have moved
                                             through */
      U32         study_chunk_recursed_bytes;  /* bytes in bitmap */
      I32                in_lookbehind;
      I32                contains_locale;
      I32                contains_i;
      I32                override_recoding;
+#ifdef EBCDIC
+    I32                recode_x_to_native;
+#endif
      I32                in_multi_char_class;
      struct reg_code_block *code_blocks;        /* positions of literal (?{})
                                             within pattern */
      int                num_code_blocks;        /* size of code_blocks[] */
      int                code_index;             /* next code_blocks[] slot */
      SSize_t     maxlen;                        /* mininum possible number of chars in string to match */
+    scan_frame *frame_head;
+    scan_frame *frame_last;
+    U32         frame_count;
+    U32         strict;
  #ifdef ADD_TO_REGEXEC
      char       *starttry;              /* -Dr: where regtry was called. */
  #define RExC_starttry  (pRExC_state->starttry)
@@ -171,9 +202,17 @@ struct RExC_state_t {
      const char  *lastparse;
      I32         lastnum;
      AV          *paren_name_list;       /* idx -> name */
+    U32         study_chunk_recursed_count;
+    SV          *mysv1;
+    SV          *mysv2;
  #define RExC_lastparse (pRExC_state->lastparse)
  #define RExC_lastnum   (pRExC_state->lastnum)
  #define RExC_paren_name_list    (pRExC_state->paren_name_list)
+#define RExC_study_chunk_recursed_count    (pRExC_state->study_chunk_recursed_count)
+#define RExC_mysv      (pRExC_state->mysv1)
+#define RExC_mysv1     (pRExC_state->mysv1)
+#define RExC_mysv2     (pRExC_state->mysv2)
+
  #endif
  };
  
@@ -195,7 +234,6 @@ struct RExC_state_t {
  #define RExC_emit_dummy        (pRExC_state->emit_dummy)
  #define RExC_emit_start        (pRExC_state->emit_start)
  #define RExC_emit_bound        (pRExC_state->emit_bound)
-#define RExC_naughty   (pRExC_state->naughty)
  #define RExC_sawback   (pRExC_state->sawback)
  #define RExC_seen      (pRExC_state->seen)
  #define RExC_size      (pRExC_state->size)
@@ -220,8 +258,28 @@ struct RExC_state_t {
  #define RExC_contains_locale   (pRExC_state->contains_locale)
  #define RExC_contains_i (pRExC_state->contains_i)
  #define RExC_override_recoding (pRExC_state->override_recoding)
+#ifdef EBCDIC
+#   define RExC_recode_x_to_native (pRExC_state->recode_x_to_native)
+#endif
  #define RExC_in_multi_char_class (pRExC_state->in_multi_char_class)
-
+#define RExC_frame_head (pRExC_state->frame_head)
+#define RExC_frame_last (pRExC_state->frame_last)
+#define RExC_frame_count (pRExC_state->frame_count)
+#define RExC_strict (pRExC_state->strict)
+
+/* Heuristic check on the complexity of the pattern: if TOO_NAUGHTY, we set
+ * a flag to disable back-off on the fixed/floating substrings - if it's
+ * a high complexity pattern we assume the benefit of avoiding a full match
+ * is worth the cost of checking for the substrings even if they rarely help.
+ */
+#define RExC_naughty   (pRExC_state->naughty)
+#define TOO_NAUGHTY (10)
+#define MARK_NAUGHTY(add) \
+    if (RExC_naughty < TOO_NAUGHTY) \
+        RExC_naughty += (add)
+#define MARK_NAUGHTY_EXP(exp, add) \
+    if (RExC_naughty < TOO_NAUGHTY) \
+        RExC_naughty += RExC_naughty / (exp) + (add)
  
  #define        ISMULT1(c)      ((c) == '*' || (c) == '+' || (c) == '?')
  #define        ISMULT2(s)      ((*s) == '*' || (*s) == '+' || (*s) == '?' || \
@@ -411,6 +469,10 @@ static const scan_data_t zero_scan_data =
  #define SCF_TRIE_RESTUDY        0x4000 /* Do restudy? */
  #define SCF_SEEN_ACCEPT         0x8000
  #define SCF_TRIE_DOING_RESTUDY 0x10000
+#define SCF_IN_DEFINE          0x20000
+
+
+
  
  #define UTF cBOOL(RExC_utf8)
  
@@ -464,6 +526,10 @@ static const scan_data_t zero_scan_data =
                  UTF8fARG(UTF, offset, RExC_precomp), \
                  UTF8fARG(UTF, RExC_end - RExC_precomp - offset, RExC_precomp + offset)
  
+/* Used to point after bad bytes for an error message, but avoid skipping
+ * past a nul byte. */
+#define SKIP_IF_CHAR(s) (!*(s) ? 0 : UTF ? UTF8SKIP(s) : 1)
+
  /*
   * Calls SAVEDESTRUCTOR_X if needed, then calls Perl_croak with the given
   * arg. Show regex, up to a maximum length. If it's too long, chop and add
@@ -495,7 +561,8 @@ static const scan_data_t zero_scan_data =
   * Simple_vFAIL -- like FAIL, but marks the current location in the scan
   */
  #define        Simple_vFAIL(m) STMT_START {                                    \
-    const IV offset = RExC_parse - RExC_precomp;                       \
+    const IV offset =                                                   \
+        (RExC_parse > RExC_end ? RExC_end : RExC_parse) - RExC_precomp; \
      Perl_croak(aTHX_ "%s" REPORT_LOCATION,                             \
             m, REPORT_LOCATION_ARGS(offset));   \
  } STMT_END
@@ -589,6 +656,12 @@ static const scan_data_t zero_scan_data =
             REPORT_LOCATION_ARGS(offset));              \
  } STMT_END
  
+#define        vWARN(loc, m) STMT_START {                                      \
+    const IV offset = loc - RExC_precomp;                              \
+    __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION,       \
+           REPORT_LOCATION_ARGS(offset));              \
+} STMT_END
+
  #define        vWARN_dep(loc, m) STMT_START {                                  \
      const IV offset = loc - RExC_precomp;                              \
      __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION,   \
@@ -652,12 +725,6 @@ static const scan_data_t zero_scan_data =
             a1, a2, a3, a4, REPORT_LOCATION_ARGS(offset)); \
  } STMT_END
  
-
-/* Allow for side effects in s */
-#define REGC(c,s) STMT_START {                 \
-    if (!SIZE_ONLY) *(s) = (c); else (void)(s);        \
-} STMT_END
-
  /* Macros for recording node offsets.   20001227 mjd@plover.com
   * Nodes are numbered 1, 2, 3, 4.  Node #n's position is recorded in
   * element 2*n-1 of the array.  Element #2n holds the byte length node #n.
@@ -771,15 +838,44 @@ static const scan_data_t zero_scan_data =
              PerlIO_printf(Perl_debug_log,"\n");                             \
          });
  
+#define DEBUG_SHOW_STUDY_FLAG(flags,flag) \
+  if ((flags) & flag) PerlIO_printf(Perl_debug_log, "%s ", #flag)
+
+#define DEBUG_SHOW_STUDY_FLAGS(flags,open_str,close_str)                    \
+    if ( ( flags ) ) {                                                      \
+        PerlIO_printf(Perl_debug_log, "%s", open_str);                      \
+        DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_SEOL);                     \
+        DEBUG_SHOW_STUDY_FLAG(flags,SF_FL_BEFORE_MEOL);                     \
+        DEBUG_SHOW_STUDY_FLAG(flags,SF_IS_INF);                             \
+        DEBUG_SHOW_STUDY_FLAG(flags,SF_HAS_PAR);                            \
+        DEBUG_SHOW_STUDY_FLAG(flags,SF_IN_PAR);                             \
+        DEBUG_SHOW_STUDY_FLAG(flags,SF_HAS_EVAL);                           \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_SUBSTR);                         \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS_AND);                    \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS_OR);                     \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_DO_STCLASS);                        \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_WHILEM_VISITED_POS);                \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_RESTUDY);                      \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_SEEN_ACCEPT);                       \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_TRIE_DOING_RESTUDY);                \
+        DEBUG_SHOW_STUDY_FLAG(flags,SCF_IN_DEFINE);                         \
+        PerlIO_printf(Perl_debug_log, "%s", close_str);                     \
+    }
+
+
  #define DEBUG_STUDYDATA(str,data,depth)                              \
  DEBUG_OPTIMISE_MORE_r(if(data){                                      \
      PerlIO_printf(Perl_debug_log,                                    \
          "%*s" str "Pos:%"IVdf"/%"IVdf                                \
-        " Flags: 0x%"UVXf" Whilem_c: %"IVdf" Lcp: %"IVdf" %s",       \
+        " Flags: 0x%"UVXf,                                           \
          (int)(depth)*2, "",                                          \
          (IV)((data)->pos_min),                                       \
          (IV)((data)->pos_delta),                                     \
-        (UV)((data)->flags),                                         \
+        (UV)((data)->flags)                                          \
+    );                                                               \
+    DEBUG_SHOW_STUDY_FLAGS((data)->flags," [ ","]");                 \
+    PerlIO_printf(Perl_debug_log,                                    \
+        " Whilem_c: %"IVdf" Lcp: %"IVdf" %s",                        \
          (IV)((data)->whilem_c),                                      \
          (IV)((data)->last_closep ? *((data)->last_closep) : -1),     \
          is_inf ? "INF " : ""                                         \
@@ -805,8 +901,6 @@ DEBUG_OPTIMISE_MORE_r(if(data){                                      \
      PerlIO_printf(Perl_debug_log,"\n");                              \
  });
  
-#ifdef DEBUGGING
-
  /* is c a control character for which we have a mnemonic? */
  #define isMNEMONIC_CNTRL(c) _IS_MNEMONIC_CNTRL_ONLY_FOR_USE_BY_REGCOMP_DOT_C(c)
  
@@ -830,8 +924,6 @@ S_cntrl_to_mnemonic(const U8 c)
      return NULL;
  }
  
-#endif
-
  /* Mark that we cannot extend a found fixed substring at this point.
     Update the longest found anchored substring and the longest found
     floating substrings if needed. */
@@ -861,8 +953,8 @@ S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data,
         else { /* *data->longest == data->longest_float */
             data->offset_float_min = l ? data->last_start_min : data->pos_min;
             data->offset_float_max = (l
-                                     ? data->last_start_max
-                                     : (data->pos_delta == SSize_t_MAX
+                          ? data->last_start_max
+                          : (data->pos_delta > SSize_t_MAX - data->pos_min
                                          ? SSize_t_MAX
                                          : data->pos_min + data->pos_delta));
             if (is_inf
@@ -965,13 +1057,13 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc)
      ARG_SET(ssc, ANYOF_ONLY_HAS_BITMAP);
      ssc_anything(ssc);
  
-    /* If any portion of the regex is to operate under locale rules,
-     * initialization includes it.  The reason this isn't done for all regexes
-     * is that the optimizer was written under the assumption that locale was
-     * all-or-nothing.  Given the complexity and lack of documentation in the
-     * optimizer, and that there are inadequate test cases for locale, many
-     * parts of it may not work properly, it is safest to avoid locale unless
-     * necessary. */
+    /* If any portion of the regex is to operate under locale rules that aren't
+     * fully known at compile time, initialization includes it.  The reason
+     * this isn't done for all regexes is that the optimizer was written under
+     * the assumption that locale was all-or-nothing.  Given the complexity and
+     * lack of documentation in the optimizer, and that there are inadequate
+     * test cases for locale, many parts of it may not work properly, it is
+     * safest to avoid locale unless necessary. */
      if (RExC_contains_locale) {
         ANYOF_POSIXL_SETALL(ssc);
      }
@@ -1438,6 +1530,71 @@ S_ssc_clear_locale(regnode_ssc *ssc)
      ANYOF_FLAGS(ssc) &= ~ANYOF_LOCALE_FLAGS;
  }
  
+#define NON_OTHER_COUNT   NON_OTHER_COUNT_FOR_USE_ONLY_BY_REGCOMP_DOT_C
+
+STATIC bool
+S_is_ssc_worth_it(const RExC_state_t * pRExC_state, const regnode_ssc * ssc)
+{
+    /* The synthetic start class is used to hopefully quickly winnow down
+     * places where a pattern could start a match in the target string.  If it
+     * doesn't really narrow things down that much, there isn't much point to
+     * having the overhead of using it.  This function uses some very crude
+     * heuristics to decide if to use the ssc or not.
+     *
+     * It returns TRUE if 'ssc' rules out more than half what it considers to
+     * be the "likely" possible matches, but of course it doesn't know what the
+     * actual things being matched are going to be; these are only guesses
+     *
+     * For /l matches, it assumes that the only likely matches are going to be
+     *      in the 0-255 range, uniformly distributed, so half of that is 127
+     * For /a and /d matches, it assumes that the likely matches will be just
+     *      the ASCII range, so half of that is 63
+     * For /u and there isn't anything matching above the Latin1 range, it
+     *      assumes that that is the only range likely to be matched, and uses
+     *      half that as the cut-off: 127.  If anything matches above Latin1,
+     *      it assumes that all of Unicode could match (uniformly), except for
+     *      non-Unicode code points and things in the General Category "Other"
+     *      (unassigned, private use, surrogates, controls and formats).  This
+     *      is a much large number. */
+
+    const U32 max_match = (LOC)
+                          ? 127
+                          : (! UNI_SEMANTICS)
+                            ? 63
+                            : (invlist_highest(ssc->invlist) < 256)
+                              ? 127
+                              : ((NON_OTHER_COUNT + 1) / 2) - 1;
+    U32 count = 0;      /* Running total of number of code points matched by
+                           'ssc' */
+    UV start, end;      /* Start and end points of current range in inversion
+                           list */
+
+    PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
+
+    invlist_iterinit(ssc->invlist);
+    while (invlist_iternext(ssc->invlist, &start, &end)) {
+
+        /* /u is the only thing that we expect to match above 255; so if not /u
+         * and even if there are matches above 255, ignore them.  This catches
+         * things like \d under /d which does match the digits above 255, but
+         * since the pattern is /d, it is not likely to be expecting them */
+        if (! UNI_SEMANTICS) {
+            if (start > 255) {
+                break;
+            }
+            end = MIN(end, 255);
+        }
+        count += end - start + 1;
+        if (count > max_match) {
+            invlist_iterfinish(ssc->invlist);
+            return FALSE;
+        }
+    }
+
+    return TRUE;
+}
+
+
  STATIC void
  S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
  {
@@ -1735,7 +1892,7 @@ S_dump_trie_interim_table(pTHX_ const struct _reg_trie_data *trie,
                May be the same as tail.
    tail       : item following the branch sequence
    count      : words in the sequence
-  flags      : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS)/
+  flags      : currently the OP() type we will be building one of /EXACT(|F|FA|FU|FU_SS|L|FLU8)/
    depth      : indent depth
  
  Inplace optimizes a sequence of 2 or more Branch-Exact nodes into a TRIE node.
@@ -1768,7 +1925,7 @@ then read 'r' and go to state 8 followed by 's' which takes us to state 9 which
  is also accepting. Thus we know that we can match both 'he' and 'hers' with a
  single traverse. We store a mapping from accepting to state to which word was
  matched, and then when we have multiple possibilities we try to complete the
-rest of the regex in the order in which they occured in the alternation.
+rest of the regex in the order in which they occurred in the alternation.
  
  The only prior NFA like behaviour that would be changed by the TRIE support is
  the silent ignoring of duplicate alternations which are of the form:
@@ -1999,10 +2156,11 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
  #endif
  
      switch (flags) {
-        case EXACT: break;
+        case EXACT: case EXACTL: break;
         case EXACTFA:
          case EXACTFU_SS:
-       case EXACTFU: folder = PL_fold_latin1; break;
+       case EXACTFU:
+       case EXACTFLU8: folder = PL_fold_latin1; break;
         case EXACTF:  folder = PL_fold; break;
          default: Perl_croak( aTHX_ "panic! In trie construction, unknown node type %u %s", (unsigned) flags, PL_reg_name[flags] );
      }
@@ -2013,7 +2171,7 @@ S_make_trie(pTHX_ RExC_state_t *pRExC_state, regnode *startbranch,
      trie->wordcount = word_count;
      RExC_rxi->data->data[ data_slot ] = (void*)trie;
      trie->charmap = (U16 *) PerlMemShared_calloc( 256, sizeof(U16) );
-    if (flags == EXACT)
+    if (flags == EXACT || flags == EXACTL)
         trie->bitmap = (char *) PerlMemShared_calloc( ANYOF_BITMAP_SIZE, 1 );
      trie->wordinfo = (reg_trie_wordinfo *) PerlMemShared_calloc(
                         trie->wordcount+1, sizeof(reg_trie_wordinfo));
@@ -3057,7 +3215,7 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
          StructCopy(source,op,struct regnode_charclass);
          stclass = (regnode *)op;
      }
-    OP(stclass)+=2; /* covert the TRIE type to its AHO-CORASICK equivalent */
+    OP(stclass)+=2; /* convert the TRIE type to its AHO-CORASICK equivalent */
  
      ARG_SET( stclass, data_slot );
      aho = (reg_ac_data *) PerlMemShared_calloc( 1, sizeof(reg_ac_data) );
@@ -3130,15 +3288,15 @@ S_construct_ahocorasick_from_trie(pTHX_ RExC_state_t *pRExC_state, regnode *sour
  
  #define DEBUG_PEEP(str,scan,depth) \
      DEBUG_OPTIMISE_r({if (scan){ \
-       SV * const mysv=sv_newmortal(); \
         regnode *Next = regnext(scan); \
-       regprop(RExC_rx, mysv, scan, NULL); \
-       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \
-       (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\
-       Next ? (REG_NODE_NUM(Next)) : 0 ); \
+       regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state); \
+       PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)", \
+           (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(RExC_mysv),\
+           Next ? (REG_NODE_NUM(Next)) : 0 ); \
+       DEBUG_SHOW_STUDY_FLAGS(flags," [ ","]");\
+       PerlIO_printf(Perl_debug_log, "\n"); \
     }});
  
-
  /* The below joins as many adjacent EXACTish nodes as possible into a single
   * one.  The regop may be changed if the node(s) contain certain sequences that
   * require special handling.  The joining is only done if:
@@ -3356,7 +3514,7 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
       * this final joining, sequences could have been split over boundaries, and
       * hence missed).  The sequences only happen in folding, hence for any
       * non-EXACT EXACTish node */
-    if (OP(scan) != EXACT) {
+    if (OP(scan) != EXACT && OP(scan) != EXACTL) {
          U8* s0 = (U8*) STRING(scan);
          U8* s = s0;
          U8* s_end = s0 + STR_LEN(scan);
@@ -3579,17 +3737,17 @@ S_join_exact(pTHX_ RExC_state_t *pRExC_state, regnode *scan,
      Newx(and_withp,1, regnode_ssc); \
      SAVEFREEPV(and_withp)
  
-/* this is a chain of data about sub patterns we are processing that
-   need to be handled separately/specially in study_chunk. Its so
-   we can simulate recursion without losing state.  */
-struct scan_frame;
-typedef struct scan_frame {
-    regnode *last;  /* last node to process in this frame */
-    regnode *next;  /* next node to process when last is reached */
-    struct scan_frame *prev; /*previous frame*/
-    U32 prev_recursed_depth;
-    I32 stop; /* what stopparen do we use */
-} scan_frame;
+
+static void
+S_unwind_scan_frames(pTHX_ const void *p)
+{
+    scan_frame *f= (scan_frame *)p;
+    do {
+        scan_frame *n= f->next_frame;
+        Safefree(f);
+        f= n;
+    } while (f);
+}
  
  
  STATIC SSize_t
@@ -3626,9 +3784,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
      PERL_ARGS_ASSERT_STUDY_CHUNK;
  
-#ifdef DEBUGGING
-    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
-#endif
+
      if ( depth == 0 ) {
          while (first_non_open && OP(first_non_open) == OPEN)
              first_non_open=regnext(first_non_open);
@@ -3636,35 +3792,50 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
  
    fake_study_recurse:
+    DEBUG_r(
+        RExC_study_chunk_recursed_count++;
+    );
+    DEBUG_OPTIMISE_MORE_r(
+    {
+        PerlIO_printf(Perl_debug_log,
+            "%*sstudy_chunk stopparen=%ld recursed_count=%lu depth=%lu recursed_depth=%lu scan=%p last=%p",
+            (int)(depth*2), "", (long)stopparen,
+            (unsigned long)RExC_study_chunk_recursed_count,
+            (unsigned long)depth, (unsigned long)recursed_depth,
+            scan,
+            last);
+        if (recursed_depth) {
+            U32 i;
+            U32 j;
+            for ( j = 0 ; j < recursed_depth ; j++ ) {
+                for ( i = 0 ; i < (U32)RExC_npar ; i++ ) {
+                    if (
+                        PAREN_TEST(RExC_study_chunk_recursed +
+                                   ( j * RExC_study_chunk_recursed_bytes), i )
+                        && (
+                            !j ||
+                            !PAREN_TEST(RExC_study_chunk_recursed +
+                                   (( j - 1 ) * RExC_study_chunk_recursed_bytes), i)
+                        )
+                    ) {
+                        PerlIO_printf(Perl_debug_log," %d",(int)i);
+                        break;
+                    }
+                }
+                if ( j + 1 < recursed_depth ) {
+                    PerlIO_printf(Perl_debug_log, ",");
+                }
+            }
+        }
+        PerlIO_printf(Perl_debug_log,"\n");
+    }
+    );
      while ( scan && OP(scan) != END && scan < last ){
          UV min_subtract = 0;    /* How mmany chars to subtract from the minimum
                                     node length to get a real minimum (because
                                     the folded version may be shorter) */
         bool unfolded_multi_char = FALSE;
         /* Peephole optimizer: */
-        DEBUG_OPTIMISE_MORE_r(
-        {
-            PerlIO_printf(Perl_debug_log,
-                "%*sstudy_chunk stopparen=%ld depth=%lu recursed_depth=%lu ",
-                ((int) depth*2), "", (long)stopparen,
-                (unsigned long)depth, (unsigned long)recursed_depth);
-            if (recursed_depth) {
-                U32 i;
-                U32 j;
-                for ( j = 0 ; j < recursed_depth ; j++ ) {
-                    PerlIO_printf(Perl_debug_log,"[");
-                    for ( i = 0 ; i < (U32)RExC_npar ; i++ )
-                        PerlIO_printf(Perl_debug_log,"%d",
-                            PAREN_TEST(RExC_study_chunk_recursed +
-                                       (j * RExC_study_chunk_recursed_bytes), i)
-                            ? 1 : 0
-                        );
-                    PerlIO_printf(Perl_debug_log,"]");
-                }
-            }
-            PerlIO_printf(Perl_debug_log,"\n");
-        }
-        );
          DEBUG_STUDYDATA("Peep:", data, depth);
          DEBUG_PEEP("Peep", scan, depth);
  
@@ -3698,17 +3869,47 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 NEXT_OFF(scan) = off;
         }
  
-
-
         /* The principal pseudo-switch.  Cannot be a switch, since we
            look into several different things.  */
-       if (OP(scan) == BRANCH || OP(scan) == BRANCHJ
-                  || OP(scan) == IFTHEN) {
+        if ( OP(scan) == DEFINEP ) {
+            SSize_t minlen = 0;
+            SSize_t deltanext = 0;
+            SSize_t fake_last_close = 0;
+            I32 f = SCF_IN_DEFINE;
+
+            StructCopy(&zero_scan_data, &data_fake, scan_data_t);
+            scan = regnext(scan);
+            assert( OP(scan) == IFTHEN );
+            DEBUG_PEEP("expect IFTHEN", scan, depth);
+
+            data_fake.last_closep= &fake_last_close;
+            minlen = *minlenp;
+            next = regnext(scan);
+            scan = NEXTOPER(NEXTOPER(scan));
+            DEBUG_PEEP("scan", scan, depth);
+            DEBUG_PEEP("next", next, depth);
+
+            /* we suppose the run is continuous, last=next...
+             * NOTE we dont use the return here! */
+            (void)study_chunk(pRExC_state, &scan, &minlen,
+                              &deltanext, next, &data_fake, stopparen,
+                              recursed_depth, NULL, f, depth+1);
+
+            scan = next;
+        } else
+        if (
+            OP(scan) == BRANCH  ||
+            OP(scan) == BRANCHJ ||
+            OP(scan) == IFTHEN
+        ) {
             next = regnext(scan);
             code = OP(scan);
-            /* demq: the op(next)==code check is to see if we have
-             * "branch-branch" AFAICT */
  
+            /* The op(next)==code check below is to see if we
+             * have "BRANCH-BRANCH", "BRANCHJ-BRANCHJ", "IFTHEN-IFTHEN"
+             * IFTHEN is special as it might not appear in pairs.
+             * Not sure whether BRANCH-BRANCHJ is possible, regardless
+             * we dont handle it cleanly. */
             if (OP(next) == code || code == IFTHEN) {
                  /* NOTE - There is similar code to this block below for
                   * handling TRIE nodes on a re-study.  If you change stuff here
@@ -3730,8 +3931,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     I32 f = 0;
                     regnode_ssc this_class;
  
+                    DEBUG_PEEP("Branch", scan, depth);
+
                     num++;
-                   data_fake.flags = 0;
+                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
                     if (data) {
                         data_fake.whilem_c = data->whilem_c;
                         data_fake.last_closep = data->last_closep;
@@ -3741,9 +3944,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
                     data_fake.pos_delta = delta;
                     next = regnext(scan);
-                   scan = NEXTOPER(scan);
-                   if (code != BRANCH)
+
+                    scan = NEXTOPER(scan); /* everything */
+                    if (code != BRANCH)    /* everything but BRANCH */
                         scan = NEXTOPER(scan);
+
                     if (flags & SCF_DO_STCLASS) {
                         ssc_init(pRExC_state, &this_class);
                         data_fake.start_class = &this_class;
@@ -3756,6 +3961,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     minnext = study_chunk(pRExC_state, &scan, minlenp,
                                        &deltanext, next, &data_fake, stopparen,
                                        recursed_depth, NULL, f,depth+1);
+
                     if (min1 > minnext)
                         min1 = minnext;
                     if (deltanext == SSize_t_MAX) {
@@ -3881,9 +4087,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                          U8 trietype = 0;
                          U32 count=0;
  
-#ifdef DEBUGGING
-                        SV * const mysv = sv_newmortal();   /* for dumping */
-#endif
                          /* var tail is used because there may be a TAIL
                             regop in the way. Ie, the exacts will point to the
                             thing following the TAIL, but the last branch will
@@ -3899,11 +4102,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  
  
                          DEBUG_TRIE_COMPILE_r({
-                            regprop(RExC_rx, mysv, tail, NULL);
+                            regprop(RExC_rx, RExC_mysv, tail, NULL, pRExC_state);
                              PerlIO_printf( Perl_debug_log, "%*s%s%s\n",
                                (int)depth * 2 + 2, "",
                                "Looking for TRIE'able sequences. Tail node is: ",
-                              SvPV_nolen_const( mysv )
+                              SvPV_nolen_const( RExC_mysv )
                              );
                          });
  
@@ -3959,14 +4162,24 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                  EXACTFU         | EXACTFU
                                  EXACTFU_SS      | EXACTFU
                                  EXACTFA         | EXACTFA
+                                EXACTL          | EXACTL
+                                EXACTFLU8       | EXACTFLU8
  
  
                          */
-#define TRIE_TYPE(X) ( ( NOTHING == (X) ) ? NOTHING :   \
-                       ( EXACT == (X) )   ? EXACT :        \
-                       ( EXACTFU == (X) || EXACTFU_SS == (X) ) ? EXACTFU :        \
-                       ( EXACTFA == (X) ) ? EXACTFA :        \
-                       0 )
+#define TRIE_TYPE(X) ( ( NOTHING == (X) )                                   \
+                       ? NOTHING                                            \
+                       : ( EXACT == (X) )                                   \
+                         ? EXACT                                            \
+                         : ( EXACTFU == (X) || EXACTFU_SS == (X) )          \
+                           ? EXACTFU                                        \
+                           : ( EXACTFA == (X) )                             \
+                             ? EXACTFA                                      \
+                             : ( EXACTL == (X) )                            \
+                               ? EXACTL                                     \
+                               : ( EXACTFLU8 == (X) )                        \
+                                 ? EXACTFLU8                                 \
+                                 : 0 )
  
                          /* dont use tail as the end marker for this traverse */
                          for ( cur = startbranch ; cur != scan ; cur = regnext( cur ) ) {
@@ -3980,18 +4193,18 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
  #endif
  
                              DEBUG_TRIE_COMPILE_r({
-                                regprop(RExC_rx, mysv, cur, NULL);
+                                regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
                                  PerlIO_printf( Perl_debug_log, "%*s- %s (%d)",
-                                   (int)depth * 2 + 2,"", SvPV_nolen_const( mysv ), REG_NODE_NUM(cur) );
+                                   (int)depth * 2 + 2,"", SvPV_nolen_const( RExC_mysv ), REG_NODE_NUM(cur) );
  
-                                regprop(RExC_rx, mysv, noper, NULL);
+                                regprop(RExC_rx, RExC_mysv, noper, NULL, pRExC_state);
                                  PerlIO_printf( Perl_debug_log, " -> %s",
-                                    SvPV_nolen_const(mysv));
+                                    SvPV_nolen_const(RExC_mysv));
  
                                  if ( noper_next ) {
-                                  regprop(RExC_rx, mysv, noper_next, NULL);
+                                  regprop(RExC_rx, RExC_mysv, noper_next, NULL, pRExC_state);
                                    PerlIO_printf( Perl_debug_log,"\t=> %s\t",
-                                    SvPV_nolen_const(mysv));
+                                    SvPV_nolen_const(RExC_mysv));
                                  }
                                  PerlIO_printf( Perl_debug_log, "(First==%d,Last==%d,Cur==%d,tt==%s,nt==%s,nnt==%s)\n",
                                     REG_NODE_NUM(first), REG_NODE_NUM(last), REG_NODE_NUM(cur),
@@ -4088,11 +4301,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                              } /* end handle unmergable node */
                          } /* loop over branches */
                          DEBUG_TRIE_COMPILE_r({
-                            regprop(RExC_rx, mysv, cur, NULL);
+                            regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
                              PerlIO_printf( Perl_debug_log,
                                "%*s- %s (%d) <SCAN FINISHED>\n",
                                (int)depth * 2 + 2,
-                              "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
+                              "", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
  
                          });
                          if ( last && trietype ) {
@@ -4128,10 +4341,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                                       * something like this: (?:|) So we can
                                       * turn it into a plain NOTHING op. */
                                      DEBUG_TRIE_COMPILE_r({
-                                        regprop(RExC_rx, mysv, cur, NULL);
+                                        regprop(RExC_rx, RExC_mysv, cur, NULL, pRExC_state);
                                          PerlIO_printf( Perl_debug_log,
                                            "%*s- %s (%d) <NOTHING BRANCH SEQUENCE>\n", (int)depth * 2 + 2,
-                                          "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur));
+                                          "", SvPV_nolen_const( RExC_mysv ),REG_NODE_NUM(cur));
  
                                      });
                                      OP(startbranch)= NOTHING;
@@ -4152,28 +4365,68 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 scan = NEXTOPER(scan);
             continue;
         } else if (OP(scan) == SUSPEND || OP(scan) == GOSUB || OP(scan) == GOSTART) {
-           scan_frame *newframe = NULL;
-           I32 paren;
-           regnode *start;
-           regnode *end;
+            I32 paren = 0;
+            regnode *start = NULL;
+            regnode *end = NULL;
              U32 my_recursed_depth= recursed_depth;
  
-           if (OP(scan) != SUSPEND) {
-                /* set the pointer */
+
+            if (OP(scan) != SUSPEND) { /* GOSUB/GOSTART */
+                /* Do setup, note this code has side effects beyond
+                 * the rest of this block. Specifically setting
+                 * RExC_recurse[] must happen at least once during
+                 * study_chunk(). */
                 if (OP(scan) == GOSUB) {
                     paren = ARG(scan);
                     RExC_recurse[ARG2L(scan)] = scan;
                      start = RExC_open_parens[paren-1];
                      end   = RExC_close_parens[paren-1];
                  } else {
-                    paren = 0;
                      start = RExC_rxi->program + 1;
                      end   = RExC_opend;
                  }
-                if (!recursed_depth
+                /* NOTE we MUST always execute the above code, even
+                 * if we do nothing with a GOSUB/GOSTART */
+                if (
+                    ( flags & SCF_IN_DEFINE )
+                    ||
+                    (
+                        (is_inf_internal || is_inf || (data && data->flags & SF_IS_INF))
+                        &&
+                        ( (flags & (SCF_DO_STCLASS | SCF_DO_SUBSTR)) == 0 )
+                    )
+                ) {
+                    /* no need to do anything here if we are in a define. */
+                    /* or we are after some kind of infinite construct
+                     * so we can skip recursing into this item.
+                     * Since it is infinite we will not change the maxlen
+                     * or delta, and if we miss something that might raise
+                     * the minlen it will merely pessimise a little.
+                     *
+                     * Iow /(?(DEFINE)(?<foo>foo|food))a+(?&foo)/
+                     * might result in a minlen of 1 and not of 4,
+                     * but this doesn't make us mismatch, just try a bit
+                     * harder than we should.
+                     * */
+                    scan= regnext(scan);
+                    continue;
+                }
+
+                if (
+                    !recursed_depth
                      ||
                      !PAREN_TEST(RExC_study_chunk_recursed + ((recursed_depth-1) * RExC_study_chunk_recursed_bytes), paren)
                  ) {
+                    /* it is quite possible that there are more efficient ways
+                     * to do this. We maintain a bitmap per level of recursion
+                     * of which patterns we have entered so we can detect if a
+                     * pattern creates a possible infinite loop. When we
+                     * recurse down a level we copy the previous levels bitmap
+                     * down. When we are at recursion level 0 we zero the top
+                     * level bitmap. It would be nice to implement a different
+                     * more efficient way of doing this. In particular the top
+                     * level bitmap may be unnecessary.
+                     */
                      if (!recursed_depth) {
                          Zero(RExC_study_chunk_recursed, RExC_study_chunk_recursed_bytes, U8);
                      } else {
@@ -4185,7 +4438,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                     DEBUG_STUDYDATA("set:", data,depth);
                      PAREN_SET(RExC_study_chunk_recursed + (recursed_depth * RExC_study_chunk_recursed_bytes), paren);
                      my_recursed_depth= recursed_depth + 1;
-                    Newx(newframe,1,scan_frame);
                  } else {
                     DEBUG_STUDYDATA("inf:", data,depth);
                      /* some form of infinite recursion, assume infinite length
@@ -4198,22 +4450,37 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                      if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
                          ssc_anything(data->start_class);
                      flags &= ~SCF_DO_STCLASS;
+
+                    start= NULL; /* reset start so we dont recurse later on. */
                 }
              } else {
-               Newx(newframe,1,scan_frame);
                 paren = stopparen;
-               start = scan+2;
+                start = scan + 2;
                 end = regnext(scan);
             }
-           if (newframe) {
-                assert(start);
+            if (start) {
+                scan_frame *newframe;
                  assert(end);
-               SAVEFREEPV(newframe);
-               newframe->next = regnext(scan);
-               newframe->last = last;
-               newframe->stop = stopparen;
-               newframe->prev = frame;
+                if (!RExC_frame_last) {
+                    Newxz(newframe, 1, scan_frame);
+                    SAVEDESTRUCTOR_X(S_unwind_scan_frames, newframe);
+                    RExC_frame_head= newframe;
+                    RExC_frame_count++;
+                } else if (!RExC_frame_last->next_frame) {
+                    Newxz(newframe,1,scan_frame);
+                    RExC_frame_last->next_frame= newframe;
+                    newframe->prev_frame= RExC_frame_last;
+                    RExC_frame_count++;
+                } else {
+                    newframe= RExC_frame_last->next_frame;
+                }
+                RExC_frame_last= newframe;
+
+                newframe->next_regnode = regnext(scan);
+                newframe->last_regnode = last;
+                newframe->stopparen = stopparen;
                  newframe->prev_recursed_depth = recursed_depth;
+                newframe->this_prev_frame= frame;
  
                  DEBUG_STUDYDATA("frame-new:",data,depth);
                  DEBUG_PEEP("fnew", scan, depth);
@@ -4228,7 +4495,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 continue;
             }
         }
-       else if (OP(scan) == EXACT) {
+       else if (OP(scan) == EXACT || OP(scan) == EXACTL) {
             SSize_t l = STR_LEN(scan);
             UV uc;
             if (UTF) {
@@ -4279,12 +4546,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             }
             flags &= ~SCF_DO_STCLASS;
         }
-        else if (PL_regkind[OP(scan)] == EXACT) { /* But OP != EXACT!, so is
-                                                     EXACTFish */
+        else if (PL_regkind[OP(scan)] == EXACT) {
+            /* But OP != EXACT!, so is EXACTFish */
             SSize_t l = STR_LEN(scan);
-           UV uc = *((U8*)STRING(scan));
-            SV* EXACTF_invlist = _new_invlist(4); /* Start out big enough for 2
-                                                     separate code points */
              const U8 * s = (U8*)STRING(scan);
  
             /* Search for fixed substrings supports EXACT only. */
@@ -4293,7 +4557,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                  scan_commit(pRExC_state, data, minlenp, is_inf);
             }
             if (UTF) {
-               uc = utf8_to_uvchr_buf(s, s + l, NULL);
                 l = utf8_length(s, s + l);
             }
             if (unfolded_multi_char) {
@@ -4313,156 +4576,27 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 }
             }
  
-            if (OP(scan) != EXACTFL && flags & SCF_DO_STCLASS_AND) {
-                ssc_clear_locale(data->start_class);
-            }
-
-            if (! UTF) {
-
-                /* We punt and assume can match anything if the node begins
-                 * with a multi-character fold.  Things are complicated.  For
-                 * example, /ffi/i could match any of:
-                 *  "\N{LATIN SMALL LIGATURE FFI}"
-                 *  "\N{LATIN SMALL LIGATURE FF}I"
-                 *  "F\N{LATIN SMALL LIGATURE FI}"
-                 *  plus several other things; and making sure we have all the
-                 *  possibilities is hard. */
-                if (is_MULTI_CHAR_FOLD_latin1_safe(s, s + STR_LEN(scan))) {
-                    EXACTF_invlist =
-                             _add_range_to_invlist(EXACTF_invlist, 0, UV_MAX);
-                }
-                else {
-
-                    /* Any Latin1 range character can potentially match any
-                     * other depending on the locale */
-                    if (OP(scan) == EXACTFL) {
-                        _invlist_union(EXACTF_invlist, PL_Latin1,
-                                                              &EXACTF_invlist);
-                    }
-                    else {
-                        /* But otherwise, it matches at least itself.  We can
-                         * quickly tell if it has a distinct fold, and if so,
-                         * it matches that as well */
-                        EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, uc);
-                        if (IS_IN_SOME_FOLD_L1(uc)) {
-                            EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist,
-                                                           PL_fold_latin1[uc]);
-                        }
-                    }
-
-                    /* Some characters match above-Latin1 ones under /i.  This
-                     * is true of EXACTFL ones when the locale is UTF-8 */
-                    if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
-                        && (! isASCII(uc) || (OP(scan) != EXACTFA
-                                            && OP(scan) != EXACTFA_NO_TRIE)))
-                    {
-                        add_above_Latin1_folds(pRExC_state,
-                                               (U8) uc,
-                                               &EXACTF_invlist);
-                    }
-                }
-            }
-            else {  /* Pattern is UTF-8 */
-                U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
-                STRLEN foldlen = UTF8SKIP(s);
-                const U8* e = s + STR_LEN(scan);
-                SV** listp;
-
-                /* The only code points that aren't folded in a UTF EXACTFish
-                 * node are are the problematic ones in EXACTFL nodes */
-                if (OP(scan) == EXACTFL
-                    && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc))
-                {
-                    /* We need to check for the possibility that this EXACTFL
-                     * node begins with a multi-char fold.  Therefore we fold
-                     * the first few characters of it so that we can make that
-                     * check */
-                    U8 *d = folded;
-                    int i;
-
-                    for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
-                        if (isASCII(*s)) {
-                            *(d++) = (U8) toFOLD(*s);
-                            s++;
-                        }
-                        else {
-                            STRLEN len;
-                            to_utf8_fold(s, d, &len);
-                            d += len;
-                            s += UTF8SKIP(s);
-                        }
-                    }
-
-                    /* And set up so the code below that looks in this folded
-                     * buffer instead of the node's string */
-                    e = d;
-                    foldlen = UTF8SKIP(folded);
-                    s = folded;
-                }
+            if (flags & SCF_DO_STCLASS) {
+                SV* EXACTF_invlist = _make_exactf_invlist(pRExC_state, scan);
  
-                /* When we reach here 's' points to the fold of the first
-                 * character(s) of the node; and 'e' points to far enough along
-                 * the folded string to be just past any possible multi-char
-                 * fold. 'foldlen' is the length in bytes of the first
-                 * character in 's'
-                 *
-                 * Unlike the non-UTF-8 case, the macro for determining if a
-                 * string is a multi-char fold requires all the characters to
-                 * already be folded.  This is because of all the complications
-                 * if not.  Note that they are folded anyway, except in EXACTFL
-                 * nodes.  Like the non-UTF case above, we punt if the node
-                 * begins with a multi-char fold  */
-
-                if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
-                    EXACTF_invlist =
-                             _add_range_to_invlist(EXACTF_invlist, 0, UV_MAX);
+                assert(EXACTF_invlist);
+                if (flags & SCF_DO_STCLASS_AND) {
+                    if (OP(scan) != EXACTFL)
+                        ssc_clear_locale(data->start_class);
+                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
+                    ANYOF_POSIXL_ZERO(data->start_class);
+                    ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
                  }
-                else {  /* Single char fold */
-
-                    /* It matches all the things that fold to it, which are
-                     * found in PL_utf8_foldclosures (including itself) */
-                    EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, uc);
-                    if (! PL_utf8_foldclosures) {
-                        _load_PL_utf8_foldclosures();
-                    }
-                    if ((listp = hv_fetch(PL_utf8_foldclosures,
-                                        (char *) s, foldlen, FALSE)))
-                    {
-                        AV* list = (AV*) *listp;
-                        IV k;
-                        for (k = 0; k <= av_tindex(list); k++) {
-                            SV** c_p = av_fetch(list, k, FALSE);
-                            UV c;
-                            assert(c_p);
-
-                            c = SvUV(*c_p);
-
-                            /* /aa doesn't allow folds between ASCII and non- */
-                            if ((OP(scan) == EXACTFA || OP(scan) == EXACTFA_NO_TRIE)
-                                && isASCII(c) != isASCII(uc))
-                            {
-                                continue;
-                            }
+                else {  /* SCF_DO_STCLASS_OR */
+                    ssc_union(data->start_class, EXACTF_invlist, FALSE);
+                    ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
  
-                            EXACTF_invlist = add_cp_to_invlist(EXACTF_invlist, c);
-                        }
-                    }
+                    /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
+                    ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
                  }
+                flags &= ~SCF_DO_STCLASS;
+                SvREFCNT_dec(EXACTF_invlist);
              }
-           if (flags & SCF_DO_STCLASS_AND) {
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-                ANYOF_POSIXL_ZERO(data->start_class);
-                ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
-           }
-           else if (flags & SCF_DO_STCLASS_OR) {
-                ssc_union(data->start_class, EXACTF_invlist, FALSE);
-               ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
-
-                /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
-           }
-           flags &= ~SCF_DO_STCLASS;
-            SvREFCNT_dec(EXACTF_invlist);
         }
         else if (REGNODE_VARIES(OP(scan))) {
             SSize_t mincount, maxcount, minnext, deltanext, pos_before = 0;
@@ -4479,7 +4613,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
             case PLUS:
                 if (flags & (SCF_DO_SUBSTR | SCF_DO_STCLASS)) {
                     next = NEXTOPER(scan);
-                   if (OP(next) == EXACT || (flags & SCF_DO_STCLASS)) {
+                   if (OP(next) == EXACT
+                        || OP(next) == EXACTL
+                        || (flags & SCF_DO_STCLASS))
+                    {
                         mincount = 1;
                         maxcount = REG_INFTY;
                         next = regnext(scan);
@@ -4602,8 +4739,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                 {
                     /* Fatal warnings may leak the regexp without this: */
                     SAVEFREESV(RExC_rx_sv);
-                   ckWARNreg(RExC_parse,
-                           "Quantifier unexpected on zero-length expression");
+                   Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
+                       "Quantifier unexpected on zero-length expression "
+                       "in regex m/%"UTF8f"/",
+                        UTF8fARG(UTF, RExC_end - RExC_precomp,
+                                 RExC_precomp));
                     (void)ReREFCNT_inc(RExC_rx_sv);
                 }
  
@@ -4787,8 +4927,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
                         } else {
                             /* start offset must point into the last copy */
                             data->last_start_min += minnext * (mincount - 1);
-                           data->last_start_max += is_inf ? SSize_t_MAX
-                               : (maxcount - 1) * (minnext + data->pos_delta);
+                           data->last_start_max =
+                              is_inf
+                               ? SSize_t_MAX
+                              : data->last_start_max +
+                                 (maxcount - 1) * (minnext + data->pos_delta);
                         }
                     }
                     /* It is counted once already... */
@@ -4892,7 +5035,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                 flags &= ~SCF_DO_STCLASS;
              }
             min++;
-           delta++;    /* Because of the 2 char string cr-lf */
+            if (delta != SSize_t_MAX)
+                delta++;    /* Because of the 2 char string cr-lf */
              if (flags & SCF_DO_SUBSTR) {
                  /* Cannot expect anything... */
                  scan_commit(pRExC_state, data, minlenp, is_inf);
@@ -4910,7 +5054,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
             min++;
             if (flags & SCF_DO_STCLASS) {
                  bool invert = 0;
-                SV* my_invlist = sv_2mortal(_new_invlist(0));
+                SV* my_invlist = NULL;
                  U8 namedclass;
  
                  /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
@@ -4954,6 +5098,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                     }
                     break;
  
+                case ANYOFL:
                  case ANYOF:
                     if (flags & SCF_DO_STCLASS_AND)
                         ssc_and(pRExC_state, data->start_class,
@@ -5009,7 +5154,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                      /* FALLTHROUGH */
                 case POSIXA:
                      if (FLAGS(scan) == _CC_ASCII) {
-                        my_invlist = PL_XPosix_ptrs[_CC_ASCII];
+                        my_invlist = invlist_clone(PL_XPosix_ptrs[_CC_ASCII]);
                      }
                      else {
                          _invlist_intersection(PL_XPosix_ptrs[FLAGS(scan)],
@@ -5046,6 +5191,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                          assert(flags & SCF_DO_STCLASS_OR);
                          ssc_union(data->start_class, my_invlist, invert);
                      }
+                    SvREFCNT_dec(my_invlist);
                 }
                 if (flags & SCF_DO_STCLASS_OR)
                     ssc_and(pRExC_state, data->start_class, (regnode_charclass *) and_withp);
@@ -5064,33 +5210,6 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                    && (scan->flags || data || (flags & SCF_DO_STCLASS))
                    && (OP(scan) == IFMATCH || OP(scan) == UNLESSM))
          {
-            if ( OP(scan) == UNLESSM &&
-                 scan->flags == 0 &&
-                 OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
-                 OP(regnext(NEXTOPER(NEXTOPER(scan)))) == SUCCEED
-            ) {
-                regnode *opt;
-                regnode *upto= regnext(scan);
-                DEBUG_PARSE_r({
-                    SV * const mysv_val=sv_newmortal();
-                    DEBUG_STUDYDATA("OPFAIL",data,depth);
-
-                    /*DEBUG_PARSE_MSG("opfail");*/
-                    regprop(RExC_rx, mysv_val, upto, NULL);
-                    PerlIO_printf(Perl_debug_log,
-                        "~ replace with OPFAIL pointed at %s (%"IVdf") offset %"IVdf"\n",
-                        SvPV_nolen_const(mysv_val),
-                        (IV)REG_NODE_NUM(upto),
-                        (IV)(upto - scan)
-                    );
-                });
-                OP(scan) = OPFAIL;
-                NEXT_OFF(scan) = upto - scan;
-                for (opt= scan + 1; opt < upto ; opt++)
-                    OP(opt) = OPTIMIZED;
-                scan= upto;
-                continue;
-            }
              if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
                  || OP(scan) == UNLESSM )
              {
@@ -5103,7 +5222,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                  regnode_ssc intrnl;
                  int f = 0;
  
-                data_fake.flags = 0;
+                StructCopy(&zero_scan_data, &data_fake, scan_data_t);
                  if (data) {
                      data_fake.whilem_c = data->whilem_c;
                      data_fake.last_closep = data->last_closep;
@@ -5347,7 +5466,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                      SSize_t deltanext=0, minnext=0, f = 0, fake;
                      regnode_ssc this_class;
  
-                    data_fake.flags = 0;
+                    StructCopy(&zero_scan_data, &data_fake, scan_data_t);
                      if (data) {
                          data_fake.whilem_c = data->whilem_c;
                          data_fake.last_closep = data->last_closep;
@@ -5410,7 +5529,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                      data->longest = &(data->longest_float);
              }
              min += min1;
-            delta += max1 - min1;
+            if (delta != SSize_t_MAX)
+                delta += max1 - min1;
              if (flags & SCF_DO_STCLASS_OR) {
                  ssc_or(pRExC_state, data->start_class, (regnode_charclass *) &accum);
                  if (min1) {
@@ -5470,16 +5590,19 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
      }
      */
      if (frame) {
+        depth = depth - 1;
+
          DEBUG_STUDYDATA("frame-end:",data,depth);
          DEBUG_PEEP("fend", scan, depth);
+
          /* restore previous context */
-        last = frame->last;
-        scan = frame->next;
-        stopparen = frame->stop;
+        last = frame->last_regnode;
+        scan = frame->next_regnode;
+        stopparen = frame->stopparen;
          recursed_depth = frame->prev_recursed_depth;
-        depth = depth - 1;
  
-        frame = frame->prev;
+        RExC_frame_last = frame->prev_frame;
+        frame = frame->this_prev_frame;
          goto fake_study_recurse;
      }
  
@@ -5512,12 +5635,15 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
      {
          SSize_t final_minlen= min < stopmin ? min : stopmin;
  
-        if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN) && (RExC_maxlen < final_minlen + delta)) {
-            RExC_maxlen = final_minlen + delta;
+        if (!(RExC_seen & REG_UNBOUNDED_QUANTIFIER_SEEN)) {
+            if (final_minlen > SSize_t_MAX - delta)
+                RExC_maxlen = SSize_t_MAX;
+            else if (RExC_maxlen < final_minlen + delta)
+                RExC_maxlen = final_minlen + delta;
          }
          return final_minlen;
      }
-    /* not-reached */
+    NOT_REACHED; /* NOTREACHED */
  }
  
  STATIC U32
@@ -5770,7 +5896,7 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
              if (oplist) {
                  assert(oplist->op_type == OP_PADAV
                      || oplist->op_type == OP_RV2AV);
-                oplist = OP_SIBLING(oplist);
+                oplist = OpSIBLING(oplist);
              }
  
              if (SvRMAGICAL(av)) {
@@ -5817,10 +5943,10 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  pRExC_state->code_blocks[n].src_regex = NULL;
                  n++;
                  code = 1;
-                oplist = OP_SIBLING(oplist); /* skip CONST */
+                oplist = OpSIBLING(oplist); /* skip CONST */
                  assert(oplist);
              }
-            oplist = OP_SIBLING(oplist);;
+            oplist = OpSIBLING(oplist);;
          }
  
         /* apply magic and QR overloading to arg */
@@ -6054,6 +6180,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state,
  
         ENTER;
         SAVETMPS;
+       save_re_context();
         PUSHSTACKi(PERLSI_REQUIRE);
          /* G_RE_REPARSING causes the toker to collapse \\ into \ when
           * parsing qr''; normally only q'' does this. It also alters
@@ -6292,7 +6419,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
  
      DEBUG_r(if (!PL_colorset) reginitcolors());
  
-#ifndef PERL_IN_XSUB_RE
      /* Initialize these here instead of as-needed, as is quick and avoids
       * having to test them each time otherwise */
      if (! PL_AboveLatin1) {
@@ -6310,7 +6436,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
                                                      NUM_ANYOF_CODE_POINTS - 1);
      }
-#endif
  
      pRExC_state->code_blocks = NULL;
      pRExC_state->num_code_blocks = 0;
@@ -6324,7 +6449,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
         OP *o;
         int ncode = 0;
  
-       for (o = cLISTOPx(expr)->op_first; o; o = OP_SIBLING(o))
+       for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o))
             if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
                 ncode++; /* count of DO blocks */
         if (ncode) {
@@ -6345,7 +6470,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (expr->op_type == OP_CONST)
              n = 1;
          else
-            for (o = cLISTOPx(expr)->op_first; o; o = OP_SIBLING(o)) {
+            for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) {
                  if (o->op_type == OP_CONST)
                      n++;
              }
@@ -6361,7 +6486,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          if (expr->op_type == OP_CONST)
              new_patternp[n] = cSVOPx_sv(expr);
          else
-            for (o = cLISTOPx(expr)->op_first; o; o = OP_SIBLING(o)) {
+            for (o = cLISTOPx(expr)->op_first; o; o = OpSIBLING(o)) {
                  if (o->op_type == OP_CONST)
                      new_patternp[n++] = cSVOPo_sv;
              }
@@ -6381,7 +6506,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
              assert(   expr->op_type == OP_PUSHMARK
                     || (expr->op_type == OP_NULL && expr->op_targ == OP_PUSHMARK)
                     || expr->op_type == OP_PADRANGE);
-            expr = OP_SIBLING(expr);
+            expr = OpSIBLING(expr);
      }
  
      pat = S_concat_pat(aTHX_ pRExC_state, NULL, new_patternp, pat_count,
@@ -6425,8 +6550,16 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_uni_semantics = 0;
      RExC_contains_locale = 0;
      RExC_contains_i = 0;
+    RExC_strict = cBOOL(pm_flags & RXf_PMf_STRICT);
      pRExC_state->runtime_code_qr = NULL;
+    RExC_frame_head= NULL;
+    RExC_frame_last= NULL;
+    RExC_frame_count= 0;
  
+    DEBUG_r({
+        RExC_mysv1= sv_newmortal();
+        RExC_mysv2= sv_newmortal();
+    });
      DEBUG_COMPILE_r({
              SV *dsv= sv_newmortal();
              RE_PV_QUOTED_DECL(s, RExC_utf8, dsv, exp, plen, 60);
@@ -6503,6 +6636,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_seen_zerolen = *exp == '^' ? -1 : 0;
      RExC_extralen = 0;
      RExC_override_recoding = 0;
+#ifdef EBCDIC
+    RExC_recode_x_to_native = 0;
+#endif
      RExC_in_multi_char_class = 0;
  
      /* First pass: determine size, legality. */
@@ -6528,10 +6664,6 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_recurse_count = 0;
      pRExC_state->code_index = 0;
  
-#if 0 /* REGC() is (currently) a NOP at the first pass.
-       * Clever compilers notice this and complain. --jhi */
-    REGC((U8)REG_MAGIC, (char*)RExC_emit);
-#endif
      DEBUG_PARSE_r(
         PerlIO_printf(Perl_debug_log, "Starting first pass (sizing)\n");
          RExC_lastnum=0;
@@ -6643,7 +6775,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                                     == REG_RUN_ON_COMMENT_SEEN);
         U16 reganch = (U16)((r->extflags & RXf_PMf_STD_PMMOD)
                             >> RXf_PMf_STD_PMMOD_SHIFT);
-       const char *fptr = STD_PAT_MODS;        /*"msix"*/
+       const char *fptr = STD_PAT_MODS;        /*"msixn"*/
         char *p;
          /* Allocate for the worst case, which is all the std flags are turned
           * on.  If more precision is desired, we could do a population count of
@@ -6745,7 +6877,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
      RExC_emit_bound = ri->program + RExC_size + 1;
      pRExC_state->code_index = 0;
  
-    REGC((U8)REG_MAGIC, (char*) RExC_emit++);
+    *((char*) RExC_emit++) = (char) REG_MAGIC;
      if (reg(pRExC_state, 0, &flags,1) == NULL) {
         ReREFCNT_dec(rx);
          Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
@@ -6758,12 +6890,17 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
          SAVEFREEPV(RExC_recurse);
      }
  
-reStudy:
+  reStudy:
      r->minlen = minlen = sawlookahead = sawplus = sawopen = sawminmod = 0;
+    DEBUG_r(
+        RExC_study_chunk_recursed_count= 0;
+    );
      Zero(r->substrs, 1, struct reg_substr_data);
-    if (RExC_study_chunk_recursed)
+    if (RExC_study_chunk_recursed) {
          Zero(RExC_study_chunk_recursed,
               RExC_study_chunk_recursed_bytes * RExC_npar, U8);
+    }
+
  
  #ifdef TRIE_STUDY_OPT
      if (!restudied) {
@@ -6791,7 +6928,7 @@ reStudy:
      if (UTF)
         SvUTF8_on(rx);  /* Unicode in it? */
      ri->regstclass = NULL;
-    if (RExC_naughty >= 10)    /* Probably an expensive pattern. */
+    if (RExC_naughty >= TOO_NAUGHTY)   /* Probably an expensive pattern. */
         r->intflags |= PREGf_NAUGHTY;
      scan = ri->program + 1;            /* First BRANCH. */
  
@@ -6850,7 +6987,7 @@ reStudy:
          DEBUG_PEEP("first:",first,0);
          /* Ignore EXACT as we deal with it later. */
         if (PL_regkind[OP(first)] == EXACT) {
-           if (OP(first) == EXACT)
+           if (OP(first) == EXACT || OP(first) == EXACTL)
                 NOOP;   /* Empty, get anchored substr later. */
             else
                 ri->regstclass = first;
@@ -7025,7 +7162,7 @@ reStudy:
         if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
             && stclass_flag
              && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
-           && !ssc_is_anything(data.start_class))
+           && is_ssc_worth_it(pRExC_state, data.start_class))
         {
             const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
  
@@ -7038,7 +7175,7 @@ reStudy:
             ri->regstclass = (regnode*)RExC_rxi->data->data[n];
             r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV *sv = sv_newmortal();
-                     regprop(r, sv, (regnode*)data.start_class, NULL);
+                      regprop(r, sv, (regnode*)data.start_class, NULL, pRExC_state);
                       PerlIO_printf(Perl_debug_log,
                                     "synthetic stclass \"%s\".\n",
                                     SvPVX_const(sv));});
@@ -7105,7 +7242,7 @@ reStudy:
                 = r->float_substr = r->float_utf8 = NULL;
  
          if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
-            && ! ssc_is_anything(data.start_class))
+           && is_ssc_worth_it(pRExC_state, data.start_class))
          {
             const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
  
@@ -7118,7 +7255,7 @@ reStudy:
             ri->regstclass = (regnode*)RExC_rxi->data->data[n];
             r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */
             DEBUG_COMPILE_r({ SV* sv = sv_newmortal();
-                     regprop(r, sv, (regnode*)data.start_class, NULL);
+                      regprop(r, sv, (regnode*)data.start_class, NULL, pRExC_state);
                       PerlIO_printf(Perl_debug_log,
                                     "synthetic stclass \"%s\".\n",
                                     SvPVX_const(sv));});
@@ -7183,7 +7320,7 @@ reStudy:
           * flags appropriately - Yves */
          regnode *first = ri->program + 1;
          U8 fop = OP(first);
-        regnode *next = NEXTOPER(first);
+        regnode *next = regnext(first);
          U8 nop = OP(next);
  
          if (PL_regkind[fop] == NOTHING && nop == END)
@@ -7197,13 +7334,13 @@ reStudy:
              r->extflags |= RXf_START_ONLY;
          else if (fop == PLUS
                   && PL_regkind[nop] == POSIXD && FLAGS(next) == _CC_SPACE
-                 && OP(regnext(first)) == END)
+                 && nop == END)
              r->extflags |= RXf_WHITE;
          else if ( r->extflags & RXf_SPLIT
-                  && fop == EXACT
+                  && (fop == EXACT || fop == EXACTL)
                    && STR_LEN(first) == 1
                    && *(STRING(first)) == ' '
-                  && OP(regnext(first)) == END )
+                  && nop == END )
              r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
  
      }
@@ -7229,7 +7366,10 @@ reStudy:
      }
      Newxz(r->offs, RExC_npar, regexp_paren_pair);
      /* assume we don't need to swap parens around before we match */
-
+    DEBUG_TEST_r({
+        PerlIO_printf(Perl_debug_log,"study_chunk_recursed_count: %lu\n",
+            (unsigned long)RExC_study_chunk_recursed_count);
+    });
      DEBUG_DUMP_r({
          DEBUG_RExC_seen();
          PerlIO_printf(Perl_debug_log,"Final program:\n");
@@ -7761,28 +7901,26 @@ S_reg_scan_name(pTHX_ RExC_state_t *pRExC_state, U32 flags)
              Perl_croak(aTHX_ "panic: bad flag %lx in reg_scan_name",
                        (unsigned long) flags);
          }
-        assert(0); /* NOT REACHED */
+        NOT_REACHED; /* NOTREACHED */
      }
      return NULL;
  }
  
  #define DEBUG_PARSE_MSG(funcname)     DEBUG_PARSE_r({           \
-    int rem=(int)(RExC_end - RExC_parse);                       \
-    int cut;                                                    \
      int num;                                                    \
-    int iscut=0;                                                \
-    if (rem>10) {                                               \
-        rem=10;                                                 \
-        iscut=1;                                                \
-    }                                                           \
-    cut=10-rem;                                                 \
-    if (RExC_lastparse!=RExC_parse)                             \
-        PerlIO_printf(Perl_debug_log," >%.*s%-*s",              \
-            rem, RExC_parse,                                    \
-            cut + 4,                                            \
-            iscut ? "..." : "<"                                 \
+    if (RExC_lastparse!=RExC_parse) {                           \
+        PerlIO_printf(Perl_debug_log, "%s",                     \
+            Perl_pv_pretty(aTHX_ RExC_mysv1, RExC_parse,        \
+                RExC_end - RExC_parse, 16,                      \
+                "", "",                                         \
+                PERL_PV_ESCAPE_UNI_DETECT |                     \
+                PERL_PV_PRETTY_ELLIPSES   |                     \
+                PERL_PV_PRETTY_LTGT       |                     \
+                PERL_PV_ESCAPE_RE         |                     \
+                PERL_PV_PRETTY_EXACTSIZE                        \
+            )                                                   \
          );                                                      \
-    else                                                        \
+    } else                                                      \
          PerlIO_printf(Perl_debug_log,"%16s","");                \
                                                                  \
      if (SIZE_ONLY)                                              \
@@ -7882,27 +8020,6 @@ S__invlist_array_init(SV* const invlist, const bool will_have_0)
      return zero_addr + *offset;
  }
  
-PERL_STATIC_INLINE UV*
-S_invlist_array(SV* const invlist)
-{
-    /* Returns the pointer to the inversion list's array.  Every time the
-     * length changes, this needs to be called in case malloc or realloc moved
-     * it */
-
-    PERL_ARGS_ASSERT_INVLIST_ARRAY;
-
-    /* Must not be empty.  If these fail, you probably didn't check for <len>
-     * being non-zero before trying to get the array */
-    assert(_invlist_len(invlist));
-
-    /* The very first element always contains zero, The array begins either
-     * there, or if the inversion list is offset, at the element after it.
-     * The offset header field determines which; it contains 0 or 1 to indicate
-     * how much additionally to add */
-    assert(0 == *(SvPVX(invlist)));
-    return ((UV *) SvPVX(invlist) + *get_invlist_offset_addr(invlist));
-}
-
  PERL_STATIC_INLINE void
  S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
  {
@@ -7920,6 +8037,8 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, const bool offset)
      assert(SvLEN(invlist) == 0 || SvCUR(invlist) <= SvLEN(invlist));
  }
  
+#ifndef PERL_IN_XSUB_RE
+
  PERL_STATIC_INLINE IV*
  S_get_invlist_previous_index_addr(SV* invlist)
  {
@@ -7954,6 +8073,28 @@ S_invlist_set_previous_index(SV* const invlist, const IV index)
      *get_invlist_previous_index_addr(invlist) = index;
  }
  
+PERL_STATIC_INLINE void
+S_invlist_trim(SV* const invlist)
+{
+    PERL_ARGS_ASSERT_INVLIST_TRIM;
+
+    assert(SvTYPE(invlist) == SVt_INVLIST);
+
+    /* Change the length of the inversion list to how many entries it currently
+     * has */
+    SvPV_shrink_to_cur((SV *) invlist);
+}
+
+PERL_STATIC_INLINE bool
+S_invlist_is_iterating(SV* const invlist)
+{
+    PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
+
+    return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
+}
+
+#endif /* ifndef PERL_IN_XSUB_RE */
+
  PERL_STATIC_INLINE UV
  S_invlist_max(SV* const invlist)
  {
@@ -8073,18 +8214,6 @@ S_invlist_extend(pTHX_ SV* const invlist, const UV new_max)
      SvGROW((SV *)invlist, TO_INTERNAL_SIZE(new_max + 1));
  }
  
-PERL_STATIC_INLINE void
-S_invlist_trim(SV* const invlist)
-{
-    PERL_ARGS_ASSERT_INVLIST_TRIM;
-
-    assert(SvTYPE(invlist) == SVt_INVLIST);
-
-    /* Change the length of the inversion list to how many entries it currently
-     * has */
-    SvPV_shrink_to_cur((SV *) invlist);
-}
-
  STATIC void
  S__append_range_to_invlist(pTHX_ SV* const invlist,
                                   const UV start, const UV end)
@@ -8329,7 +8458,7 @@ Perl__invlist_populate_swatch(SV* const invlist,
              swatch[offset >> 3] |= 1 << (offset & 7);
          }
  
-    join_end_of_list:
+      join_end_of_list:
  
         /* Quit if at the end of the list */
          if (i >= len) {
@@ -8855,7 +8984,7 @@ Perl__add_range_to_invlist(pTHX_ SV* invlist, const UV start, const UV end)
      /* Add the range from 'start' to 'end' inclusive to the inversion list's
       * set.  A pointer to the inversion list is returned.  This may actually be
       * a new list, in which case the passed in one has been destroyed.  The
-     * passed in inversion list can be NULL, in which case a new one is created
+     * passed-in inversion list can be NULL, in which case a new one is created
       * with just the one range in it */
  
      SV* range_invlist;
@@ -9045,14 +9174,6 @@ S_invlist_iternext(SV* invlist, UV* start, UV* end)
      return TRUE;
  }
  
-PERL_STATIC_INLINE bool
-S_invlist_is_iterating(SV* const invlist)
-{
-    PERL_ARGS_ASSERT_INVLIST_IS_ITERATING;
-
-    return *(get_invlist_iter_addr(invlist)) < (STRLEN) UV_MAX;
-}
-
  PERL_STATIC_INLINE UV
  S_invlist_highest(SV* const invlist)
  {
@@ -9247,14 +9368,160 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, const bool complement_b)
  }
  #endif
  
-#undef HEADER_LENGTH
-#undef TO_INTERNAL_SIZE
-#undef FROM_INTERNAL_SIZE
-#undef INVLIST_VERSION_ID
-
-/* End of inversion list object */
-
-STATIC void
+/*
+ * As best we can, determine the characters that can match the start of
+ * the given EXACTF-ish node.
+ *
+ * Returns the invlist as a new SV*; it is the caller's responsibility to
+ * call SvREFCNT_dec() when done with it.
+ */
+STATIC SV*
+S__make_exactf_invlist(pTHX_ RExC_state_t *pRExC_state, regnode *node)
+{
+    const U8 * s = (U8*)STRING(node);
+    SSize_t bytelen = STR_LEN(node);
+    UV uc;
+    /* Start out big enough for 2 separate code points */
+    SV* invlist = _new_invlist(4);
+
+    PERL_ARGS_ASSERT__MAKE_EXACTF_INVLIST;
+
+    if (! UTF) {
+        uc = *s;
+
+        /* We punt and assume can match anything if the node begins
+         * with a multi-character fold.  Things are complicated.  For
+         * example, /ffi/i could match any of:
+         *  "\N{LATIN SMALL LIGATURE FFI}"
+         *  "\N{LATIN SMALL LIGATURE FF}I"
+         *  "F\N{LATIN SMALL LIGATURE FI}"
+         *  plus several other things; and making sure we have all the
+         *  possibilities is hard. */
+        if (is_MULTI_CHAR_FOLD_latin1_safe(s, s + bytelen)) {
+            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
+        }
+        else {
+            /* Any Latin1 range character can potentially match any
+             * other depending on the locale */
+            if (OP(node) == EXACTFL) {
+                _invlist_union(invlist, PL_Latin1, &invlist);
+            }
+            else {
+                /* But otherwise, it matches at least itself.  We can
+                 * quickly tell if it has a distinct fold, and if so,
+                 * it matches that as well */
+                invlist = add_cp_to_invlist(invlist, uc);
+                if (IS_IN_SOME_FOLD_L1(uc))
+                    invlist = add_cp_to_invlist(invlist, PL_fold_latin1[uc]);
+            }
+
+            /* Some characters match above-Latin1 ones under /i.  This
+             * is true of EXACTFL ones when the locale is UTF-8 */
+            if (HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(uc)
+                && (! isASCII(uc) || (OP(node) != EXACTFA
+                                    && OP(node) != EXACTFA_NO_TRIE)))
+            {
+                add_above_Latin1_folds(pRExC_state, (U8) uc, &invlist);
+            }
+        }
+    }
+    else {  /* Pattern is UTF-8 */
+        U8 folded[UTF8_MAX_FOLD_CHAR_EXPAND * UTF8_MAXBYTES_CASE + 1] = { '\0' };
+        STRLEN foldlen = UTF8SKIP(s);
+        const U8* e = s + bytelen;
+        SV** listp;
+
+        uc = utf8_to_uvchr_buf(s, s + bytelen, NULL);
+
+        /* The only code points that aren't folded in a UTF EXACTFish
+         * node are are the problematic ones in EXACTFL nodes */
+        if (OP(node) == EXACTFL && is_PROBLEMATIC_LOCALE_FOLDEDS_START_cp(uc)) {
+            /* We need to check for the possibility that this EXACTFL
+             * node begins with a multi-char fold.  Therefore we fold
+             * the first few characters of it so that we can make that
+             * check */
+            U8 *d = folded;
+            int i;
+
+            for (i = 0; i < UTF8_MAX_FOLD_CHAR_EXPAND && s < e; i++) {
+                if (isASCII(*s)) {
+                    *(d++) = (U8) toFOLD(*s);
+                    s++;
+                }
+                else {
+                    STRLEN len;
+                    to_utf8_fold(s, d, &len);
+                    d += len;
+                    s += UTF8SKIP(s);
+                }
+            }
+
+            /* And set up so the code below that looks in this folded
+             * buffer instead of the node's string */
+            e = d;
+            foldlen = UTF8SKIP(folded);
+            s = folded;
+        }
+
+        /* When we reach here 's' points to the fold of the first
+         * character(s) of the node; and 'e' points to far enough along
+         * the folded string to be just past any possible multi-char
+         * fold. 'foldlen' is the length in bytes of the first
+         * character in 's'
+         *
+         * Unlike the non-UTF-8 case, the macro for determining if a
+         * string is a multi-char fold requires all the characters to
+         * already be folded.  This is because of all the complications
+         * if not.  Note that they are folded anyway, except in EXACTFL
+         * nodes.  Like the non-UTF case above, we punt if the node
+         * begins with a multi-char fold  */
+
+        if (is_MULTI_CHAR_FOLD_utf8_safe(s, e)) {
+            invlist = _add_range_to_invlist(invlist, 0, UV_MAX);
+        }
+        else {  /* Single char fold */
+
+            /* It matches all the things that fold to it, which are
+             * found in PL_utf8_foldclosures (including itself) */
+            invlist = add_cp_to_invlist(invlist, uc);
+            if (! PL_utf8_foldclosures)
+                _load_PL_utf8_foldclosures();
+            if ((listp = hv_fetch(PL_utf8_foldclosures,
+                                (char *) s, foldlen, FALSE)))
+            {
+                AV* list = (AV*) *listp;
+                IV k;
+                for (k = 0; k <= av_tindex(list); k++) {
+                    SV** c_p = av_fetch(list, k, FALSE);
+                    UV c;
+                    assert(c_p);
+
+                    c = SvUV(*c_p);
+
+                    /* /aa doesn't allow folds between ASCII and non- */
+                    if ((OP(node) == EXACTFA || OP(node) == EXACTFA_NO_TRIE)
+                        && isASCII(c) != isASCII(uc))
+                    {
+                        continue;
+                    }
+
+                    invlist = add_cp_to_invlist(invlist, c);
+                }
+            }
+        }
+    }
+
+    return invlist;
+}
+
+#undef HEADER_LENGTH
+#undef TO_INTERNAL_SIZE
+#undef FROM_INTERNAL_SIZE
+#undef INVLIST_VERSION_ID
+
+/* End of inversion list object */
+
+STATIC void
  S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
  {
      /* This parses the flags that are in either the '(?foo)' or '(?foo:bar)'
@@ -9277,6 +9544,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
      regex_charset cs;
      bool has_use_defaults = FALSE;
      const char* const seqstart = RExC_parse - 1; /* Point to the '?' */
+    int x_mod_count = 0;
  
      PERL_ARGS_ASSERT_PARSE_LPAREN_QUESTION_FLAGS;
  
@@ -9303,8 +9571,8 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
             and must be globally applied -- japhy */
          switch (*RExC_parse) {
  
-            /* Code for the imsx flags */
-            CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp);
+            /* Code for the imsxn flags */
+            CASE_STD_PMMOD_FLAGS_PARSE_SET(flagsp, x_mod_count);
  
              case LOCALE_PAT_MOD:
                  if (has_charset_modifier) {
@@ -9362,7 +9630,7 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                       : REGEX_DEPENDS_CHARSET;
                  has_charset_modifier = DEPENDS_PAT_MOD;
                  break;
-            excess_modifier:
+              excess_modifier:
                  RExC_parse++;
                  if (has_charset_modifier == ASCII_RESTRICT_PAT_MOD) {
                      vFAIL2("Regexp modifier \"%c\" may appear a maximum of twice", ASCII_RESTRICT_PAT_MOD);
@@ -9374,12 +9642,12 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  else {
                      vFAIL3("Regexp modifiers \"%c\" and \"%c\" are mutually exclusive", has_charset_modifier, *(RExC_parse - 1));
                  }
-                /*NOTREACHED*/
-            neg_modifier:
+                NOT_REACHED; /*NOTREACHED*/
+              neg_modifier:
                  RExC_parse++;
                  vFAIL2("Regexp modifier \"%c\" may not appear after the \"-\"",
                                      *(RExC_parse - 1));
-                /*NOTREACHED*/
+                NOT_REACHED; /*NOTREACHED*/
              case ONCE_PAT_MOD: /* 'o' */
              case GLOBAL_PAT_MOD: /* 'g' */
                  if (PASS2 && ckWARN(WARN_REGEXP)) {
@@ -9441,19 +9709,26 @@ S_parse_lparen_question_flags(pTHX_ RExC_state_t *pRExC_state)
                  if (RExC_flags & RXf_PMf_FOLD) {
                      RExC_contains_i = 1;
                  }
+                if (PASS2) {
+                    STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+                }
                  return;
                  /*NOTREACHED*/
              default:
-            fail_modifiers:
-                RExC_parse += UTF ? UTF8SKIP(RExC_parse) : 1;
+              fail_modifiers:
+                RExC_parse += SKIP_IF_CHAR(RExC_parse);
                 /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                  vFAIL2utf8f("Sequence (%"UTF8f"...) not recognized",
                        UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
-                /*NOTREACHED*/
+                NOT_REACHED; /*NOTREACHED*/
          }
  
          ++RExC_parse;
      }
+
+    if (PASS2) {
+        STD_PMMOD_FLAGS_PARSE_X_WARN(x_mod_count);
+    }
  }
  
  /*
@@ -9689,11 +9964,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      nextchar(pRExC_state);
                      return ret;
                  }
-                RExC_parse++;
+                --RExC_parse;
+                RExC_parse += SKIP_IF_CHAR(RExC_parse);
                  /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                 vFAIL3("Sequence (%.*s...) not recognized",
                                  RExC_parse-seqstart, seqstart);
-               /*NOTREACHED*/
+               NOT_REACHED; /*NOTREACHED*/
              case '<':           /* (?<...) */
                 if (*RExC_parse == '!')
                     paren = ',';
@@ -9785,6 +10061,9 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  break;
             case '!':           /* (?!...) */
                 RExC_seen_zerolen++;
+               /* check if we're really just a "FAIL" assertion */
+               --RExC_parse;
+               nextchar(pRExC_state);
                 if (*RExC_parse == ')') {
                     ret=reg_node(pRExC_state, OPFAIL);
                     nextchar(pRExC_state);
@@ -9826,14 +10105,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  if (RExC_parse == RExC_end || *RExC_parse != ')')
                      vFAIL("Sequence (?&... not terminated");
                  goto gen_recurse_regop;
-                assert(0); /* NOT REACHED */
+                /* NOTREACHED */
              case '+':
                  if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
                      RExC_parse++;
                      vFAIL("Illegal pattern");
                  }
                  goto parse_recursion;
-                /* NOT REACHED*/
+                /* NOTREACHED*/
              case '-': /* (?-1) */
                  if (!(RExC_parse[0] >= '1' && RExC_parse[0] <= '9')) {
                      RExC_parse--; /* rewind to let it be handled later */
@@ -9846,14 +10125,19 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                parse_recursion:
                  {
                      bool is_neg = FALSE;
+                    UV unum;
                      parse_start = RExC_parse - 1; /* MJD */
                      if (*RExC_parse == '-') {
                          RExC_parse++;
                          is_neg = TRUE;
                      }
-                    num = grok_atou(RExC_parse, &endptr);
-                    if (endptr)
-                       RExC_parse = (char*)endptr;
+                    if (grok_atoUV(RExC_parse, &unum, &endptr)
+                        && unum <= I32_MAX
+                    ) {
+                        num = (I32)unum;
+                        RExC_parse = (char*)endptr;
+                    } else
+                        num = I32_MAX;
                      if (is_neg) {
                          /* Some limit for num? */
                          num = -num;
@@ -9884,21 +10168,19 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      num = RExC_npar + num - 1;
                  }
  
-                ret = reganode(pRExC_state, GOSUB, num);
+                ret = reg2Lanode(pRExC_state, GOSUB, num, RExC_recurse_count);
                  if (!SIZE_ONLY) {
                     if (num > (I32)RExC_rx->nparens) {
                         RExC_parse++;
                         vFAIL("Reference to nonexistent group");
                     }
-                   ARG2L_SET( ret, RExC_recurse_count++);
-                    RExC_emit++;
+                   RExC_recurse_count++;
                     DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
-                       "Recurse #%"UVuf" to %"IVdf"\n",
+                        "%*s%*s Recurse #%"UVuf" to %"IVdf"\n",
+                              22, "|    |", (int)(depth * 2 + 1), "",
                                (UV)ARG(ret), (IV)ARG2L(ret)));
-               } else {
-                   RExC_size++;
-               }
-                    RExC_seen |= REG_RECURSE_SEEN;
+                }
+                RExC_seen |= REG_RECURSE_SEEN;
                  Set_Node_Length(ret, 1 + regarglen[OP(ret)]); /* MJD */
                 Set_Node_Offset(ret, parse_start); /* MJD */
  
@@ -9906,17 +10188,17 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  nextchar(pRExC_state);
                  return ret;
  
-            assert(0); /* NOT REACHED */
+            /* NOTREACHED */
  
             case '?':           /* (??...) */
                 is_logical = 1;
                 if (*RExC_parse != '{') {
-                   RExC_parse++;
+                    RExC_parse += SKIP_IF_CHAR(RExC_parse);
                      /* diag_listed_as: Sequence (?%s...) not recognized in regex; marked by <-- HERE in m/%s/ */
                      vFAIL2utf8f(
                          "Sequence (%"UTF8f"...) not recognized",
                          UTF8fARG(UTF, RExC_parse-seqstart, seqstart));
-                   /*NOTREACHED*/
+                   NOT_REACHED; /*NOTREACHED*/
                 }
                 *flagp |= POSTPONED;
                 paren = *RExC_parse++;
@@ -9961,17 +10243,22 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 if (is_logical) {
                      regnode *eval;
                     ret = reg_node(pRExC_state, LOGICAL);
-                    eval = reganode(pRExC_state, EVAL, n);
+
+                    eval = reg2Lanode(pRExC_state, EVAL,
+                                       n,
+
+                                       /* for later propagation into (??{})
+                                        * return value */
+                                       RExC_flags & RXf_PMf_COMPILETIME
+                                      );
                     if (!SIZE_ONLY) {
                         ret->flags = 2;
-                        /* for later propagation into (??{}) return value */
-                        eval->flags = (U8) (RExC_flags & RXf_PMf_COMPILETIME);
                      }
                      REGTAIL(pRExC_state, ret, eval);
                      /* deal with the length of this later - MJD */
                     return ret;
                 }
-               ret = reganode(pRExC_state, EVAL, n);
+               ret = reg2Lanode(pRExC_state, EVAL, n, 0);
                 Set_Node_Length(ret, RExC_parse - parse_start + 1);
                 Set_Node_Offset(ret, parse_start);
                 return ret;
@@ -9979,6 +10266,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             case '(':           /* (?(?{...})...) and (?(?=...)...) */
             {
                 int is_define= 0;
+                const int DEFINE_len = sizeof("DEFINE") - 1;
                 if (RExC_parse[0] == '?') {        /* (?(?...)) */
                     if (RExC_parse[1] == '=' || RExC_parse[1] == '!'
                         || RExC_parse[1] == '<'
@@ -10021,15 +10309,11 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      ret = reganode(pRExC_state,NGROUPP,num);
                      goto insert_if_check_paren;
                 }
-               else if (RExC_parse[0] == 'D' &&
-                        RExC_parse[1] == 'E' &&
-                        RExC_parse[2] == 'F' &&
-                        RExC_parse[3] == 'I' &&
-                        RExC_parse[4] == 'N' &&
-                        RExC_parse[5] == 'E')
-               {
+               else if (RExC_end - RExC_parse >= DEFINE_len
+                        && strnEQ(RExC_parse, "DEFINE", DEFINE_len))
+                {
                     ret = reganode(pRExC_state,DEFINEP,0);
-                   RExC_parse +=6 ;
+                   RExC_parse += DEFINE_len;
                     is_define = 1;
                     goto insert_if_check_paren;
                 }
@@ -10037,9 +10321,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     RExC_parse++;
                     parno = 0;
                     if (RExC_parse[0] >= '1' && RExC_parse[0] <= '9' ) {
-                       parno = grok_atou(RExC_parse, &endptr);
-                       if (endptr)
+                        UV uv;
+                        if (grok_atoUV(RExC_parse, &uv, &endptr)
+                            && uv <= I32_MAX
+                        ) {
+                            parno = (I32)uv;
                              RExC_parse = (char*)endptr;
+                        }
+                        /* else "Switch condition not recognized" below */
                     } else if (RExC_parse[0] == '&') {
                         SV *sv_dat;
                         RExC_parse++;
@@ -10056,9 +10345,14 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                      /* (?(1)...) */
                     char c;
                     char *tmp;
-                   parno = grok_atou(RExC_parse, &endptr);
-                    if (endptr)
-                       RExC_parse = (char*)endptr;
+                    UV uv;
+                    if (grok_atoUV(RExC_parse, &uv, &endptr)
+                        && uv <= I32_MAX
+                    ) {
+                        parno = (I32)uv;
+                        RExC_parse = (char*)endptr;
+                    }
+                    /* XXX else what? */
                      ret = reganode(pRExC_state, GROUPP, parno);
  
                   insert_if_check_paren:
@@ -10108,8 +10402,12 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     }
                     else
                         lastbr = NULL;
-                   if (c != ')')
-                       vFAIL("Switch (?(condition)... contains too many branches");
+                    if (c != ')') {
+                        if (RExC_parse>RExC_end)
+                            vFAIL("Switch (?(condition)... not terminated");
+                        else
+                            vFAIL("Switch (?(condition)... contains too many branches");
+                    }
                     ender = reg_node(pRExC_state, TAIL);
                      REGTAIL(pRExC_state, br, ender);
                     if (lastbr) {
@@ -10138,7 +10436,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                parse_flags:
                 parse_lparen_question_flags(pRExC_state);
                  if (UCHARAT(RExC_parse) != ':') {
-                    nextchar(pRExC_state);
+                    if (*RExC_parse)
+                        nextchar(pRExC_state);
                      *flagp = TRYAGAIN;
                      return NULL;
                  }
@@ -10148,7 +10447,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                  goto parse_rest;
              } /* end switch */
         }
-       else {                  /* (...) */
+       else if (!(RExC_flags & RXf_PMf_NOCAPTURE)) {   /* (...) */
           capturing_parens:
             parno = RExC_npar;
             RExC_npar++;
@@ -10161,7 +10460,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                     && !RExC_open_parens[parno-1])
                 {
                     DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
-                       "Setting open paren #%"IVdf" to %d\n",
+                        "%*s%*s Setting open paren #%"IVdf" to %d\n",
+                        22, "|    |", (int)(depth * 2 + 1), "",
                         (IV)parno, REG_NODE_NUM(ret)));
                     RExC_open_parens[parno-1]= ret;
                 }
@@ -10169,6 +10469,10 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              Set_Node_Length(ret, 1); /* MJD */
              Set_Node_Offset(ret, RExC_parse); /* MJD */
             is_open = 1;
+       } else {
+            /* with RXf_PMf_NOCAPTURE treat (...) as (?:...) */
+            paren = ':';
+           ret = NULL;
         }
      }
      else                        /* ! paren */
@@ -10250,8 +10554,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             ender = reganode(pRExC_state, CLOSE, parno);
              if (!SIZE_ONLY && RExC_seen & REG_RECURSE_SEEN) {
                 DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
-                       "Setting close paren #%"IVdf" to %d\n",
-                       (IV)parno, REG_NODE_NUM(ender)));
+                        "%*s%*s Setting close paren #%"IVdf" to %d\n",
+                        22, "|    |", (int)(depth * 2 + 1), "", (IV)parno, REG_NODE_NUM(ender)));
                 RExC_close_parens[parno-1]= ender;
                 if (RExC_nestroot == parno)
                     RExC_nestroot = 0;
@@ -10277,15 +10581,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             break;
         }
          DEBUG_PARSE_r(if (!SIZE_ONLY) {
-            SV * const mysv_val1=sv_newmortal();
-            SV * const mysv_val2=sv_newmortal();
              DEBUG_PARSE_MSG("lsbr");
-            regprop(RExC_rx, mysv_val1, lastbr, NULL);
-            regprop(RExC_rx, mysv_val2, ender, NULL);
+            regprop(RExC_rx, RExC_mysv1, lastbr, NULL, pRExC_state);
+            regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
              PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
-                          SvPV_nolen_const(mysv_val1),
+                          SvPV_nolen_const(RExC_mysv1),
                            (IV)REG_NODE_NUM(lastbr),
-                          SvPV_nolen_const(mysv_val2),
+                          SvPV_nolen_const(RExC_mysv2),
                            (IV)REG_NODE_NUM(ender),
                            (IV)(ender - lastbr)
              );
@@ -10318,15 +10620,13 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
              if (is_nothing) {
                  br= PL_regkind[OP(ret)] != BRANCH ? regnext(ret) : ret;
                  DEBUG_PARSE_r(if (!SIZE_ONLY) {
-                    SV * const mysv_val1=sv_newmortal();
-                    SV * const mysv_val2=sv_newmortal();
                      DEBUG_PARSE_MSG("NADA");
-                    regprop(RExC_rx, mysv_val1, ret, NULL);
-                    regprop(RExC_rx, mysv_val2, ender, NULL);
+                    regprop(RExC_rx, RExC_mysv1, ret, NULL, pRExC_state);
+                    regprop(RExC_rx, RExC_mysv2, ender, NULL, pRExC_state);
                      PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n",
-                                  SvPV_nolen_const(mysv_val1),
+                                  SvPV_nolen_const(RExC_mysv1),
                                    (IV)REG_NODE_NUM(ret),
-                                  SvPV_nolen_const(mysv_val2),
+                                  SvPV_nolen_const(RExC_mysv2),
                                    (IV)REG_NODE_NUM(ender),
                                    (IV)(ender - ret)
                      );
@@ -10379,7 +10679,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
         }
         else
             FAIL("Junk on end of regexp");      /* "Can't happen". */
-       assert(0); /* NOTREACHED */
+       NOT_REACHED; /* NOTREACHED */
      }
  
      if (RExC_in_lookbehind) {
@@ -10447,7 +10747,9 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
         if (chain == NULL)      /* First piece. */
             *flagp |= flags&SPSTART;
         else {
-           RExC_naughty++;
+           /* FIXME adding one for every branch after the first is probably
+            * excessive now we have TRIE support. (hv) */
+           MARK_NAUGHTY(1);
              REGTAIL(pRExC_state, chain, latest);
         }
         chain = latest;
@@ -10493,6 +10795,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      char *parse_start;
  #endif
      const char *maxpos = NULL;
+    UV uv;
  
      /* Save the original in case we change the emitted regop to a FAIL. */
      regnode * const orig_emit = RExC_emit;
@@ -10534,16 +10837,28 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
             if (!maxpos)
                 maxpos = next;
             RExC_parse++;
-           min = grok_atou(RExC_parse, &endptr);
+            if (isDIGIT(*RExC_parse)) {
+                if (!grok_atoUV(RExC_parse, &uv, &endptr))
+                    vFAIL("Invalid quantifier in {,}");
+                if (uv >= REG_INFTY)
+                    vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+                min = (I32)uv;
+            } else {
+                min = 0;
+            }
             if (*maxpos == ',')
                 maxpos++;
             else
                 maxpos = RExC_parse;
-           max = grok_atou(maxpos, &endptr);
-           if (!max && *maxpos != '0')
+            if (isDIGIT(*maxpos)) {
+                if (!grok_atoUV(maxpos, &uv, &endptr))
+                    vFAIL("Invalid quantifier in {,}");
+                if (uv >= REG_INFTY)
+                    vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+                max = (I32)uv;
+            } else {
                 max = REG_INFTY;                /* meaning "infinity" */
-           else if (max >= REG_INFTY)
-               vFAIL2("Quantifier in {,} bigger than %d", REG_INFTY - 1);
+            }
             RExC_parse = next;
             nextchar(pRExC_state);
              if (max < min) {    /* If can't match, warn and optimize to fail
@@ -10577,9 +10892,9 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  nextchar(pRExC_state);
              }
  
-       do_curly:
+         do_curly:
             if ((flags&SIMPLE)) {
-               RExC_naughty += 2 + RExC_naughty / 2;
+                MARK_NAUGHTY_EXP(2, 2);
                 reginsert(pRExC_state, CURLY, ret, depth+1);
                  Set_Node_Offset(ret, parse_start+1); /* MJD */
                  Set_Node_Cur_Length(ret, parse_start);
@@ -10605,7 +10920,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                  REGTAIL(pRExC_state, ret, reg_node(pRExC_state, NOTHING));
                 if (SIZE_ONLY)
                     RExC_whilem_seen++, RExC_extralen += 3;
-               RExC_naughty += 4 + RExC_naughty;       /* compound interest */
+                MARK_NAUGHTY_EXP(1, 4);     /* compound interest */
             }
             ret->flags = 0;
  
@@ -10655,7 +10970,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      if (op == '*' && (flags&SIMPLE)) {
         reginsert(pRExC_state, STAR, ret, depth+1);
         ret->flags = 0;
-       RExC_naughty += 4;
+       MARK_NAUGHTY(4);
          RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
      }
      else if (op == '*') {
@@ -10665,7 +10980,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      else if (op == '+' && (flags&SIMPLE)) {
         reginsert(pRExC_state, PLUS, ret, depth+1);
         ret->flags = 0;
-       RExC_naughty += 3;
+       MARK_NAUGHTY(3);
          RExC_seen |= REG_UNBOUNDED_QUANTIFIER_SEEN;
      }
      else if (op == '+') {
@@ -10713,95 +11028,94 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
      return(ret);
  }
  
-STATIC STRLEN
-S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
-                      UV *valuep, I32 *flagp, U32 depth, SV** substitute_parse
+STATIC bool
+S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
+                regnode ** node_p,
+                UV * code_point_p,
+                int * cp_count,
+                I32 * flagp,
+                const U32 depth
      )
  {
-
- /* This is expected to be called by a parser routine that has recognized '\N'
-   and needs to handle the rest. RExC_parse is expected to point at the first
-   char following the N at the time of the call.  On successful return,
-   RExC_parse has been updated to point to just after the sequence identified
-   by this routine, <*flagp> has been updated, and the non-NULL input pointers
-   have been set appropriately.
-
-   The typical case for this is \N{some character name}.  This is usually
-   called while parsing the input, filling in or ready to fill in an EXACTish
-   node, and the code point for the character should be returned, so that it
-   can be added to the node, and parsing continued with the next input
-   character.  But it may be that instead of a single character the \N{}
-   expands to more than one, a named sequence.  In this case any following
-   quantifier applies to the whole sequence, and it is easier, given the code
-   structure that calls this, to handle it from a different area of the code.
-   For this reason, the input parameters can be set so that it returns valid
-   only on one or the other of these cases.
-
-   Another possibility is for the input to be an empty \N{}, which for
-   backwards compatibility we accept, but generate a NOTHING node which should
-   later get optimized out.  This is handled from the area of code which can
-   handle a named sequence, so if called with the parameters for the other, it
-   fails.
-
-   Still another possibility is for the \N to mean [^\n], and not a single
-   character or explicit sequence at all.  This is determined by context.
-   Again, this is handled from the area of code which can handle a named
-   sequence, so if called with the parameters for the other, it also fails.
-
-   And the final possibility is for the \N to be called from within a bracketed
-   character class.  In this case the [^\n] meaning makes no sense, and so is
-   an error.  Other anomalous situations are left to the calling code to handle.
-
-   For non-single-quoted regexes, the tokenizer has attempted to decide which
-   of the above applies, and in the case of a named sequence, has converted it
-   into one of the forms: \N{} (if the sequence is null), or \N{U+c1.c2...},
-   where c1... are the characters in the sequence.  For single-quoted regexes,
-   the tokenizer passes the \N sequence through unchanged; this code will not
-   attempt to determine this nor expand those, instead raising a syntax error.
-   The net effect is that if the beginning of the passed-in pattern isn't '{U+'
-   or there is no '}', it signals that this \N occurrence means to match a
-   non-newline. (This mostly was done because of [perl #56444].)
-
-   The API is somewhat convoluted due to historical and the above reasons.
-
-   The function raises an error (via vFAIL), and doesn't return for various
-   syntax errors.  For other failures, it returns (STRLEN) -1.  For successes,
-   it returns a count of how many characters were accounted for by it.  (This
-   can be 0 for \N{}; 1 for it meaning [^\n]; and otherwise the number of code
-   points in the sequence.  It sets <node_p>, <valuep>, and/or
-   <substitute_parse> on success.
-
-   If <valuep> is non-null, it means the caller can accept an input sequence
-   consisting of a just a single code point; <*valuep> is set to the value
-   of the only or first code point in the input.
-
-   If <substitute_parse> is non-null, it means the caller can accept an input
-   sequence consisting of one or more code points; <*substitute_parse> is a
-   newly created mortal SV* in this case, containing \x{} escapes representing
-   those code points.
-
-   Both <valuep> and <substitute_parse> can be non-NULL.
-
-   If <node_p> is non-null, <substitute_parse> must be NULL.  This signifies
-   that the caller can accept any legal sequence other than a single code
-   point.  To wit, <*node_p> is set as follows:
-    1) \N means not-a-NL: points to a newly created REG_ANY node; return is 1
-    2) \N{}:              points to a new NOTHING node; return is 0
-    3) otherwise:         points to a new EXACT node containing the resolved
-                          string; return is the number of code points in the
-                          string.  This will never be 1.
-   Note that failure is returned for single code point sequences if <valuep> is
-   null and <node_p> is not.
- */
-
-    char * endbrace;    /* '}' following the name */
-    char* p;
+ /* This routine teases apart the various meanings of \N and returns
+  * accordingly.  The input parameters constrain which meaning(s) is/are valid
+  * in the current context.
+  *
+  * Exactly one of <node_p> and <code_point_p> must be non-NULL.
+  *
+  * If <code_point_p> is not NULL, the context is expecting the result to be a
+  * single code point.  If this \N instance turns out to a single code point,
+  * the function returns TRUE and sets *code_point_p to that code point.
+  *
+  * If <node_p> is not NULL, the context is expecting the result to be one of
+  * the things representable by a regnode.  If this \N instance turns out to be
+  * one such, the function generates the regnode, returns TRUE and sets *node_p
+  * to point to that regnode.
+  *
+  * If this instance of \N isn't legal in any context, this function will
+  * generate a fatal error and not return.
+  *
+  * On input, RExC_parse should point to the first char following the \N at the
+  * time of the call.  On successful return, RExC_parse will have been updated
+  * to point to just after the sequence identified by this routine.  Also
+  * *flagp has been updated as needed.
+  *
+  * When there is some problem with the current context and this \N instance,
+  * the function returns FALSE, without advancing RExC_parse, nor setting
+  * *node_p, nor *code_point_p, nor *flagp.
+  *
+  * If <cp_count> is not NULL, the caller wants to know the length (in code
+  * points) that this \N sequence matches.  This is set even if the function
+  * returns FALSE, as detailed below.
+  *
+  * There are 5 possibilities here, as detailed in the next 5 paragraphs.
+  *
+  * Probably the most common case is for the \N to specify a single code point.
+  * *cp_count will be set to 1, and *code_point_p will be set to that code
+  * point.
+  *
+  * Another possibility is for the input to be an empty \N{}, which for
+  * backwards compatibility we accept.  *cp_count will be set to 0. *node_p
+  * will be set to a generated NOTHING node.
+  *
+  * Still another possibility is for the \N to mean [^\n]. *cp_count will be
+  * set to 0. *node_p will be set to a generated REG_ANY node.
+  *
+  * The fourth possibility is that \N resolves to a sequence of more than one
+  * code points.  *cp_count will be set to the number of code points in the
+  * sequence. *node_p * will be set to a generated node returned by this
+  * function calling S_reg().
+  *
+  * The final possibility, which happens only when the fourth one would
+  * otherwise be in effect, is that one of those code points requires the
+  * pattern to be recompiled as UTF-8.  The function returns FALSE, and sets
+  * the RESTART_UTF8 flag in *flagp.  When this happens, the caller needs to
+  * desist from continuing parsing, and return this information to its caller.
+  * This is not set for when there is only one code point, as this can be
+  * called as part of an ANYOF node, and they can store above-Latin1 code
+  * points without the pattern having to be in UTF-8.
+  *
+  * For non-single-quoted regexes, the tokenizer has resolved character and
+  * sequence names inside \N{...} into their Unicode values, normalizing the
+  * result into what we should see here: '\N{U+c1.c2...}', where c1... are the
+  * hex-represented code points in the sequence.  This is done there because
+  * the names can vary based on what charnames pragma is in scope at the time,
+  * so we need a way to take a snapshot of what they resolve to at the time of
+  * the original parse. [perl #56444].
+  *
+  * That parsing is skipped for single-quoted regexes, so we may here get
+  * '\N{NAME}'.  This is a fatal error.  These names have to be resolved by the
+  * parser.  But if the single-quoted regex is something like '\N{U+41}', that
+  * is legal and handled here.  The code point is Unicode, and has to be
+  * translated into the native character set for non-ASCII platforms.
+  * the tokenizer passes the \N sequence through unchanged; this code will not
+  * attempt to determine this nor expand those, instead raising a syntax error.
+  */
+
+    char * endbrace;    /* points to '}' following the name */
      char *endchar;     /* Points to '.' or '}' ending cur char in the input
                             stream */
-    bool has_multiple_chars; /* true if the input stream contains a sequence of
-                                more than one character */
-    bool in_char_class = substitute_parse != NULL;
-    STRLEN count = 0;   /* Number of characters in this sequence */
+    char* p;            /* Temporary */
  
      GET_RE_DEBUG_FLAGS_DECL;
  
@@ -10809,11 +11123,15 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
  
      GET_RE_DEBUG_FLAGS;
  
-    assert(cBOOL(node_p) ^ cBOOL(valuep));  /* Exactly one should be set */
-    assert(! (node_p && substitute_parse)); /* At most 1 should be set */
+    assert(cBOOL(node_p) ^ cBOOL(code_point_p));  /* Exactly one should be set */
+    assert(! (node_p && cp_count));               /* At most 1 should be set */
+
+    if (cp_count) {     /* Initialize return for the most common case */
+        *cp_count = 1;
+    }
  
      /* The [^\n] meaning of \N ignores spaces and comments under the /x
-     * modifier.  The other meaning does not, so use a temporary until we find
+     * modifier.  The other meanings do not, so use a temporary until we find
       * out which we are being called with */
      p = (RExC_flags & RXf_PMf_EXTENDED)
         ? regpatws(pRExC_state, RExC_parse,
@@ -10821,24 +11139,25 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
         : RExC_parse;
  
      /* Disambiguate between \N meaning a named character versus \N meaning
-     * [^\n].  The former is assumed when it can't be the latter. */
+     * [^\n].  The latter is assumed when the {...} following the \N is a legal
+     * quantifier, or there is no a '{' at all */
      if (*p != '{' || regcurly(p)) {
         RExC_parse = p;
+        if (cp_count) {
+            *cp_count = -1;
+        }
+
         if (! node_p) {
-           /* no bare \N allowed in a charclass */
-            if (in_char_class) {
-                vFAIL("\\N in a character class must be a named character: \\N{...}");
-            }
-            return (STRLEN) -1;
+            return FALSE;
          }
          RExC_parse--;   /* Need to back off so nextchar() doesn't skip the
                             current char */
         nextchar(pRExC_state);
         *node_p = reg_node(pRExC_state, REG_ANY);
         *flagp |= HASWIDTH|SIMPLE;
-       RExC_naughty++;
+       MARK_NAUGHTY(1);
          Set_Node_Length(*node_p, 1); /* MJD */
-       return 1;
+       return TRUE;
      }
  
      /* Here, we have decided it should be a named character or sequence */
@@ -10853,146 +11172,159 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
  
      RExC_parse++;      /* Skip past the '{' */
  
-    if (! (endbrace = strchr(RExC_parse, '}')) /* no trailing brace */
+    if (! (endbrace = strchr(RExC_parse, '}'))  /* no trailing brace */
         || ! (endbrace == RExC_parse            /* nothing between the {} */
-              || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked below
-                                                 */
-                  && strnEQ(RExC_parse, "U+", 2)))) /* for a better error msg)
-                                                     */
+              || (endbrace - RExC_parse >= 2   /* U+ (bad hex is checked... */
+                  && strnEQ(RExC_parse, "U+", 2)))) /* ... below for a better
+                                                       error msg) */
      {
         if (endbrace) RExC_parse = endbrace;    /* position msg's '<--HERE' */
         vFAIL("\\N{NAME} must be resolved by the lexer");
      }
  
+    RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
+
      if (endbrace == RExC_parse) {   /* empty: \N{} */
-       if (node_p) {
-           *node_p = reg_node(pRExC_state,NOTHING);
-       }
-        else if (! in_char_class) {
-            return (STRLEN) -1;
+        if (cp_count) {
+            *cp_count = 0;
          }
          nextchar(pRExC_state);
-        return 0;
+       if (! node_p) {
+            return FALSE;
+        }
+
+        *node_p = reg_node(pRExC_state,NOTHING);
+        return TRUE;
      }
  
-    RExC_uni_semantics = 1; /* Unicode named chars imply Unicode semantics */
      RExC_parse += 2;   /* Skip past the 'U+' */
  
      endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
      /* Code points are separated by dots.  If none, there is only one code
       * point, and is terminated by the brace */
-    has_multiple_chars = (endchar < endbrace);
  
-    /* We get the first code point if we want it, and either there is only one,
-     * or we can accept both cases of one and more than one */
-    if (valuep && (substitute_parse || ! has_multiple_chars)) {
-       STRLEN length_of_hex = (STRLEN)(endchar - RExC_parse);
-       I32 grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
+    if (endchar >= endbrace) {
+       STRLEN length_of_hex;
+       I32 grok_hex_flags;
+
+        /* Here, exactly one code point.  If that isn't what is wanted, fail */
+        if (! code_point_p) {
+            RExC_parse = p;
+            return FALSE;
+        }
+
+        /* Convert code point from hex */
+       length_of_hex = (STRLEN)(endchar - RExC_parse);
+       grok_hex_flags = PERL_SCAN_ALLOW_UNDERSCORES
                             | PERL_SCAN_DISALLOW_PREFIX
  
                               /* No errors in the first pass (See [perl
                                * #122671].)  We let the code below find the
                                * errors when there are multiple chars. */
-                           | ((SIZE_ONLY || has_multiple_chars)
+                           | ((SIZE_ONLY)
                                ? PERL_SCAN_SILENT_ILLDIGIT
                                : 0);
  
-       *valuep = grok_hex(RExC_parse, &length_of_hex, &grok_hex_flags, NULL);
+        /* This routine is the one place where both single- and double-quotish
+         * \N{U+xxxx} are evaluated.  The value is a Unicode code point which
+         * must be converted to native. */
+       *code_point_p = UNI_TO_NATIVE(grok_hex(RExC_parse,
+                                         &length_of_hex,
+                                         &grok_hex_flags,
+                                         NULL));
  
         /* The tokenizer should have guaranteed validity, but it's possible to
           * bypass it by using single quoting, so check.  Don't do the check
           * here when there are multiple chars; we do it below anyway. */
-        if (! has_multiple_chars) {
-            if (length_of_hex == 0
-                || length_of_hex != (STRLEN)(endchar - RExC_parse) )
-            {
-                RExC_parse += length_of_hex;   /* Includes all the valid */
-                RExC_parse += (RExC_orig_utf8) /* point to after 1st invalid */
-                                ? UTF8SKIP(RExC_parse)
-                                : 1;
-                /* Guard against malformed utf8 */
-                if (RExC_parse >= endchar) {
-                    RExC_parse = endchar;
-                }
-                vFAIL("Invalid hexadecimal number in \\N{U+...}");
+        if (length_of_hex == 0
+            || length_of_hex != (STRLEN)(endchar - RExC_parse) )
+        {
+            RExC_parse += length_of_hex;       /* Includes all the valid */
+            RExC_parse += (RExC_orig_utf8)     /* point to after 1st invalid */
+                            ? UTF8SKIP(RExC_parse)
+                            : 1;
+            /* Guard against malformed utf8 */
+            if (RExC_parse >= endchar) {
+                RExC_parse = endchar;
              }
-
-            RExC_parse = endbrace + 1;
-            return 1;
+            vFAIL("Invalid hexadecimal number in \\N{U+...}");
          }
-    }
  
-    /* Here, we should have already handled the case where a single character
-     * is expected and found.  So it is a failure if we aren't expecting
-     * multiple chars and got them; or didn't get them but wanted them.  We
-     * fail without advancing the parse, so that the caller can try again with
-     * different acceptance criteria */
-    if ((! node_p && ! substitute_parse) || ! has_multiple_chars) {
-        RExC_parse = p;
-        return (STRLEN) -1;
+        RExC_parse = endbrace + 1;
+        return TRUE;
      }
-
-    {
-
-       /* What is done here is to convert this to a sub-pattern of the form
-        * \x{char1}\x{char2}...
-         * and then either return it in <*substitute_parse> if non-null; or
-         * call reg recursively to parse it (enclosing in "(?: ... )" ).  That
-         * way, it retains its atomicness, while not having to worry about
-         * special handling that some code points may have.  toke.c has
-         * converted the original Unicode values to native, so that we can just
-         * pass on the hex values unchanged.  We do have to set a flag to keep
-         * recoding from happening in the recursion */
-
-       SV * dummy = NULL;
+    else {  /* Is a multiple character sequence */
+       SV * substitute_parse;
         STRLEN len;
         char *orig_end = RExC_end;
          I32 flags;
  
-        if (substitute_parse) {
-            *substitute_parse = newSVpvs("");
+        /* Count the code points, if desired, in the sequence */
+        if (cp_count) {
+            *cp_count = 0;
+            while (RExC_parse < endbrace) {
+                /* Point to the beginning of the next character in the sequence. */
+                RExC_parse = endchar + 1;
+                endchar = RExC_parse + strcspn(RExC_parse, ".}");
+                (*cp_count)++;
+            }
          }
-        else {
-            substitute_parse = &dummy;
-            *substitute_parse = newSVpvs("?:");
+
+        /* Fail if caller doesn't want to handle a multi-code-point sequence.
+         * But don't backup up the pointer if the caller want to know how many
+         * code points there are (they can then handle things) */
+        if (! node_p) {
+            if (! cp_count) {
+                RExC_parse = p;
+            }
+            return FALSE;
          }
-        *substitute_parse = sv_2mortal(*substitute_parse);
+
+       /* What is done here is to convert this to a sub-pattern of the form
+         * \x{char1}\x{char2}...  and then call reg recursively to parse it
+         * (enclosing in "(?: ... )" ).  That way, it retains its atomicness,
+         * while not having to worry about special handling that some code
+         * points may have. */
+
+       substitute_parse = newSVpvs("?:");
  
         while (RExC_parse < endbrace) {
  
             /* Convert to notation the rest of the code understands */
-           sv_catpv(*substitute_parse, "\\x{");
-           sv_catpvn(*substitute_parse, RExC_parse, endchar - RExC_parse);
-           sv_catpv(*substitute_parse, "}");
+           sv_catpv(substitute_parse, "\\x{");
+           sv_catpvn(substitute_parse, RExC_parse, endchar - RExC_parse);
+           sv_catpv(substitute_parse, "}");
  
             /* Point to the beginning of the next character in the sequence. */
             RExC_parse = endchar + 1;
             endchar = RExC_parse + strcspn(RExC_parse, ".}");
  
-            count++;
         }
-        if (! in_char_class) {
-            sv_catpv(*substitute_parse, ")");
-        }
+        sv_catpv(substitute_parse, ")");
  
-       RExC_parse = SvPV(*substitute_parse, len);
+       RExC_parse = SvPV(substitute_parse, len);
  
         /* Don't allow empty number */
-       if (len < (STRLEN) ((substitute_parse) ? 6 : 8)) {
+       if (len < (STRLEN) 8) {
              RExC_parse = endbrace;
             vFAIL("Invalid hexadecimal number in \\N{U+...}");
         }
         RExC_end = RExC_parse + len;
  
-       /* The values are Unicode, and therefore not subject to recoding */
+        /* The values are Unicode, and therefore not subject to recoding, but
+         * have to be converted to native on a non-Unicode (meaning non-ASCII)
+         * platform. */
         RExC_override_recoding = 1;
+#ifdef EBCDIC
+        RExC_recode_x_to_native = 1;
+#endif
  
          if (node_p) {
              if (!(*node_p = reg(pRExC_state, 1, &flags, depth+1))) {
                  if (flags & RESTART_UTF8) {
                      *flagp = RESTART_UTF8;
-                    return (STRLEN) -1;
+                    return FALSE;
                  }
                  FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
                      (UV) flags);
@@ -11000,14 +11332,19 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p,
              *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
          }
  
+        /* Restore the saved values */
         RExC_parse = endbrace;
         RExC_end = orig_end;
         RExC_override_recoding = 0;
+#ifdef EBCDIC
+        RExC_recode_x_to_native = 0;
+#endif
  
+        SvREFCNT_dec_NN(substitute_parse);
          nextchar(pRExC_state);
-    }
  
-    return count;
+        return TRUE;
+    }
  }
  
  
@@ -11052,7 +11389,9 @@ S_compute_EXACTish(RExC_state_t *pRExC_state)
      PERL_ARGS_ASSERT_COMPUTE_EXACTISH;
  
      if (! FOLD) {
-        return EXACT;
+        return (LOC)
+                ? EXACTL
+                : EXACT;
      }
  
      op = get_regex_charset(RExC_flags);
@@ -11109,7 +11448,7 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
  
      if (! len_passed_in) {
          if (UTF) {
-            if (UNI_IS_INVARIANT(code_point)) {
+            if (UVCHR_IS_INVARIANT(code_point)) {
                  if (LOC || ! FOLD) {    /* /l defers folding until runtime */
                      *character = (U8) code_point;
                  }
@@ -11150,7 +11489,9 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t *pRExC_state,
                                                 for those.  */
                      && ! _invlist_contains_cp(PL_utf8_foldable, code_point))
                  {
-                    OP(node) = EXACT;
+                    OP(node) = (LOC)
+                               ? EXACTL
+                               : EXACT;
                  }
              }
              else if (code_point <= MAX_UTF8_TWO_BYTE) {
@@ -11234,10 +11575,10 @@ static I32
  S_backref_value(char *p)
  {
      const char* endptr;
-    UV val = grok_atou(p, &endptr);
-    if (endptr == p || endptr == NULL || val > I32_MAX)
-        return I32_MAX;
-    return (I32)val;
+    UV val;
+    if (grok_atoUV(p, &val, &endptr) && val <= I32_MAX)
+        return (I32)val;
+    return I32_MAX;
  }
  
  
@@ -11325,7 +11666,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
  
      PERL_ARGS_ASSERT_REGATOM;
  
-tryagain:
+  tryagain:
      switch ((U8)*RExC_parse) {
      case '^':
         RExC_seen_zerolen++;
@@ -11353,7 +11694,7 @@ tryagain:
         else
             ret = reg_node(pRExC_state, REG_ANY);
         *flagp |= HASWIDTH|SIMPLE;
-       RExC_naughty++;
+       MARK_NAUGHTY(1);
          Set_Node_Length(ret, 1); /* MJD */
         break;
      case '[':
@@ -11363,6 +11704,7 @@ tryagain:
                         FALSE, /* means parse the whole char class */
                         TRUE, /* allow multi-char folds */
                         FALSE, /* don't silence non-portable warnings. */
+                       (bool) RExC_strict,
                         NULL);
         if (*RExC_parse != ']') {
             RExC_parse = oregcomp_parse;
@@ -11484,49 +11826,121 @@ tryagain:
              arg = ANYOF_WORDCHAR;
              goto join_posix;
  
+       case 'B':
+            invert = 1;
+            /* FALLTHROUGH */
         case 'b':
+          {
+           regex_charset charset = get_regex_charset(RExC_flags);
+
             RExC_seen_zerolen++;
              RExC_seen |= REG_LOOKBEHIND_SEEN;
-           op = BOUND + get_regex_charset(RExC_flags);
-            if (op > BOUNDA) {  /* /aa is same as /a */
-                op = BOUNDA;
-            }
-            else if (op == BOUNDL) {
+           op = BOUND + charset;
+
+            if (op == BOUNDL) {
                  RExC_contains_locale = 1;
              }
+
             ret = reg_node(pRExC_state, op);
-           FLAGS(ret) = get_regex_charset(RExC_flags);
             *flagp |= SIMPLE;
-           if ((U8) *(RExC_parse + 1) == '{') {
-                /* diag_listed_as: Use "%s" instead of "%s" */
-               vFAIL("Use \"\\b\\{\" instead of \"\\b{\"");
-           }
-           goto finish_meta_pat;
-       case 'B':
-           RExC_seen_zerolen++;
-            RExC_seen |= REG_LOOKBEHIND_SEEN;
-           op = NBOUND + get_regex_charset(RExC_flags);
-            if (op > NBOUNDA) { /* /aa is same as /a */
-                op = NBOUNDA;
-            }
-            else if (op == NBOUNDL) {
-                RExC_contains_locale = 1;
+           if (*(RExC_parse + 1) != '{') {
+                FLAGS(ret) = TRADITIONAL_BOUND;
+                if (PASS2 && op > BOUNDA) {  /* /aa is same as /a */
+                    OP(ret) = BOUNDA;
+                }
              }
-           ret = reg_node(pRExC_state, op);
-           FLAGS(ret) = get_regex_charset(RExC_flags);
-           *flagp |= SIMPLE;
-           if ((U8) *(RExC_parse + 1) == '{') {
-                /* diag_listed_as: Use "%s" instead of "%s" */
-               vFAIL("Use \"\\B\\{\" instead of \"\\B{\"");
+            else {
+                STRLEN length;
+                char name = *RExC_parse;
+                char * endbrace;
+                RExC_parse += 2;
+                endbrace = strchr(RExC_parse, '}');
+
+                if (! endbrace) {
+                    vFAIL2("Missing right brace on \\%c{}", name);
+                }
+                /* XXX Need to decide whether to take spaces or not.  Should be
+                 * consistent with \p{}, but that currently is SPACE, which
+                 * means vertical too, which seems wrong
+                 * while (isBLANK(*RExC_parse)) {
+                    RExC_parse++;
+                }*/
+                if (endbrace == RExC_parse) {
+                    RExC_parse++;  /* After the '}' */
+                    vFAIL2("Empty \\%c{}", name);
+                }
+                length = endbrace - RExC_parse;
+                /*while (isBLANK(*(RExC_parse + length - 1))) {
+                    length--;
+                }*/
+                switch (*RExC_parse) {
+                    case 'g':
+                        if (length != 1
+                            && (length != 3 || strnNE(RExC_parse + 1, "cb", 2)))
+                        {
+                            goto bad_bound_type;
+                        }
+                        FLAGS(ret) = GCB_BOUND;
+                        break;
+                    case 's':
+                        if (length != 2 || *(RExC_parse + 1) != 'b') {
+                            goto bad_bound_type;
+                        }
+                        FLAGS(ret) = SB_BOUND;
+                        break;
+                    case 'w':
+                        if (length != 2 || *(RExC_parse + 1) != 'b') {
+                            goto bad_bound_type;
+                        }
+                        FLAGS(ret) = WB_BOUND;
+                        break;
+                    default:
+                      bad_bound_type:
+                        RExC_parse = endbrace;
+                       vFAIL2utf8f(
+                            "'%"UTF8f"' is an unknown bound type",
+                           UTF8fARG(UTF, length, endbrace - length));
+                        NOT_REACHED; /*NOTREACHED*/
+                }
+                RExC_parse = endbrace;
+                RExC_uni_semantics = 1;
+
+                if (PASS2 && op >= BOUNDA) {  /* /aa is same as /a */
+                    OP(ret) = BOUNDU;
+                    length += 4;
+
+                    /* Don't have to worry about UTF-8, in this message because
+                     * to get here the contents of the \b must be ASCII */
+                    ckWARN4reg(RExC_parse + 1,  /* Include the '}' in msg */
+                              "Using /u for '%.*s' instead of /%s",
+                              (unsigned) length,
+                              endbrace - length + 1,
+                              (charset == REGEX_ASCII_RESTRICTED_CHARSET)
+                              ? ASCII_RESTRICT_PAT_MODS
+                              : ASCII_MORE_RESTRICT_PAT_MODS);
+                }
             }
+
+            if (PASS2 && invert) {
+                OP(ret) += NBOUND - BOUND;
+            }
             goto finish_meta_pat;
+          }
  
         case 'D':
              invert = 1;
              /* FALLTHROUGH */
         case 'd':
              arg = ANYOF_DIGIT;
-            goto join_posix;
+            if (! DEPENDS_SEMANTICS) {
+                goto join_posix;
+            }
+
+            /* \d doesn't have any matches in the upper Latin1 range, hence /d
+             * is equivalent to /u.  Changing to /u saves some branches at
+             * runtime */
+            op = POSIXU;
+            goto join_posix_op_known;
  
         case 'R':
             ret = reg_node(pRExC_state, LNBREAK);
@@ -11555,7 +11969,7 @@ tryagain:
         case 's':
              arg = ANYOF_SPACE;
  
-        join_posix:
+          join_posix:
  
             op = POSIXD + get_regex_charset(RExC_flags);
              if (op > POSIXA) {  /* /aa is same as /a */
@@ -11565,7 +11979,7 @@ tryagain:
                  RExC_contains_locale = 1;
              }
  
-        join_posix_op_known:
+          join_posix_op_known:
  
              if (invert) {
                  op += NPOSIXD - POSIXD;
@@ -11579,7 +11993,7 @@ tryagain:
             *flagp |= HASWIDTH|SIMPLE;
              /* FALLTHROUGH */
  
-         finish_meta_pat:
+          finish_meta_pat:
             nextchar(pRExC_state);
              Set_Node_Length(ret, 2); /* MJD */
             break;
@@ -11598,6 +12012,7 @@ tryagain:
                                 FALSE, /* don't silence non-portable warnings.
                                           It would be a bug if these returned
                                           non-portables */
+                               (bool) RExC_strict,
                                 NULL);
                  /* regclass() can only return RESTART_UTF8 if multi-char folds
                     are allowed.  */
@@ -11613,28 +12028,37 @@ tryagain:
             }
             break;
          case 'N':
-            /* Handle \N and \N{NAME} with multiple code points here and not
-             * below because it can be multicharacter. join_exact() will join
-             * them up later on.  Also this makes sure that things like
-             * /\N{BLAH}+/ and \N{BLAH} being multi char Just Happen. dmq.
-             * The options to the grok function call causes it to fail if the
-             * sequence is just a single code point.  We then go treat it as
-             * just another character in the current EXACT node, and hence it
-             * gets uniform treatment with all the other characters.  The
-             * special treatment for quantifiers is not needed for such single
-             * character sequences */
+            /* Handle \N, \N{} and \N{NAMED SEQUENCE} (the latter meaning the
+             * \N{...} evaluates to a sequence of more than one code points).
+             * The function call below returns a regnode, which is our result.
+             * The parameters cause it to fail if the \N{} evaluates to a
+             * single code point; we handle those like any other literal.  The
+             * reason that the multicharacter case is handled here and not as
+             * part of the EXACtish code is because of quantifiers.  In
+             * /\N{BLAH}+/, the '+' applies to the whole thing, and doing it
+             * this way makes that Just Happen. dmq.
+             * join_exact() will join this up with adjacent EXACTish nodes
+             * later on, if appropriate. */
              ++RExC_parse;
-            if ((STRLEN) -1 == grok_bslash_N(pRExC_state, &ret, NULL, flagp,
-                                             depth, FALSE))
-            {
-                if (*flagp & RESTART_UTF8)
-                    return NULL;
-                RExC_parse--;
-                goto defchar;
+            if (grok_bslash_N(pRExC_state,
+                              &ret,     /* Want a regnode returned */
+                              NULL,     /* Fail if evaluates to a single code
+                                           point */
+                              NULL,     /* Don't need a count of how many code
+                                           points */
+                              flagp,
+                              depth)
+            ) {
+                break;
              }
-            break;
+
+            if (*flagp & RESTART_UTF8)
+                return NULL;
+            RExC_parse--;
+            goto defchar;
+
         case 'k':    /* Handle \k<NAME> and \k'NAME' */
-       parse_named_seq:
+      parse_named_seq:
          {
              char ch= RExC_parse[1];
             if (ch != '<' && ch != '\'' && ch != '{') {
@@ -11724,19 +12148,37 @@ tryagain:
                  }
                  else {
                      num = S_backref_value(RExC_parse);
-                    /* bare \NNN might be backref or octal - if it is larger than or equal
-                     * RExC_npar then it is assumed to be and octal escape.
-                     * Note RExC_npar is +1 from the actual number of parens*/
-                    if (num == I32_MAX || (num > 9 && num >= RExC_npar
-                            && *RExC_parse != '8' && *RExC_parse != '9'))
+                    /* bare \NNN might be backref or octal - if it is larger
+                     * than or equal RExC_npar then it is assumed to be an
+                     * octal escape. Note RExC_npar is +1 from the actual
+                     * number of parens. */
+                    /* Note we do NOT check if num == I32_MAX here, as that is
+                     * handled by the RExC_npar check */
+
+                    if (
+                        /* any numeric escape < 10 is always a backref */
+                        num > 9
+                        /* any numeric escape < RExC_npar is a backref */
+                        && num >= RExC_npar
+                        /* cannot be an octal escape if it starts with 8 */
+                        && *RExC_parse != '8'
+                        /* cannot be an octal escape it it starts with 9 */
+                        && *RExC_parse != '9'
+                    )
                      {
-                        /* Probably a character specified in octal, e.g. \35 */
+                        /* Probably not a backref, instead likely to be an
+                         * octal character escape, e.g. \35 or \777.
+                         * The above logic should make it obvious why using
+                         * octal escapes in patterns is problematic. - Yves */
                          goto defchar;
                      }
                  }
  
-                /* at this point RExC_parse definitely points to a backref
-                 * number */
+                /* At this point RExC_parse points at a numeric escape like
+                 * \12 or \88 or something similar, which we should NOT treat
+                 * as an octal escape. It may or may not be a valid backref
+                 * escape. For instance \88888888 is unlikely to be a valid
+                 * backref. */
                 {
  #ifdef RE_TRACK_PATTERN_OFFSETS
                     char * const parse_start = RExC_parse - 1; /* MJD */
@@ -11800,7 +12242,7 @@ tryagain:
  
             RExC_parse++;
  
-       defchar: {
+         defchar: {
             STRLEN len = 0;
             UV ender = 0;
             char *p;
@@ -11839,7 +12281,7 @@ tryagain:
  
              s0 = s;
  
-       reparse:
+         reparse:
  
              /* We do the EXACTFish to EXACT node only if folding.  (And we
               * don't need to figure this out until pass 2) */
@@ -11923,18 +12365,24 @@ tryagain:
                         p++;
                         break;
                     case 'N': /* Handle a single-code point named character. */
-                        /* The options cause it to fail if a multiple code
-                         * point sequence.  Handle those in the switch() above
-                         * */
                          RExC_parse = p + 1;
-                        if ((STRLEN) -1 == grok_bslash_N(pRExC_state, NULL,
-                                                         &ender,
-                                                         flagp,
-                                                         depth,
-                                                         FALSE
-                        )) {
+                        if (! grok_bslash_N(pRExC_state,
+                                            NULL,   /* Fail if evaluates to
+                                                       anything other than a
+                                                       single code point */
+                                            &ender, /* The returned single code
+                                                       point */
+                                            NULL,   /* Don't need a count of
+                                                       how many code points */
+                                            flagp,
+                                            depth)
+                        ) {
                              if (*flagp & RESTART_UTF8)
                                  FAIL("panic: grok_bslash_N set RESTART_UTF8");
+
+                            /* Here, it wasn't a single code point.  Go close
+                             * up this EXACTish node.  The switch() prior to
+                             * this switch handles the other cases */
                              RExC_parse = p = oldp;
                              goto loopdone;
                          }
@@ -11972,7 +12420,7 @@ tryagain:
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
-                                                       FALSE, /* not strict */
+                                                       (bool) RExC_strict,
                                                         TRUE, /* Output warnings
                                                                  for non-
                                                                  portables */
@@ -11983,7 +12431,7 @@ tryagain:
                                 vFAIL(error_msg);
                             }
                              ender = result;
-                           if (PL_encoding && ender < 0x100) {
+                           if (IN_ENCODING && ender < 0x100) {
                                 goto recode_encoding;
                             }
                             if (ender > 0xff) {
@@ -12001,8 +12449,8 @@ tryagain:
                                                        &result,
                                                        &error_msg,
                                                        PASS2, /* out warnings */
-                                                       FALSE, /* not strict */
-                                                       TRUE, /* Output warnings
+                                                       (bool) RExC_strict,
+                                                       TRUE, /* Silence warnings
                                                                  for non-
                                                                  portables */
                                                         UTF);
@@ -12013,10 +12461,18 @@ tryagain:
                             }
                              ender = result;
  
-                           if (PL_encoding && ender < 0x100) {
-                               goto recode_encoding;
+                            if (ender < 0x100) {
+#ifdef EBCDIC
+                                if (RExC_recode_x_to_native) {
+                                    ender = LATIN1_TO_NATIVE(ender);
+                                }
+                                else
+#endif
+                                if (IN_ENCODING) {
+                                    goto recode_encoding;
+                                }
                             }
-                           if (ender > 0xff) {
+                            else {
                                 REQUIRE_UTF8;
                             }
                             break;
@@ -12027,6 +12483,9 @@ tryagain:
                         break;
                      case '8': case '9': /* must be a backreference */
                          --p;
+                        /* we have an escape like \8 which cannot be an octal escape
+                         * so we exit the loop, and let the outer loop handle this
+                         * escape which may or may not be a legitimate backref. */
                          goto loopdone;
                      case '1': case '2': case '3':case '4':
                     case '5': case '6': case '7':
@@ -12035,8 +12494,8 @@ tryagain:
                           * from \1 - \9 is a backreference, any multi-digit
                           * escape which does not start with 0 and which when
                           * evaluated as decimal could refer to an already
-                         * parsed capture buffer is a backslash. Anything else
-                         * is octal.
+                         * parsed capture buffer is a back reference. Anything
+                         * else is octal.
                           *
                           * Note this implies that \118 could be interpreted as
                           * 118 OR as "\11" . "8" depending on whether there
@@ -12072,12 +12531,12 @@ tryagain:
                                           form_short_octal_warning(p, numlen));
                              }
                         }
-                       if (PL_encoding && ender < 0x100)
+                       if (IN_ENCODING && ender < 0x100)
                             goto recode_encoding;
                         break;
-                   recode_encoding:
+                     recode_encoding:
                         if (! RExC_override_recoding) {
-                           SV* enc = PL_encoding;
+                           SV* enc = _get_encoding();
                             ender = reg_recode((const char)(U8)ender, &enc);
                             if (!enc && PASS2)
                                 ckWARNreg(p, "Invalid escape in the specified encoding");
@@ -12144,39 +12603,64 @@ tryagain:
                      goto loopdone;
                  }
  
-                if (! FOLD   /* The simple case, just append the literal */
-                    || (LOC  /* Also don't fold for tricky chars under /l */
-                        && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)))
-                {
-                    if (UTF) {
-                        const STRLEN unilen = reguni(pRExC_state, ender, s);
-                        if (unilen > 0) {
-                           s   += unilen;
-                           len += unilen;
-                        }
-
-                        /* The loop increments <len> each time, as all but this
-                         * path (and one other) through it add a single byte to
-                         * the EXACTish node.  But this one has changed len to
-                         * be the correct final value, so subtract one to
-                         * cancel out the increment that follows */
-                        len--;
-                    }
-                    else {
-                        REGC((char)ender, s++);
-                    }
+                if (! FOLD) {  /* The simple case, just append the literal */
  
-                    /* Can get here if folding only if is one of the /l
-                     * characters whose fold depends on the locale.  The
-                     * occurrence of any of these indicate that we can't
-                     * simplify things */
-                    if (FOLD) {
-                        maybe_exact = FALSE;
-                        maybe_exactfu = FALSE;
+                    /* In the sizing pass, we need only the size of the
+                     * character we are appending, hence we can delay getting
+                     * its representation until PASS2. */
+                    if (SIZE_ONLY) {
+                        if (UTF) {
+                            const STRLEN unilen = UNISKIP(ender);
+                            s += unilen;
+
+                            /* We have to subtract 1 just below (and again in
+                             * the corresponding PASS2 code) because the loop
+                             * increments <len> each time, as all but this path
+                             * (and one other) through it add a single byte to
+                             * the EXACTish node.  But these paths would change
+                             * len to be the correct final value, so cancel out
+                             * the increment that follows */
+                            len += unilen - 1;
+                        }
+                        else {
+                            s++;
+                        }
+                    } else { /* PASS2 */
+                      not_fold_common:
+                        if (UTF) {
+                            U8 * new_s = uvchr_to_utf8((U8*)s, ender);
+                            len += (char *) new_s - s - 1;
+                            s = (char *) new_s;
+                        }
+                        else {
+                            *(s++) = (char) ender;
+                        }
                      }
                  }
-                else             /* FOLD */
-                     if (! ( UTF
+                else if (LOC && is_PROBLEMATIC_LOCALE_FOLD_cp(ender)) {
+
+                    /* Here are folding under /l, and the code point is
+                     * problematic.  First, we know we can't simplify things */
+                    maybe_exact = FALSE;
+                    maybe_exactfu = FALSE;
+
+                    /* A problematic code point in this context means that its
+                     * fold isn't known until runtime, so we can't fold it now.
+                     * (The non-problematic code points are the above-Latin1
+                     * ones that fold to also all above-Latin1.  Their folds
+                     * don't vary no matter what the locale is.) But here we
+                     * have characters whose fold depends on the locale.
+                     * Unlike the non-folding case above, we have to keep track
+                     * of these in the sizing pass, so that we can make sure we
+                     * don't split too-long nodes in the middle of a potential
+                     * multi-char fold.  And unlike the regular fold case
+                     * handled in the else clauses below, we don't actually
+                     * fold and don't have special cases to consider.  What we
+                     * do for both passes is the PASS2 code for non-folding */
+                    goto not_fold_common;
+                }
+                else /* A regular FOLD code point */
+                    if (! ( UTF
                          /* See comments for join_exact() as to why we fold this
                           * non-UTF at compile time */
                          || (node_type == EXACTFU
@@ -12185,7 +12669,7 @@ tryagain:
                      /* Here, are folding and are not UTF-8 encoded; therefore
                       * the character must be in the range 0-255, and is not /l
                       * (Not /l because we already handled these under /l in
-                     * is_PROBLEMATIC_LOCALE_FOLD_cp */
+                     * is_PROBLEMATIC_LOCALE_FOLD_cp) */
                      if (IS_IN_SOME_FOLD_L1(ender)) {
                          maybe_exact = FALSE;
  
@@ -12214,11 +12698,10 @@ tryagain:
                       * unfolded, and we have to calculate how many EXACTish
                       * nodes it will take; and we may run out of room in a node
                       * in the middle of a potential multi-char fold, and have
-                     * to back off accordingly.  (Hence we can't use REGC for
-                     * the simple case just below.) */
+                     * to back off accordingly.  */
  
                      UV folded;
-                    if (isASCII(ender)) {
+                    if (isASCII_uni(ender)) {
                          folded = toFOLD(ender);
                          *(s)++ = (U8) folded;
                      }
@@ -12434,8 +12917,8 @@ tryagain:
                  }
             }   /* End of verifying node ends with an appropriate char */
  
-       loopdone:   /* Jumped to when encounters something that shouldn't be in
-                      the node */
+          loopdone:   /* Jumped to when encounters something that shouldn't be
+                         in the node */
  
              /* I (khw) don't know if you can get here with zero length, but the
               * old code handled this situation by creating a zero-length EXACT
@@ -12451,10 +12934,14 @@ tryagain:
                       * differently depending on UTF8ness of the target string
                       * (for /u), or depending on locale for /l */
                      if (maybe_exact) {
-                        OP(ret) = EXACT;
+                        OP(ret) = (LOC)
+                                  ? EXACTL
+                                  : EXACT;
                      }
                      else if (maybe_exactfu) {
-                        OP(ret) = EXACTFU;
+                        OP(ret) = (LOC)
+                                  ? EXACTFLU8
+                                  : EXACTFU;
                      }
                  }
                  alloc_maybe_populate_EXACT(pRExC_state, ret, flagp, len, ender,
@@ -12646,7 +13133,7 @@ S_regpposixcc(pTHX_ RExC_state_t *pRExC_state, I32 value, const bool strict)
                             break;
                         case 'e':
                             if (memEQ(posixcc, "spac", 4)) /* space */
-                               namedclass = ANYOF_PSXSPC;
+                               namedclass = ANYOF_SPACE;
                             break;
                         case 'h':
                             if (memEQ(posixcc, "grap", 4)) /* graph */
@@ -12780,6 +13267,34 @@ S_could_it_be_a_POSIX_class(RExC_state_t *pRExC_state)
              && first_char == *(p - 1));
  }
  
+STATIC unsigned  int
+S_regex_set_precedence(const U8 my_operator) {
+
+    /* Returns the precedence in the (?[...]) construct of the input operator,
+     * specified by its character representation.  The precedence follows
+     * general Perl rules, but it extends this so that ')' and ']' have (low)
+     * precedence even though they aren't really operators */
+
+    switch (my_operator) {
+        case '!':
+            return 5;
+        case '&':
+            return 4;
+        case '^':
+        case '|':
+        case '+':
+        case '-':
+            return 3;
+        case ')':
+            return 2;
+        case ']':
+            return 1;
+    }
+
+    NOT_REACHED; /* NOTREACHED */
+    return 0;   /* Silence compiler warning */
+}
+
  STATIC regnode *
  S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                      I32 *flagp, U32 depth,
@@ -12787,24 +13302,35 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
  {
      /* Handle the (?[...]) construct to do set operations */
  
-    U8 curchar;
-    UV start, end;     /* End points of code point ranges */
-    SV* result_string;
-    char *save_end, *save_parse;
-    SV* final;
-    STRLEN len;
-    regnode* node;
-    AV* stack;
-    const bool save_fold = FOLD;
+    U8 curchar;                     /* Current character being parsed */
+    UV start, end;                 /* End points of code point ranges */
+    SV* final = NULL;               /* The end result inversion list */
+    SV* result_string;              /* 'final' stringified */
+    AV* stack;                      /* stack of operators and operands not yet
+                                       resolved */
+    AV* fence_stack = NULL;         /* A stack containing the positions in
+                                       'stack' of where the undealt-with left
+                                       parens would be if they were actually
+                                       put there */
+    IV fence = 0;                   /* Position of where most recent undealt-
+                                       with left paren in stack is; -1 if none.
+                                     */
+    STRLEN len;                     /* Temporary */
+    regnode* node;                  /* Temporary, and final regnode returned by
+                                       this function */
+    const bool save_fold = FOLD;    /* Temporary */
+    char *save_end, *save_parse;    /* Temporaries */
  
      GET_RE_DEBUG_FLAGS_DECL;
  
      PERL_ARGS_ASSERT_HANDLE_REGEX_SETS;
  
-    if (LOC) {
+    if (LOC) {  /* XXX could make valid in UTF-8 locales */
          vFAIL("(?[...]) not valid in locale");
      }
-    RExC_uni_semantics = 1;
+    RExC_uni_semantics = 1;     /* The use of this operator implies /u.  This
+                                   is required so that the compile time values
+                                   are valid in all runtime cases */
  
      /* This will return only an ANYOF regnode, or (unlikely) something smaller
       * (such as EXACT).  Thus we can skip most everything if just sizing.  We
@@ -12813,16 +13339,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
       * upon an unescaped ']' that isn't one ending a regclass.  To do both
       * these things, we need to realize that something preceded by a backslash
       * is escaped, so we have to keep track of backslashes */
-    if (PASS2) {
-        Perl_ck_warner_d(aTHX_
-            packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
-            "The regex_sets feature is experimental" REPORT_LOCATION,
-                UTF8fARG(UTF, (RExC_parse - RExC_precomp), RExC_precomp),
-                UTF8fARG(UTF,
-                         RExC_end - RExC_start - (RExC_parse - RExC_precomp),
-                         RExC_precomp + (RExC_parse - RExC_precomp)));
-    }
-    else {
+    if (SIZE_ONLY) {
          UV depth = 0; /* how many nested (?[...]) constructs */
  
          while (RExC_parse < RExC_end) {
@@ -12865,9 +13382,11 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                                       posix class */
                                    FALSE, /* don't allow multi-char folds */
                                    TRUE, /* silence non-portable warnings. */
-                                  &current))
-                        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"",
-                              (UV) *flagp);
+                                  TRUE, /* strict */
+                                  &current
+                                 ))
+                        FAIL2("panic: regclass returned NULL to handle_sets, "
+                              "flags=%#"UVxf"", (UV) *flagp);
  
                      /* function call leaves parse pointing to the ']', except
                       * if we faked it */
@@ -12897,73 +13416,128 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
              RExC_parse++;
          }
  
-        no_close:
+      no_close:
          FAIL("Syntax error in (?[...])");
      }
  
-    /* Pass 2 only after this.  Everything in this construct is a
-     * metacharacter.  Operands begin with either a '\' (for an escape
-     * sequence), or a '[' for a bracketed character class.  Any other
-     * character should be an operator, or parenthesis for grouping.  Both
-     * types of operands are handled by calling regclass() to parse them.  It
-     * is called with a parameter to indicate to return the computed inversion
-     * list.  The parsing here is implemented via a stack.  Each entry on the
-     * stack is a single character representing one of the operators, or the
-     * '('; or else a pointer to an operand inversion list. */
+    /* Pass 2 only after this. */
+    Perl_ck_warner_d(aTHX_
+        packWARN(WARN_EXPERIMENTAL__REGEX_SETS),
+        "The regex_sets feature is experimental" REPORT_LOCATION,
+            UTF8fARG(UTF, (RExC_parse - RExC_precomp), RExC_precomp),
+            UTF8fARG(UTF,
+                     RExC_end - RExC_start - (RExC_parse - RExC_precomp),
+                     RExC_precomp + (RExC_parse - RExC_precomp)));
+
+    /* Everything in this construct is a metacharacter.  Operands begin with
+     * either a '\' (for an escape sequence), or a '[' for a bracketed
+     * character class.  Any other character should be an operator, or
+     * parenthesis for grouping.  Both types of operands are handled by calling
+     * regclass() to parse them.  It is called with a parameter to indicate to
+     * return the computed inversion list.  The parsing here is implemented via
+     * a stack.  Each entry on the stack is a single character representing one
+     * of the operators; or else a pointer to an operand inversion list. */
  
  #define IS_OPERAND(a)  (! SvIOK(a))
  
-    /* The stack starts empty.  It is a syntax error if the first thing parsed
-     * is a binary operator; everything else is pushed on the stack.  When an
-     * operand is parsed, the top of the stack is examined.  If it is a binary
-     * operator, the item before it should be an operand, and both are replaced
-     * by the result of doing that operation on the new operand and the one on
-     * the stack.   Thus a sequence of binary operands is reduced to a single
-     * one before the next one is parsed.
+    /* The stack is kept in Łukasiewicz order.  (That's pronounced similar
+     * to luke-a-shave-itch (or -itz), but people who didn't want to bother
+     * with prounouncing it called it Reverse Polish instead, but now that YOU
+     * know how to prounounce it you can use the correct term, thus giving due
+     * credit to the person who invented it, and impressing your geek friends.
+     * Wikipedia says that the pronounciation of "Ł" has been changing so that
+     * it is now more like an English initial W (as in wonk) than an L.)
+     *
+     * This means that, for example, 'a | b & c' is stored on the stack as
       *
-     * A unary operator may immediately follow a binary in the input, for
-     * example
+     * c  [4]
+     * b  [3]
+     * &  [2]
+     * a  [1]
+     * |  [0]
+     *
+     * where the numbers in brackets give the stack [array] element number.
+     * In this implementation, parentheses are not stored on the stack.
+     * Instead a '(' creates a "fence" so that the part of the stack below the
+     * fence is invisible except to the corresponding ')' (this allows us to
+     * replace testing for parens, by using instead subtraction of the fence
+     * position).  As new operands are processed they are pushed onto the stack
+     * (except as noted in the next paragraph).  New operators of higher
+     * precedence than the current final one are inserted on the stack before
+     * the lhs operand (so that when the rhs is pushed next, everything will be
+     * in the correct positions shown above.  When an operator of equal or
+     * lower precedence is encountered in parsing, all the stacked operations
+     * of equal or higher precedence are evaluated, leaving the result as the
+     * top entry on the stack.  This makes higher precedence operations
+     * evaluate before lower precedence ones, and causes operations of equal
+     * precedence to left associate.
+     *
+     * The only unary operator '!' is immediately pushed onto the stack when
+     * encountered.  When an operand is encountered, if the top of the stack is
+     * a '!", the complement is immediately performed, and the '!' popped.  The
+     * resulting value is treated as a new operand, and the logic in the
+     * previous paragraph is executed.  Thus in the expression
       *      [a] + ! [b]
-     * When an operand is parsed and the top of the stack is a unary operator,
-     * the operation is performed, and then the stack is rechecked to see if
-     * this new operand is part of a binary operation; if so, it is handled as
-     * above.
+     * the stack looks like
+     *
+     * !
+     * a
+     * +
+     *
+     * as 'b' gets parsed, the latter gets evaluated to '!b', and the stack
+     * becomes
       *
-     * A '(' is simply pushed on the stack; it is valid only if the stack is
-     * empty, or the top element of the stack is an operator or another '('
-     * (for which the parenthesized expression will become an operand).  By the
-     * time the corresponding ')' is parsed everything in between should have
-     * been parsed and evaluated to a single operand (or else is a syntax
-     * error), and is handled as a regular operand */
+     * !b
+     * a
+     * +
+     *
+     * A ')' is treated as an operator with lower precedence than all the
+     * aforementioned ones, which causes all operations on the stack above the
+     * corresponding '(' to be evaluated down to a single resultant operand.
+     * Then the fence for the '(' is removed, and the operand goes through the
+     * algorithm above, without the fence.
+     *
+     * A separate stack is kept of the fence positions, so that the position of
+     * the latest so-far unbalanced '(' is at the top of it.
+     *
+     * The ']' ending the construct is treated as the lowest operator of all,
+     * so that everything gets evaluated down to a single operand, which is the
+     * result */
  
      sv_2mortal((SV *)(stack = newAV()));
+    sv_2mortal((SV *)(fence_stack = newAV()));
  
      while (RExC_parse < RExC_end) {
-        I32 top_index = av_tindex(stack);
-        SV** top_ptr;
-        SV* current = NULL;
+        I32 top_index;              /* Index of top-most element in 'stack' */
+        SV** top_ptr;               /* Pointer to top 'stack' element */
+        SV* current = NULL;         /* To contain the current inversion list
+                                       operand */
+        SV* only_to_avoid_leaks;
  
          /* Skip white space */
          RExC_parse = regpatws(pRExC_state, RExC_parse,
-                                         TRUE /* means recognize comments */ );
+                TRUE /* means recognize comments */ );
          if (RExC_parse >= RExC_end) {
              Perl_croak(aTHX_ "panic: Read past end of '(?[ ])'");
          }
-        if ((curchar = UCHARAT(RExC_parse)) == ']') {
-            break;
-        }
+
+        curchar = UCHARAT(RExC_parse);
+
+redo_curchar:
+
+        top_index = av_tindex(stack);
  
          switch (curchar) {
+            SV** stacked_ptr;       /* Ptr to something already on 'stack' */
+            char stacked_operator;  /* The topmost operator on the 'stack'. */
+            SV* lhs;                /* Operand to the left of the operator */
+            SV* rhs;                /* Operand to the right of the operator */
+            SV* fence_ptr;          /* Pointer to top element of the fence
+                                       stack */
+
+            case '(':
  
-            case '?':
-                if (av_tindex(stack) >= 0   /* This makes sure that we can
-                                               safely subtract 1 from
-                                               RExC_parse in the next clause.
-                                               If we have something on the
-                                               stack, we have parsed something
-                                             */
-                    && UCHARAT(RExC_parse - 1) == '('
-                    && RExC_parse < RExC_end)
+                if (RExC_parse < RExC_end && (UCHARAT(RExC_parse + 1) == '?'))
                  {
                      /* If is a '(?', could be an embedded '(?flags:(?[...])'.
                       * This happens when we have some thing like
@@ -12978,14 +13552,18 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                       * interpolated expression evaluates to.  We use the flags
                       * from the interpolated pattern. */
                      U32 save_flags = RExC_flags;
-                    const char * const save_parse = ++RExC_parse;
+                    const char * save_parse;
+
+                    RExC_parse += 2;        /* Skip past the '(?' */
+                    save_parse = RExC_parse;
  
+                    /* Parse any flags for the '(?' */
                      parse_lparen_question_flags(pRExC_state);
  
                      if (RExC_parse == save_parse  /* Makes sure there was at
-                                                     least one flag (or this
-                                                     embedding wasn't compiled)
-                                                   */
+                                                     least one flag (or else
+                                                     this embedding wasn't
+                                                     compiled) */
                          || RExC_parse >= RExC_end - 4
                          || UCHARAT(RExC_parse) != ':'
                          || UCHARAT(++RExC_parse) != '('
@@ -13005,25 +13583,50 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                          }
                          vFAIL("Expecting '(?flags:(?[...'");
                      }
+
+                    /* Recurse, with the meat of the embedded expression */
                      RExC_parse++;
                      (void) handle_regex_sets(pRExC_state, &current, flagp,
                                                      depth+1, oregcomp_parse);
  
                      /* Here, 'current' contains the embedded expression's
                       * inversion list, and RExC_parse points to the trailing
-                     * ']'; the next character should be the ')' which will be
-                     * paired with the '(' that has been put on the stack, so
-                     * the whole embedded expression reduces to '(operand)' */
+                     * ']'; the next character should be the ')' */
+                    RExC_parse++;
+                    assert(RExC_parse < RExC_end && UCHARAT(RExC_parse) == ')');
+
+                    /* Then the ')' matching the original '(' handled by this
+                     * case: statement */
                      RExC_parse++;
+                    assert(RExC_parse < RExC_end && UCHARAT(RExC_parse) == ')');
  
+                    RExC_parse++;
                      RExC_flags = save_flags;
                      goto handle_operand;
                  }
-                /* FALLTHROUGH */
  
-            default:
-                RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
-                vFAIL("Unexpected character");
+                /* A regular '('.  Look behind for illegal syntax */
+                if (top_index - fence >= 0) {
+                    /* If the top entry on the stack is an operator, it had
+                     * better be a '!', otherwise the entry below the top
+                     * operand should be an operator */
+                    if ( ! (top_ptr = av_fetch(stack, top_index, FALSE))
+                        || (! IS_OPERAND(*top_ptr) && SvUV(*top_ptr) != '!')
+                        || top_index - fence < 1
+                        || ! (stacked_ptr = av_fetch(stack,
+                                                     top_index - 1,
+                                                     FALSE))
+                        || IS_OPERAND(*stacked_ptr))
+                    {
+                        RExC_parse++;
+                        vFAIL("Unexpected '(' with no preceding operator");
+                    }
+                }
+
+                /* Stack the position of this undealt-with left paren */
+                fence = top_index + 1;
+                av_push(fence_stack, newSViv(fence));
+                break;
  
              case '\\':
                  /* regclass() can only return RESTART_UTF8 if multi-char
@@ -13032,9 +13635,13 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                TRUE, /* means parse just the next thing */
                                FALSE, /* don't allow multi-char folds */
                                FALSE, /* don't silence non-portable warnings.  */
+                              TRUE,  /* strict */
                                &current))
-                    FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"",
-                          (UV) *flagp);
+                {
+                    FAIL2("panic: regclass returned NULL to handle_sets, "
+                          "flags=%#"UVxf"", (UV) *flagp);
+                }
+
                  /* regclass() will return with parsing just the \ sequence,
                   * leaving the parse pointer at the next thing to parse */
                  RExC_parse--;
@@ -13055,9 +13662,14 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                                                  only if not a posix class */
                               FALSE, /* don't allow multi-char folds */
                               FALSE, /* don't silence non-portable warnings.  */
-                             &current))
-                    FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"",
-                          (UV) *flagp);
+                             TRUE,   /* strict */
+                             &current
+                            ))
+                {
+                    FAIL2("panic: regclass returned NULL to handle_sets, "
+                          "flags=%#"UVxf"", (UV) *flagp);
+                }
+
                  /* function call leaves parse pointing to the ']', except if we
                   * faked it */
                  if (is_posix_class) {
@@ -13067,147 +13679,237 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                  goto handle_operand;
              }
  
+            case ']':
+                if (top_index >= 1) {
+                    goto join_operators;
+                }
+
+                /* Only a single operand on the stack: are done */
+                goto done;
+
+            case ')':
+                if (av_tindex(fence_stack) < 0) {
+                    RExC_parse++;
+                    vFAIL("Unexpected ')'");
+                }
+
+                 /* If at least two thing on the stack, treat this as an
+                  * operator */
+                if (top_index - fence >= 1) {
+                    goto join_operators;
+                }
+
+                /* Here only a single thing on the fenced stack, and there is a
+                 * fence.  Get rid of it */
+                fence_ptr = av_pop(fence_stack);
+                assert(fence_ptr);
+                fence = SvIV(fence_ptr) - 1;
+                SvREFCNT_dec_NN(fence_ptr);
+                fence_ptr = NULL;
+
+                if (fence < 0) {
+                    fence = 0;
+                }
+
+                /* Having gotten rid of the fence, we pop the operand at the
+                 * stack top and process it as a newly encountered operand */
+                current = av_pop(stack);
+                assert(IS_OPERAND(current));
+                goto handle_operand;
+
              case '&':
              case '|':
              case '+':
              case '-':
              case '^':
-                if (top_index < 0
+
+                /* These binary operators should have a left operand already
+                 * parsed */
+                if (   top_index - fence < 0
+                    || top_index - fence == 1
                      || ( ! (top_ptr = av_fetch(stack, top_index, FALSE)))
                      || ! IS_OPERAND(*top_ptr))
                  {
-                    RExC_parse++;
-                    vFAIL2("Unexpected binary operator '%c' with no preceding operand", curchar);
+                    goto unexpected_binary;
                  }
-                av_push(stack, newSVuv(curchar));
-                break;
  
-            case '!':
-                av_push(stack, newSVuv(curchar));
-                break;
+                /* If only the one operand is on the part of the stack visible
+                 * to us, we just place this operator in the proper position */
+                if (top_index - fence < 2) {
  
-            case '(':
-                if (top_index >= 0) {
-                    top_ptr = av_fetch(stack, top_index, FALSE);
-                    assert(top_ptr);
-                    if (IS_OPERAND(*top_ptr)) {
-                        RExC_parse++;
-                        vFAIL("Unexpected '(' with no preceding operator");
-                    }
+                    /* Place the operator before the operand */
+
+                    SV* lhs = av_pop(stack);
+                    av_push(stack, newSVuv(curchar));
+                    av_push(stack, lhs);
+                    break;
                  }
-                av_push(stack, newSVuv(curchar));
-                break;
  
-            case ')':
-            {
-                SV* lparen;
-                if (top_index < 1
-                    || ! (current = av_pop(stack))
-                    || ! IS_OPERAND(current)
-                    || ! (lparen = av_pop(stack))
-                    || IS_OPERAND(lparen)
-                    || SvUV(lparen) != '(')
+                /* But if there is something else on the stack, we need to
+                 * process it before this new operator if and only if the
+                 * stacked operation has equal or higher precedence than the
+                 * new one */
+
+             join_operators:
+
+                /* The operator on the stack is supposed to be below both its
+                 * operands */
+                if (   ! (stacked_ptr = av_fetch(stack, top_index - 2, FALSE))
+                    || IS_OPERAND(*stacked_ptr))
                  {
-                    SvREFCNT_dec(current);
+                    /* But if not, it's legal and indicates we are completely
+                     * done if and only if we're currently processing a ']',
+                     * which should be the final thing in the expression */
+                    if (curchar == ']') {
+                        goto done;
+                    }
+
+                  unexpected_binary:
                      RExC_parse++;
-                    vFAIL("Unexpected ')'");
+                    vFAIL2("Unexpected binary operator '%c' with no "
+                           "preceding operand", curchar);
                  }
-                top_index -= 2;
-                SvREFCNT_dec_NN(lparen);
+                stacked_operator = (char) SvUV(*stacked_ptr);
  
-                /* FALLTHROUGH */
-            }
+                if (regex_set_precedence(curchar)
+                    > regex_set_precedence(stacked_operator))
+                {
+                    /* Here, the new operator has higher precedence than the
+                     * stacked one.  This means we need to add the new one to
+                     * the stack to await its rhs operand (and maybe more
+                     * stuff).  We put it before the lhs operand, leaving
+                     * untouched the stacked operator and everything below it
+                     * */
+                    lhs = av_pop(stack);
+                    assert(IS_OPERAND(lhs));
  
-              handle_operand:
+                    av_push(stack, newSVuv(curchar));
+                    av_push(stack, lhs);
+                    break;
+                }
  
-                /* Here, we have an operand to process, in 'current' */
+                /* Here, the new operator has equal or lower precedence than
+                 * what's already there.  This means the operation already
+                 * there should be performed now, before the new one. */
+                rhs = av_pop(stack);
+                lhs = av_pop(stack);
  
-                if (top_index < 0) {    /* Just push if stack is empty */
-                    av_push(stack, current);
-                }
-                else {
-                    SV* top = av_pop(stack);
-                    SV *prev = NULL;
-                    char current_operator;
-
-                    if (IS_OPERAND(top)) {
-                        SvREFCNT_dec_NN(top);
-                        SvREFCNT_dec_NN(current);
-                        vFAIL("Operand with no preceding operator");
+                assert(IS_OPERAND(rhs));
+                assert(IS_OPERAND(lhs));
+
+                switch (stacked_operator) {
+                    case '&':
+                        _invlist_intersection(lhs, rhs, &rhs);
+                        break;
+
+                    case '|':
+                    case '+':
+                        _invlist_union(lhs, rhs, &rhs);
+                        break;
+
+                    case '-':
+                        _invlist_subtract(lhs, rhs, &rhs);
+                        break;
+
+                    case '^':   /* The union minus the intersection */
+                    {
+                        SV* i = NULL;
+                        SV* u = NULL;
+                        SV* element;
+
+                        _invlist_union(lhs, rhs, &u);
+                        _invlist_intersection(lhs, rhs, &i);
+                        /* _invlist_subtract will overwrite rhs
+                            without freeing what it already contains */
+                        element = rhs;
+                        _invlist_subtract(u, i, &rhs);
+                        SvREFCNT_dec_NN(i);
+                        SvREFCNT_dec_NN(u);
+                        SvREFCNT_dec_NN(element);
+                        break;
                      }
-                    current_operator = (char) SvUV(top);
-                    switch (current_operator) {
-                        case '(':   /* Push the '(' back on followed by the new
-                                       operand */
-                            av_push(stack, top);
-                            av_push(stack, current);
-                            SvREFCNT_inc(top);  /* Counters the '_dec' done
-                                                   just after the 'break', so
-                                                   it doesn't get wrongly freed
-                                                 */
-                            break;
+                }
+                SvREFCNT_dec(lhs);
+
+                /* Here, the higher precedence operation has been done, and the
+                 * result is in 'rhs'.  We overwrite the stacked operator with
+                 * the result.  Then we redo this code to either push the new
+                 * operator onto the stack or perform any higher precedence
+                 * stacked operation */
+                only_to_avoid_leaks = av_pop(stack);
+                SvREFCNT_dec(only_to_avoid_leaks);
+                av_push(stack, rhs);
+                goto redo_curchar;
+
+            case '!':   /* Highest priority, right associative, so just push
+                           onto stack */
+                av_push(stack, newSVuv(curchar));
+                break;
  
-                        case '!':
-                            _invlist_invert(current);
-
-                            /* Unlike binary operators, the top of the stack,
-                             * now that this unary one has been popped off, may
-                             * legally be an operator, and we now have operand
-                             * for it. */
-                            top_index--;
-                            SvREFCNT_dec_NN(top);
-                            goto handle_operand;
-
-                        case '&':
-                            prev = av_pop(stack);
-                            _invlist_intersection(prev,
-                                                   current,
-                                                   &current);
-                            av_push(stack, current);
-                            break;
+            default:
+                RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+                vFAIL("Unexpected character");
  
-                        case '|':
-                        case '+':
-                            prev = av_pop(stack);
-                            _invlist_union(prev, current, &current);
-                            av_push(stack, current);
-                            break;
+          handle_operand:
+
+            /* Here 'current' is the operand.  If something is already on the
+             * stack, we have to check if it is a !. */
+            top_index = av_tindex(stack);   /* Code above may have altered the
+                                             * stack in the time since we
+                                             * earlier set 'top_index'. */
+            if (top_index - fence >= 0) {
+                /* If the top entry on the stack is an operator, it had better
+                 * be a '!', otherwise the entry below the top operand should
+                 * be an operator */
+                top_ptr = av_fetch(stack, top_index, FALSE);
+                assert(top_ptr);
+                if (! IS_OPERAND(*top_ptr)) {
+
+                    /* The only permissible operator at the top of the stack is
+                     * '!', which is applied immediately to this operand. */
+                    curchar = (char) SvUV(*top_ptr);
+                    if (curchar != '!') {
+                        SvREFCNT_dec(current);
+                        vFAIL2("Unexpected binary operator '%c' with no "
+                                "preceding operand", curchar);
+                    }
  
-                        case '-':
-                            prev = av_pop(stack);;
-                            _invlist_subtract(prev, current, &current);
-                            av_push(stack, current);
-                            break;
+                    _invlist_invert(current);
  
-                        case '^':   /* The union minus the intersection */
-                        {
-                            SV* i = NULL;
-                            SV* u = NULL;
-                            SV* element;
-
-                            prev = av_pop(stack);
-                            _invlist_union(prev, current, &u);
-                            _invlist_intersection(prev, current, &i);
-                            /* _invlist_subtract will overwrite current
-                                without freeing what it already contains */
-                            element = current;
-                            _invlist_subtract(u, i, &current);
-                            av_push(stack, current);
-                            SvREFCNT_dec_NN(i);
-                            SvREFCNT_dec_NN(u);
-                            SvREFCNT_dec_NN(element);
-                            break;
-                        }
+                    only_to_avoid_leaks = av_pop(stack);
+                    SvREFCNT_dec(only_to_avoid_leaks);
+                    top_index = av_tindex(stack);
  
-                        default:
-                            Perl_croak(aTHX_ "panic: Unexpected item on '(?[ ])' stack");
+                    /* And we redo with the inverted operand.  This allows
+                     * handling multiple ! in a row */
+                    goto handle_operand;
+                }
+                          /* Single operand is ok only for the non-binary ')'
+                           * operator */
+                else if ((top_index - fence == 0 && curchar != ')')
+                         || (top_index - fence > 0
+                             && (! (stacked_ptr = av_fetch(stack,
+                                                           top_index - 1,
+                                                           FALSE))
+                                 || IS_OPERAND(*stacked_ptr))))
+                {
+                    SvREFCNT_dec(current);
+                    vFAIL("Operand with no preceding operator");
                  }
-                SvREFCNT_dec_NN(top);
-                SvREFCNT_dec(prev);
              }
-        }
+
+            /* Here there was nothing on the stack or the top element was
+             * another operand.  Just add this new one */
+            av_push(stack, current);
+
+        } /* End of switch on next parse token */
  
          RExC_parse += (UTF) ? UTF8SKIP(RExC_parse) : 1;
+    } /* End of loop parsing through the construct */
+
+  done:
+    if (av_tindex(fence_stack) >= 0) {
+        vFAIL("Unmatched (");
      }
  
      if (av_tindex(stack) < 0   /* Was empty */
@@ -13215,6 +13917,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
          || ! IS_OPERAND(final)
          || av_tindex(stack) >= 0)  /* More left on stack */
      {
+        SvREFCNT_dec(final);
          vFAIL("Incomplete expression within '(?[ ])'");
      }
  
@@ -13239,6 +13942,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
          }
      }
  
+    /* About to generate an ANYOF (or similar) node from the inversion list we
+     * have calculated */
      save_parse = RExC_parse;
      RExC_parse = SvPV(result_string, len);
      save_end = RExC_end;
@@ -13256,7 +13961,9 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist,
                      TRUE, /* silence non-portable warnings.  The above may very
                               well have generated non-portable code points, but
                               they're valid on this machine */
-                    NULL);
+                    FALSE, /* similarly, no need for strict */
+                    NULL
+                );
      if (!node)
          FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf,
                      PTR2UV(flagp));
@@ -13392,7 +14099,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   const bool silence_non_portable,   /* Don't output warnings
                                                         about too large
                                                         characters */
-                 SV** ret_invlist)  /* Return an inversion list, not a node */
+                 const bool strict,
+                 SV** ret_invlist  /* Return an inversion list, not a node */
+          )
  {
      /* parse a bracketed class specification.  Most of these will produce an
       * ANYOF node; but something like [a] will produce an EXACT node; [aA], an
@@ -13441,6 +14150,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                 separate for a while from the non-complemented
                                 versions because of complications with /d
                                 matching */
+    SV* simple_posixes = NULL; /* But under some conditions, the classes can be
+                                  treated more simply than the general case,
+                                  leading to less compilation and execution
+                                  work */
      UV element_count = 0;   /* Number of distinct elements in the class.
                                Optimizations may be possible if this is tiny */
      AV * multi_char_matches = NULL; /* Code points that fold to more than one
@@ -13449,7 +14162,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      char * stop_ptr = RExC_end;    /* where to stop parsing */
      const bool skip_white = cBOOL(ret_invlist); /* ignore unescaped white
                                                     space? */
-    const bool strict = cBOOL(ret_invlist); /* Apply strict parsing rules? */
  
      /* Unicode properties are stored in a swash; this holds the current one
       * being parsed.  If this swash is the only above-latin1 component of the
@@ -13479,11 +14191,17 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * runtime locale is UTF-8 */
      SV* only_utf8_locale_list = NULL;
  
-#ifdef EBCDIC
-    /* In a range, counts how many 0-2 of the ends of it came from literals,
-     * not escapes.  Thus we can tell if 'A' was input vs \x{C1} */
-    UV literal_endpoint = 0;
-#endif
+    /* In a range, if one of the endpoints is non-character-set portable,
+     * meaning that it hard-codes a code point that may mean a different
+     * charactger in ASCII vs. EBCDIC, as opposed to, say, a literal 'A' or a
+     * mnemonic '\t' which each mean the same character no matter which
+     * character set the platform is on. */
+    unsigned int non_portable_endpoint = 0;
+
+    /* Is the range unicode? which means on a platform that isn't 1-1 native
+     * to Unicode (i.e. non-ASCII), each code point in it should be considered
+     * to be a Unicode value.  */
+    bool unicode_range = FALSE;
      bool invert = FALSE;    /* Is this class to be complemented */
  
      bool warn_super = ALWAYS_WARN_SUPER;
@@ -13503,7 +14221,11 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      DEBUG_PARSE("clas");
  
      /* Assume we are going to generate an ANYOF node. */
-    ret = reganode(pRExC_state, ANYOF, 0);
+    ret = reganode(pRExC_state,
+                   (LOC)
+                    ? ANYOFL
+                    : ANYOF,
+                   0);
  
      if (SIZE_ONLY) {
         RExC_size += ANYOF_SKIP;
@@ -13527,7 +14249,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         RExC_parse++;
          invert = TRUE;
          allow_multi_folds = FALSE;
-        RExC_naughty++;
+        MARK_NAUGHTY(1);
          if (skip_white) {
              RExC_parse = regpatws(pRExC_state, RExC_parse,
                                    FALSE /* means don't recognize comments */ );
@@ -13539,6 +14261,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         const char *s = RExC_parse;
         const char  c = *s++;
  
+        if (*s == '^') {
+            s++;
+        }
         while (isWORDCHAR(*s))
             s++;
         if (*s && c == *s && s[1] == ']') {
@@ -13574,7 +14299,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              break;
          }
  
-    charclassloop:
+      charclassloop:
  
         namedclass = OOB_NAMEDCLASS; /* initialize as illegal */
          save_value = value;
@@ -13583,6 +14308,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
         if (!range) {
             rangebegin = RExC_parse;
             element_count++;
+            non_portable_endpoint = 0;
         }
         if (UTF) {
             value = utf8n_to_uvchr((U8*)RExC_parse,
@@ -13599,14 +14325,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
          {
              namedclass = regpposixcc(pRExC_state, value, strict);
          }
-        else if (value != '\\') {
-#ifdef EBCDIC
-            literal_endpoint++;
-#endif
-        }
-        else {
+        else if (value == '\\') {
              /* Is a backslash; get the code point of the char after it */
-           if (UTF && ! UTF8_IS_INVARIANT(RExC_parse)) {
+           if (UTF && ! UTF8_IS_INVARIANT(UCHARAT(RExC_parse))) {
                 value = utf8n_to_uvchr((U8*)RExC_parse,
                                    RExC_end - RExC_parse,
                                    &numlen, UTF8_ALLOW_DEFAULT);
@@ -13639,14 +14360,24 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
             case 'H':   namedclass = ANYOF_NHORIZWS;    break;
              case 'N':  /* Handle \N{NAME} in class */
                  {
-                    SV *as_text;
-                    STRLEN cp_count = grok_bslash_N(pRExC_state, NULL, &value,
-                                                    flagp, depth, &as_text);
-                    if (*flagp & RESTART_UTF8)
-                        FAIL("panic: grok_bslash_N set RESTART_UTF8");
-                    if (cp_count != 1) {    /* The typical case drops through */
-                        assert(cp_count != (STRLEN) -1);
-                        if (cp_count == 0) {
+                    const char * const backslash_N_beg = RExC_parse - 2;
+                    int cp_count;
+
+                    if (! grok_bslash_N(pRExC_state,
+                                        NULL,      /* No regnode */
+                                        &value,    /* Yes single value */
+                                        &cp_count, /* Multiple code pt count */
+                                        flagp,
+                                        depth)
+                    ) {
+
+                        if (*flagp & RESTART_UTF8)
+                            FAIL("panic: grok_bslash_N set RESTART_UTF8");
+
+                        if (cp_count < 0) {
+                            vFAIL("\\N in a character class must be a named character: \\N{...}");
+                        }
+                        else if (cp_count == 0) {
                              if (strict) {
                                  RExC_parse++;   /* Position after the "}" */
                                  vFAIL("Zero length \\N{}");
@@ -13666,16 +14397,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                      else if (PASS2) {
                                          ckWARNreg(RExC_parse, "Using just the first character returned by \\N{} in character class");
                                      }
+                                    break; /* <value> contains the first code
+                                              point. Drop out of the switch to
+                                              process it */
                                  }
                                  else {
+                                    SV * multi_char_N = newSVpvn(backslash_N_beg,
+                                                 RExC_parse - backslash_N_beg);
                                      multi_char_matches
                                          = add_multi_match(multi_char_matches,
-                                                          as_text,
+                                                          multi_char_N,
                                                            cp_count);
                                  }
-                                break; /* <value> contains the first code
-                                          point. Drop out of the switch to
-                                          process it */
                              }
                          } /* End of cp_count != 1 */
  
@@ -13686,7 +14419,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                          prevvalue = save_prevvalue;
                          continue;   /* Back to top of loop to get next char */
                      }
+
                      /* Here, is a single code point, and <value> contains it */
+                    unicode_range = TRUE;   /* \N{} are Unicode */
                  }
                  break;
             case 'p':
@@ -13884,7 +14619,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         vFAIL(error_msg);
                     }
                 }
-               if (PL_encoding && value < 0x100) {
+                non_portable_endpoint++;
+               if (IN_ENCODING && value < 0x100) {
                     goto recode_encoding;
                 }
                 break;
@@ -13903,11 +14639,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                         vFAIL(error_msg);
                     }
                 }
-               if (PL_encoding && value < 0x100)
+                non_portable_endpoint++;
+               if (IN_ENCODING && value < 0x100)
                     goto recode_encoding;
                 break;
             case 'c':
                 value = grok_bslash_c(*RExC_parse++, PASS2);
+                non_portable_endpoint++;
                 break;
             case '0': case '1': case '2': case '3': case '4':
             case '5': case '6': case '7':
@@ -13935,13 +14673,14 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                              (void)ReREFCNT_inc(RExC_rx_sv);
                          }
                      }
-                   if (PL_encoding && value < 0x100)
+                    non_portable_endpoint++;
+                   if (IN_ENCODING && value < 0x100)
                         goto recode_encoding;
                     break;
                 }
-           recode_encoding:
+             recode_encoding:
                 if (! RExC_override_recoding) {
-                   SV* enc = PL_encoding;
+                   SV* enc = _get_encoding();
                     value = reg_recode((const char)(U8)value, &enc);
                     if (!enc) {
                          if (strict) {
@@ -14113,15 +14852,33 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                                  &cp_list);
                      }
                  }
-                else {  /* Garden variety class.  If is NASCII, NDIGIT, ...
+                else if (UNI_SEMANTICS
+                        || classnum == _CC_ASCII
+                        || (DEPENDS_SEMANTICS && (classnum == _CC_DIGIT
+                                                  || classnum == _CC_XDIGIT)))
+                {
+                    /* We usually have to worry about /d and /a affecting what
+                     * POSIX classes match, with special code needed for /d
+                     * because we won't know until runtime what all matches.
+                     * But there is no extra work needed under /u, and
+                     * [:ascii:] is unaffected by /a and /d; and :digit: and
+                     * :xdigit: don't have runtime differences under /d.  So we
+                     * can special case these, and avoid some extra work below,
+                     * and at runtime. */
+                    _invlist_union_maybe_complement_2nd(
+                                                     simple_posixes,
+                                                     PL_XPosix_ptrs[classnum],
+                                                     namedclass % 2 != 0,
+                                                     &simple_posixes);
+                }
+                else {  /* Garden variety class.  If is NUPPER, NALPHA, ...
                             complement and use nposixes */
                      SV** posixes_ptr = namedclass % 2 == 0
                                         ? &posixes
                                         : &nposixes;
-                    SV** source_ptr = &PL_XPosix_ptrs[classnum];
                      _invlist_union_maybe_complement_2nd(
                                                       *posixes_ptr,
-                                                     *source_ptr,
+                                                     PL_XPosix_ptrs[classnum],
                                                       namedclass % 2 != 0,
                                                       posixes_ptr);
                  }
@@ -14143,12 +14900,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
           * minus sign */
  
         if (range) {
+#ifdef EBCDIC
+            /* For unicode ranges, we have to test that the Unicode as opposed
+             * to the native values are not decreasing.  (Above 255, there is
+             * no difference between native and Unicode) */
+           if (unicode_range && prevvalue < 255 && value < 255) {
+                if (NATIVE_TO_LATIN1(prevvalue) > NATIVE_TO_LATIN1(value)) {
+                    goto backwards_range;
+                }
+            }
+            else
+#endif
             if (prevvalue > value) /* b-a */ {
-               const int w = RExC_parse - rangebegin;
+               int w;
+#ifdef EBCDIC
+              backwards_range:
+#endif
+                w = RExC_parse - rangebegin;
                  vFAIL2utf8f(
                      "Invalid [] range \"%"UTF8f"\"",
                      UTF8fARG(UTF, w, rangebegin));
-               range = 0; /* not a valid range */
+                NOT_REACHED; /* NOTREACHED */
             }
         }
         else {
@@ -14200,8 +14972,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              continue;
          }
  
-        /* Here, we have a single value, and <prevvalue> is the beginning of
-         * the range, if any; or <value> if not */
+        /* Here, we have a single value this time through the loop, and
+         * <prevvalue> is the beginning of the range, if any; or <value> if
+         * not. */
  
         /* non-Latin1 code point implies unicode semantics.  Must be set in
          * pass1 so is there for the whole of pass 2 */
@@ -14272,37 +15045,147 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              }
          }
  
+        if (strict && PASS2 && ckWARN(WARN_REGEXP)) {
+            if (range) {
+
+                /* If the range starts above 255, everything is portable and
+                 * likely to be so for any forseeable character set, so don't
+                 * warn. */
+                if (unicode_range && non_portable_endpoint && prevvalue < 256) {
+                    vWARN(RExC_parse, "Both or neither range ends should be Unicode");
+                }
+                else if (prevvalue != value) {
+
+                    /* Under strict, ranges that stop and/or end in an ASCII
+                     * printable should have each end point be a portable value
+                     * for it (preferably like 'A', but we don't warn if it is
+                     * a (portable) Unicode name or code point), and the range
+                     * must be be all digits or all letters of the same case.
+                     * Otherwise, the range is non-portable and unclear as to
+                     * what it contains */
+                    if ((isPRINT_A(prevvalue) || isPRINT_A(value))
+                        && (non_portable_endpoint
+                            || ! ((isDIGIT_A(prevvalue) && isDIGIT_A(value))
+                                   || (isLOWER_A(prevvalue) && isLOWER_A(value))
+                                   || (isUPPER_A(prevvalue) && isUPPER_A(value)))))
+                    {
+                        vWARN(RExC_parse, "Ranges of ASCII printables should be some subset of \"0-9\", \"A-Z\", or \"a-z\"");
+                    }
+                    else if (prevvalue >= 0x660) { /* ARABIC_INDIC_DIGIT_ZERO */
+
+                        /* But the nature of Unicode and languages mean we
+                         * can't do the same checks for above-ASCII ranges,
+                         * except in the case of digit ones.  These should
+                         * contain only digits from the same group of 10.  The
+                         * ASCII case is handled just above.  0x660 is the
+                         * first digit character beyond ASCII.  Hence here, the
+                         * range could be a range of digits.  Find out.  */
+                        IV index_start = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
+                                                         prevvalue);
+                        IV index_final = _invlist_search(PL_XPosix_ptrs[_CC_DIGIT],
+                                                         value);
+
+                        /* If the range start and final points are in the same
+                         * inversion list element, it means that either both
+                         * are not digits, or both are digits in a consecutive
+                         * sequence of digits.  (So far, Unicode has kept all
+                         * such sequences as distinct groups of 10, but assert
+                         * to make sure).  If the end points are not in the
+                         * same element, neither should be a digit. */
+                        if (index_start == index_final) {
+                            assert(! ELEMENT_RANGE_MATCHES_INVLIST(index_start)
+                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
+                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
+                               == 10)
+                               /* But actually Unicode did have one group of 11
+                                * 'digits' in 5.2, so in case we are operating
+                                * on that version, let that pass */
+                            || (invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start+1]
+                               - invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
+                                == 11
+                               && invlist_array(PL_XPosix_ptrs[_CC_DIGIT])[index_start]
+                                == 0x19D0)
+                            );
+                        }
+                        else if ((index_start >= 0
+                                  && ELEMENT_RANGE_MATCHES_INVLIST(index_start))
+                                 || (index_final >= 0
+                                     && ELEMENT_RANGE_MATCHES_INVLIST(index_final)))
+                        {
+                            vWARN(RExC_parse, "Ranges of digits should be from the same group of 10");
+                        }
+                    }
+                }
+            }
+            if ((! range || prevvalue == value) && non_portable_endpoint) {
+                if (isPRINT_A(value)) {
+                    char literal[3];
+                    unsigned d = 0;
+                    if (isBACKSLASHED_PUNCT(value)) {
+                        literal[d++] = '\\';
+                    }
+                    literal[d++] = (char) value;
+                    literal[d++] = '\0';
+
+                    vWARN4(RExC_parse,
+                           "\"%.*s\" is more clearly written simply as \"%s\"",
+                           (int) (RExC_parse - rangebegin),
+                           rangebegin,
+                           literal
+                        );
+                }
+                else if isMNEMONIC_CNTRL(value) {
+                    vWARN4(RExC_parse,
+                           "\"%.*s\" is more clearly written simply as \"%s\"",
+                           (int) (RExC_parse - rangebegin),
+                           rangebegin,
+                           cntrl_to_mnemonic((char) value)
+                        );
+                }
+            }
+        }
+
          /* Deal with this element of the class */
         if (! SIZE_ONLY) {
+
  #ifndef EBCDIC
              cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
                                                       prevvalue, value);
  #else
-            SV* this_range = _new_invlist(1);
-            _append_range_to_invlist(this_range, prevvalue, value);
-
-            /* In EBCDIC, the ranges 'A-Z' and 'a-z' are each not contiguous.
-             * If this range was specified using something like 'i-j', we want
-             * to include only the 'i' and the 'j', and not anything in
-             * between, so exclude non-ASCII, non-alphabetics from it.
-             * However, if the range was specified with something like
-             * [\x89-\x91] or [\x89-j], all code points within it should be
-             * included.  literal_endpoint==2 means both ends of the range used
-             * a literal character, not \x{foo} */
-           if (literal_endpoint == 2
-                && ((prevvalue >= 'a' && value <= 'z')
-                    || (prevvalue >= 'A' && value <= 'Z')))
+            /* On non-ASCII platforms, for ranges that span all of 0..255, and
+             * ones that don't require special handling, we can just add the
+             * range like we do for ASCII platforms */
+            if ((UNLIKELY(prevvalue == 0) && value >= 255)
+                || ! (prevvalue < 256
+                      && (unicode_range
+                          || (! non_portable_endpoint
+                              && ((isLOWER_A(prevvalue) && isLOWER_A(value))
+                                  || (isUPPER_A(prevvalue)
+                                      && isUPPER_A(value)))))))
              {
-                _invlist_intersection(this_range, PL_XPosix_ptrs[_CC_ASCII],
-                                      &this_range);
-
-                /* Since this above only contains ascii, the intersection of it
-                 * with anything will still yield only ascii */
-                _invlist_intersection(this_range, PL_XPosix_ptrs[_CC_ALPHA],
-                                      &this_range);
+                cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                         prevvalue, value);
+            }
+            else {
+                /* Here, requires special handling.  This can be because it is
+                 * a range whose code points are considered to be Unicode, and
+                 * so must be individually translated into native, or because
+                 * its a subrange of 'A-Z' or 'a-z' which each aren't
+                 * contiguous in EBCDIC, but we have defined them to include
+                 * only the "expected" upper or lower case ASCII alphabetics.
+                 * Subranges above 255 are the same in native and Unicode, so
+                 * can be added as a range */
+                U8 start = NATIVE_TO_LATIN1(prevvalue);
+                unsigned j;
+                U8 end = (value < 256) ? NATIVE_TO_LATIN1(value) : 255;
+                for (j = start; j <= end; j++) {
+                    cp_foldable_list = add_cp_to_invlist(cp_foldable_list, LATIN1_TO_NATIVE(j));
+                }
+                if (value > 255) {
+                    cp_foldable_list = _add_range_to_invlist(cp_foldable_list,
+                                                             256, value);
+                }
              }
-            _invlist_union(cp_foldable_list, this_range, &cp_foldable_list);
-            literal_endpoint = 0;
  #endif
          }
  
@@ -14432,7 +15315,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
                  /* The actual POSIXish node for all the rest depends on the
                   * charset modifier.  The ones in the first set depend only on
-                 * ASCII or, if available on this platform, locale */
+                 * ASCII or, if available on this platform, also locale */
                  case ANYOF_ASCII:
                  case ANYOF_NASCII:
  #ifdef HAS_ISASCII
@@ -14442,19 +15325,27 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  #endif
                      goto join_posix;
  
-                case ANYOF_NCASED:
+                /* The following don't have any matches in the upper Latin1
+                 * range, hence /d is equivalent to /u for them.  Making it /u
+                 * saves some branches at runtime */
+                case ANYOF_DIGIT:
+                case ANYOF_NDIGIT:
+                case ANYOF_XDIGIT:
+                case ANYOF_NXDIGIT:
+                    if (! DEPENDS_SEMANTICS) {
+                        goto treat_as_default;
+                    }
+
+                    op = POSIXU;
+                    goto join_posix;
+
+                /* The following change to CASED under /i */
                  case ANYOF_LOWER:
                  case ANYOF_NLOWER:
                  case ANYOF_UPPER:
                  case ANYOF_NUPPER:
-                    /* under /a could be alpha */
                      if (FOLD) {
-                        if (ASCII_RESTRICTED) {
-                            namedclass = ANYOF_ALPHA + (namedclass % 2);
-                        }
-                        else if (! LOC) {
-                            break;
-                        }
+                        namedclass = ANYOF_CASED + (namedclass % 2);
                      }
                      /* FALLTHROUGH */
  
@@ -14462,12 +15353,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                   * We take advantage of the enum ordering of the charset
                   * modifiers to get the exact node type, */
                  default:
+                  treat_as_default:
                      op = POSIXD + get_regex_charset(RExC_flags);
                      if (op > POSIXA) { /* /aa is same as /a */
                          op = POSIXA;
                      }
  
-                join_posix:
+                  join_posix:
                      /* The odd numbered ones are the complements of the
                       * next-lower even number one */
                      if (namedclass % 2 == 1) {
@@ -14486,7 +15378,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  if (! LOC && value == '\n') {
                      op = REG_ANY; /* Optimize [^\n] */
                      *flagp |= HASWIDTH|SIMPLE;
-                    RExC_naughty++;
+                    MARK_NAUGHTY(1);
                  }
              }
              else if (value < 256 || UTF) {
@@ -14503,24 +15395,28 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                      op = POSIXA;
                  }
              }
-            else if (prevvalue == 'A') {
-                if (value == 'Z'
+            else if (! FOLD || ASCII_FOLD_RESTRICTED) {
+                /* We can optimize A-Z or a-z, but not if they could match
+                 * something like the KELVIN SIGN under /i. */
+                if (prevvalue == 'A') {
+                    if (value == 'Z'
  #ifdef EBCDIC
-                    && literal_endpoint == 2
+                        && ! non_portable_endpoint
  #endif
-                ) {
-                    arg = (FOLD) ? _CC_ALPHA : _CC_UPPER;
-                    op = POSIXA;
+                    ) {
+                        arg = (FOLD) ? _CC_ALPHA : _CC_UPPER;
+                        op = POSIXA;
+                    }
                  }
-            }
-            else if (prevvalue == 'a') {
-                if (value == 'z'
+                else if (prevvalue == 'a') {
+                    if (value == 'z'
  #ifdef EBCDIC
-                    && literal_endpoint == 2
+                        && ! non_portable_endpoint
  #endif
-                ) {
-                    arg = (FOLD) ? _CC_ALPHA : _CC_LOWER;
-                    op = POSIXA;
+                    ) {
+                        arg = (FOLD) ? _CC_ALPHA : _CC_LOWER;
+                        op = POSIXA;
+                    }
                  }
              }
          }
@@ -14574,6 +15470,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
  
              SvREFCNT_dec(posixes);
              SvREFCNT_dec(nposixes);
+            SvREFCNT_dec(simple_posixes);
              SvREFCNT_dec(cp_list);
              SvREFCNT_dec(cp_foldable_list);
              return ret;
@@ -14731,6 +15628,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
       * classes.  The lists are kept separate up to now because we don't want to
       * fold the classes (folding of those is automatically handled by the swash
       * fetching code) */
+    if (simple_posixes) {
+        _invlist_union(cp_list, simple_posixes, &cp_list);
+        SvREFCNT_dec_NN(simple_posixes);
+    }
      if (posixes || nposixes) {
          if (posixes && AT_LEAST_ASCII_RESTRICTED) {
              /* Under /a and /aa, nothing above ASCII matches these */
@@ -14885,6 +15786,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      }
  
      if (ret_invlist) {
+        assert(cp_list);
+
          *ret_invlist = cp_list;
          SvREFCNT_dec(swash);
  
@@ -14951,7 +15854,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
                  value = start;
  
                  if (! FOLD) {
-                    op = EXACT;
+                    op = (LOC)
+                         ? EXACTL
+                         : EXACT;
                  }
                  else if (LOC) {
  
@@ -14993,7 +15898,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              if (end == UV_MAX) {
                  op = SANY;
                  *flagp |= HASWIDTH|SIMPLE;
-                RExC_naughty++;
+                MARK_NAUGHTY(1);
              }
              else if (end == '\n' - 1
                      && invlist_iternext(cp_list, &start, &end)
@@ -15001,7 +15906,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
              {
                  op = REG_ANY;
                  *flagp |= HASWIDTH|SIMPLE;
-                RExC_naughty++;
+                MARK_NAUGHTY(1);
              }
          }
          invlist_iterfinish(cp_list);
@@ -15365,21 +16270,23 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state)
      }
  }
  
-/*
-- reg_node - emit a node
-*/
-STATIC regnode *                       /* Location. */
-S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
+STATIC regnode *
+S_regnode_guts(pTHX_ RExC_state_t *pRExC_state, const U8 op, const STRLEN extra_size, const char* const name)
  {
-    regnode *ptr;
+    /* Allocate a regnode for 'op' and returns it, with 'extra_size' extra
+     * space.  In pass1, it aligns and increments RExC_size; in pass2,
+     * RExC_emit */
+
      regnode * const ret = RExC_emit;
      GET_RE_DEBUG_FLAGS_DECL;
  
-    PERL_ARGS_ASSERT_REG_NODE;
+    PERL_ARGS_ASSERT_REGNODE_GUTS;
+
+    assert(extra_size >= regarglen[op]);
  
      if (SIZE_ONLY) {
         SIZE_ALIGN(RExC_size);
-       RExC_size += 1;
+       RExC_size += 1 + extra_size;
         return(ret);
      }
      if (RExC_emit >= RExC_emit_bound)
@@ -15387,13 +16294,13 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
                    op, (void*)RExC_emit, (void*)RExC_emit_bound);
  
      NODE_ALIGN_FILL(ret);
-    ptr = ret;
-    FILL_ADVANCE_NODE(ptr, op);
-#ifdef RE_TRACK_PATTERN_OFFSETS
+#ifndef RE_TRACK_PATTERN_OFFSETS
+    PERL_UNUSED_ARG(name);
+#else
      if (RExC_offsets) {         /* MJD */
         MJD_OFFSET_DEBUG(
                ("%s:%d: (op %s) %s %"UVuf" (len %"UVuf") (max %"UVuf").\n",
-              "reg_node", __LINE__,
+              name, __LINE__,
                PL_reg_name[op],
                (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0]
                 ? "Overwriting end of array!\n" : "OK",
@@ -15403,76 +16310,66 @@ S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
         Set_Node_Offset(RExC_emit, RExC_parse + (op == END));
      }
  #endif
-    RExC_emit = ptr;
      return(ret);
  }
  
  /*
-- reganode - emit a node with an argument
+- reg_node - emit a node
  */
  STATIC regnode *                       /* Location. */
-S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
+S_reg_node(pTHX_ RExC_state_t *pRExC_state, U8 op)
  {
-    regnode *ptr;
-    regnode * const ret = RExC_emit;
-    GET_RE_DEBUG_FLAGS_DECL;
+    regnode * const ret = regnode_guts(pRExC_state, op, regarglen[op], "reg_node");
  
-    PERL_ARGS_ASSERT_REGANODE;
+    PERL_ARGS_ASSERT_REG_NODE;
  
-    if (SIZE_ONLY) {
-       SIZE_ALIGN(RExC_size);
-       RExC_size += 2;
-       /*
-          We can't do this:
+    assert(regarglen[op] == 0);
  
-          assert(2==regarglen[op]+1);
+    if (PASS2) {
+        regnode *ptr = ret;
+        FILL_ADVANCE_NODE(ptr, op);
+        RExC_emit = ptr;
+    }
+    return(ret);
+}
  
-          Anything larger than this has to allocate the extra amount.
-          If we changed this to be:
+/*
+- reganode - emit a node with an argument
+*/
+STATIC regnode *                       /* Location. */
+S_reganode(pTHX_ RExC_state_t *pRExC_state, U8 op, U32 arg)
+{
+    regnode * const ret = regnode_guts(pRExC_state, op, regarglen[op], "reganode");
  
-          RExC_size += (1 + regarglen[op]);
+    PERL_ARGS_ASSERT_REGANODE;
  
-          then it wouldn't matter. Its not clear what side effect
-          might come from that so its not done so far.
-          -- dmq
-       */
-       return(ret);
-    }
-    if (RExC_emit >= RExC_emit_bound)
-        Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %d, %p>=%p",
-                  op, (void*)RExC_emit, (void*)RExC_emit_bound);
+    assert(regarglen[op] == 1);
  
-    NODE_ALIGN_FILL(ret);
-    ptr = ret;
-    FILL_ADVANCE_NODE_ARG(ptr, op, arg);
-#ifdef RE_TRACK_PATTERN_OFFSETS
-    if (RExC_offsets) {         /* MJD */
-       MJD_OFFSET_DEBUG(
-              ("%s(%d): (op %s) %s %"UVuf" <- %"UVuf" (max %"UVuf").\n",
-              "reganode",
-             __LINE__,
-             PL_reg_name[op],
-              (UV)(RExC_emit - RExC_emit_start) > RExC_offsets[0] ?
-              "Overwriting end of array!\n" : "OK",
-              (UV)(RExC_emit - RExC_emit_start),
-              (UV)(RExC_parse - RExC_start),
-              (UV)RExC_offsets[0]));
-       Set_Cur_Node_Offset;
+    if (PASS2) {
+        regnode *ptr = ret;
+        FILL_ADVANCE_NODE_ARG(ptr, op, arg);
+        RExC_emit = ptr;
      }
-#endif
-    RExC_emit = ptr;
      return(ret);
  }
  
-/*
-- reguni - emit (if appropriate) a Unicode character
-*/
-PERL_STATIC_INLINE STRLEN
-S_reguni(pTHX_ const RExC_state_t *pRExC_state, UV uv, char* s)
+STATIC regnode *
+S_reg2Lanode(pTHX_ RExC_state_t *pRExC_state, const U8 op, const U32 arg1, const I32 arg2)
  {
-    PERL_ARGS_ASSERT_REGUNI;
+    /* emit a node with U32 and I32 arguments */
+
+    regnode * const ret = regnode_guts(pRExC_state, op, regarglen[op], "reg2Lanode");
+
+    PERL_ARGS_ASSERT_REG2LANODE;
+
+    assert(regarglen[op] == 2);
  
-    return SIZE_ONLY ? UNISKIP(uv) : (uvchr_to_utf8((U8*)s, uv) - (U8*)s);
+    if (PASS2) {
+        regnode *ptr = ret;
+        FILL_ADVANCE_NODE_2L_ARG(ptr, op, arg1, arg2);
+        RExC_emit = ptr;
+    }
+    return(ret);
  }
  
  /*
@@ -15590,11 +16487,10 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p,
      for (;;) {
         regnode * const temp = regnext(scan);
          DEBUG_PARSE_r({
-            SV * const mysv=sv_newmortal();
              DEBUG_PARSE_MSG((scan==p ? "tail" : ""));
-            regprop(RExC_rx, mysv, scan, NULL);
+            regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
              PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n",
-                SvPV_nolen_const(mysv), REG_NODE_NUM(scan),
+                SvPV_nolen_const(RExC_mysv), REG_NODE_NUM(scan),
                      (temp == NULL ? "->" : ""),
                      (temp == NULL ? PL_reg_name[OP(val)] : "")
              );
@@ -15662,10 +16558,12 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
          if ( exact ) {
              switch (OP(scan)) {
                  case EXACT:
+                case EXACTL:
                  case EXACTF:
                  case EXACTFA_NO_TRIE:
                  case EXACTFA:
                  case EXACTFU:
+                case EXACTFLU8:
                  case EXACTFU_SS:
                  case EXACTFL:
                          if( exact == PSEUDO )
@@ -15679,11 +16577,10 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
              }
          }
          DEBUG_PARSE_r({
-            SV * const mysv=sv_newmortal();
              DEBUG_PARSE_MSG((scan==p ? "tsdy" : ""));
-            regprop(RExC_rx, mysv, scan, NULL);
+            regprop(RExC_rx, RExC_mysv, scan, NULL, pRExC_state);
              PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n",
-                SvPV_nolen_const(mysv),
+                SvPV_nolen_const(RExC_mysv),
                  REG_NODE_NUM(scan),
                  PL_reg_name[exact]);
          });
@@ -15692,12 +16589,11 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p,
         scan = temp;
      }
      DEBUG_PARSE_r({
-        SV * const mysv_val=sv_newmortal();
          DEBUG_PARSE_MSG("");
-        regprop(RExC_rx, mysv_val, val, NULL);
+        regprop(RExC_rx, RExC_mysv, val, NULL, pRExC_state);
          PerlIO_printf(Perl_debug_log,
                        "~ attach to %s (%"IVdf") offset to %"IVdf"\n",
-                     SvPV_nolen_const(mysv_val),
+                     SvPV_nolen_const(RExC_mysv),
                       (IV)REG_NODE_NUM(val),
                       (IV)(val - scan)
          );
@@ -15849,7 +16745,7 @@ Perl_regdump(pTHX_ const regexp *r)
         PerlIO_printf(Perl_debug_log, ") ");
  
      if (ri->regstclass) {
-       regprop(r, sv, ri->regstclass, NULL);
+        regprop(r, sv, ri->regstclass, NULL, NULL);
         PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv));
      }
      if (r->intflags & PREGf_ANCH) {
@@ -15888,7 +16784,7 @@ Perl_regdump(pTHX_ const regexp *r)
  */
  
  void
-Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo)
+Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo, const RExC_state_t *pRExC_state)
  {
  #ifdef DEBUGGING
      int k;
@@ -15899,8 +16795,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      || _CC_UPPER != 4 || _CC_PUNCT != 5 || _CC_PRINT != 6                   \
      || _CC_ALPHANUMERIC != 7 || _CC_GRAPH != 8 || _CC_CASED != 9            \
      || _CC_SPACE != 10 || _CC_BLANK != 11 || _CC_XDIGIT != 12               \
-    || _CC_PSXSPC != 13 || _CC_CNTRL != 14 || _CC_ASCII != 15               \
-    || _CC_VERTSPACE != 16
+    || _CC_CNTRL != 13 || _CC_ASCII != 14 || _CC_VERTSPACE != 15
    #error Need to adjust order of anyofs[]
  #endif
          "\\w",
@@ -15929,8 +16824,6 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          "[:^blank:]",
          "[:xdigit:]",
          "[:^xdigit:]",
-        "[:space:]",
-        "[:^space:]",
          "[:cntrl:]",
          "[:^cntrl:]",
          "[:ascii:]",
@@ -15943,7 +16836,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
  
      PERL_ARGS_ASSERT_REGPROP;
  
-    sv_setpvs(sv, "");
+    sv_setpvn(sv, "", 0);
  
      if (OP(o) > REGNODE_MAX)           /* regnode.type is unsigned */
         /* It would be nice to FAIL() here, but this may be called from
@@ -16011,19 +16904,23 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      else if (k == REF || k == OPEN || k == CLOSE
               || k == GROUPP || OP(o)==ACCEPT)
      {
+        AV *name_list= NULL;
         Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o));    /* Parenth number */
         if ( RXp_PAREN_NAMES(prog) ) {
+            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
+        } else if ( pRExC_state ) {
+            name_list= RExC_paren_name_list;
+        }
+        if (name_list) {
              if ( k != REF || (OP(o) < NREF)) {
-               AV *list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
-               SV **name= av_fetch(list, ARG(o), 0 );
+                SV **name= av_fetch(name_list, ARG(o), 0 );
                 if (name)
                     Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
              }
              else {
-                AV *list= MUTABLE_AV(progi->data->data[ progi->name_list_idx ]);
                  SV *sv_dat= MUTABLE_SV(progi->data->data[ ARG( o ) ]);
                  I32 *nums=(I32*)SvPVX(sv_dat);
-                SV **name= av_fetch(list, nums[0], 0 );
+                SV **name= av_fetch(name_list, nums[0], 0 );
                  I32 n;
                  if (name) {
                      for ( n=0; n<SvIVX(sv_dat); n++ ) {
@@ -16048,9 +16945,22 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                      PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE );
              }
          }
-    } else if (k == GOSUB)
+    } else if (k == GOSUB) {
+        AV *name_list= NULL;
+        if ( RXp_PAREN_NAMES(prog) ) {
+            name_list= MUTABLE_AV(progi->data->data[progi->name_list_idx]);
+        } else if ( pRExC_state ) {
+            name_list= RExC_paren_name_list;
+        }
+
          /* Paren and offset */
         Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o));
+        if (name_list) {
+            SV **name= av_fetch(name_list, ARG(o), 0 );
+            if (name)
+                Perl_sv_catpvf(aTHX_ sv, " '%"SVf"'", SVfARG(*name));
+        }
+    }
      else if (k == VERB) {
          if (!o->flags)
              Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
@@ -16064,7 +16974,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
          SV* bitmap_invlist;  /* Will hold what the bit map contains */
  
  
-       if (flags & ANYOF_LOCALE_FLAGS)
+       if (OP(o) == ANYOFL)
             sv_catpvs(sv, "{loc}");
         if (flags & ANYOF_LOC_FOLD)
             sv_catpvs(sv, "{i}");
@@ -16105,13 +17015,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                  sv_catpvs(sv, "{non-utf8-latin1-all}");
              }
  
-            /* output information about the unicode matching */
              if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
                  sv_catpvs(sv, "{above_bitmap_all}");
-            else if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
+
+            if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
                  SV *lv; /* Set if there is something outside the bit map. */
-                bool byte_output = FALSE;   /* If something in the bitmap has
-                                               been output */
+                bool byte_output = FALSE;   /* If something has been output */
                  SV *only_utf8_locale;
  
                  /* Get the stuff that wasn't in the bitmap.  'bitmap_invlist'
@@ -16165,7 +17074,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                          sv_catpv(sv, t);
                      }
  
-                out_dump:
+                  out_dump:
  
                      Safefree(origs);
                      SvREFCNT_dec_NN(lv);
@@ -16213,6 +17122,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
              Perl_sv_catpvf(aTHX_ sv, "[illegal type=%d])", index);
          }
      }
+    else if (k == BOUND || k == NBOUND) {
+        /* Must be synced with order of 'bound_type' in regcomp.h */
+        const char * const bounds[] = {
+            "",      /* Traditional */
+            "{gcb}",
+            "{sb}",
+            "{wb}"
+        };
+        sv_catpv(sv, bounds[FLAGS(o)]);
+    }
      else if (k == BRANCHJ && (OP(o) == UNLESSM || OP(o) == IFMATCH))
         Perl_sv_catpvf(aTHX_ sv, "[%d]", -(o->flags));
      else if (OP(o) == SBOL)
@@ -16223,6 +17142,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
      PERL_UNUSED_ARG(o);
      PERL_UNUSED_ARG(prog);
      PERL_UNUSED_ARG(reginfo);
+    PERL_UNUSED_ARG(pRExC_state);
  #endif /* DEBUGGING */
  }
  
@@ -16239,21 +17159,22 @@ Perl_re_intuit_string(pTHX_ REGEXP * const r)
  
      DEBUG_COMPILE_r(
         {
-           const char * const s = SvPV_nolen_const(prog->check_substr
-                     ? prog->check_substr : prog->check_utf8);
+           const char * const s = SvPV_nolen_const(RX_UTF8(r)
+                     ? prog->check_utf8 : prog->check_substr);
  
             if (!PL_colorset) reginitcolors();
             PerlIO_printf(Perl_debug_log,
                       "%sUsing REx %ssubstr:%s \"%s%.60s%s%s\"\n",
                       PL_colors[4],
-                     prog->check_substr ? "" : "utf8 ",
+                     RX_UTF8(r) ? "utf8 " : "",
                       PL_colors[5],PL_colors[0],
                       s,
                       PL_colors[1],
                       (strlen(s) > 60 ? "..." : ""));
         } );
  
-    return prog->check_substr ? prog->check_substr : prog->check_utf8;
+    /* use UTF8 check substring if regexp pattern itself is in UTF8 */
+    return RX_UTF8(r) ? prog->check_utf8 : prog->check_substr;
  }
  
  /*
@@ -16789,11 +17710,48 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...)
      Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
  }
  
+/* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl_save_re_context(pTHX)
+{
+    I32 nparens = -1;
+    I32 i;
+
+    /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
+
+    if (PL_curpm) {
+       const REGEXP * const rx = PM_GETRE(PL_curpm);
+       if (rx)
+            nparens = RX_NPARENS(rx);
+    }
+
+    /* RT #124109. This is a complete hack; in the SWASHNEW case we know
+     * that PL_curpm will be null, but that utf8.pm and the modules it
+     * loads will only use $1..$3.
+     * The t/porting/re_context.t test file checks this assumption.
+     */
+    if (nparens == -1)
+        nparens = 3;
+
+    for (i = 1; i <= nparens; i++) {
+        char digits[TYPE_CHARS(long)];
+        const STRLEN len = my_snprintf(digits, sizeof(digits),
+                                       "%lu", (long)i);
+        GV *const *const gvp
+            = (GV**)hv_fetch(PL_defstash, digits, len, 0);
+
+        if (gvp) {
+            GV * const gv = *gvp;
+            if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
+                save_scalar(gv);
+        }
+    }
+}
+#endif
+
  #ifdef DEBUGGING
-/* Certain characters are output as a sequence with the first being a
- * backslash. */
-#define isBACKSLASHED_PUNCT(c)                                              \
-                    ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
  
  STATIC void
  S_put_code_point(pTHX_ SV *sv, UV c)
@@ -16822,10 +17780,6 @@ S_put_code_point(pTHX_ SV *sv, UV c)
  
  #define MAX_PRINT_A MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C
  
-#ifndef MIN
-#define MIN(a,b) ((a) < (b) ? (a) : (b))
-#endif
-
  STATIC void
  S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
  {
@@ -16976,7 +17930,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
          format = (this_end < 256)
                   ? "\\x{%02"UVXf"}-\\x{%02"UVXf"}"
                   : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
+        GCC_DIAG_IGNORE(-Wformat-nonliteral);
          Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
+        GCC_DIAG_RESTORE;
          break;
      }
  }
@@ -17135,7 +18091,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
         } else
             CLEAR_OPTSTART;
  
-       regprop(r, sv, node, NULL);
+        regprop(r, sv, node, NULL, NULL);
         PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
                       (int)(2*indent + 1), "", SvPVX_const(sv));
  
@@ -17259,11 +18215,5 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
  #endif /* DEBUGGING */
  
  /*
- * Local variables:
- * c-indentation-style: bsd
- * c-basic-offset: 4
- * indent-tabs-mode: nil
- * End:
- *
   * ex: set ts=8 sts=4 sw=4 et:
   */