]> git.vpit.fr Git - perl/modules/re-engine-Hooks.git/blobdiff - src/5018002/orig/regcomp.c
Add support for perl 5.18.2, 5.20.0, and 5.21.[0123]
[perl/modules/re-engine-Hooks.git] / src / 5018002 / orig / regcomp.c
similarity index 99%
rename from src/5019000/orig/regcomp.c
rename to src/5018002/orig/regcomp.c
index fb52d47b272351faf68d49720e6eaac50f59ed1c..0841f172e59e226d6909ed3ea2df65ee5de8d84d 100644 (file)
@@ -5088,16 +5088,15 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state,
                  *     sv_catsv_nomg(pat, msv);
                  * that allows us to adjust code block indices if
                  * needed */
-                STRLEN slen, dlen;
+                STRLEN dlen;
                 char *dst = SvPV_force_nomg(pat, dlen);
-                const char *src = SvPV_flags_const(msv, slen, 0);
                 orig_patlen = dlen;
                 if (SvUTF8(msv) && !SvUTF8(pat)) {
                     S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen, n);
                     sv_setpvn(pat, dst, dlen);
                     SvUTF8_on(pat);
                 }
-                sv_catpvn_nomg(pat, src, slen);
+                sv_catsv_nomg(pat, msv);
                 rx = msv;
             }
             else
@@ -5817,7 +5816,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
                                     pRExC_state->num_code_blocks);
             goto redo_first_pass;
         }
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#X", flags);
     }
     if (code_blocksv)
        SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */
@@ -5989,7 +5988,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
     REGC((U8)REG_MAGIC, (char*) RExC_emit++);
     if (reg(pRExC_state, 0, &flags,1) == NULL) {
        ReREFCNT_dec(rx);   
-        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags);
+        Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#X", flags);
     }
     /* XXXX To minimize changes to RE engine we always allocate
        3-units-long substrs field. */
@@ -6689,13 +6688,23 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
 
     PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
         
-    if ( (    n == RX_BUFF_IDX_CARET_PREMATCH
+    if (      n == RX_BUFF_IDX_CARET_PREMATCH
            || n == RX_BUFF_IDX_CARET_FULLMATCH
            || n == RX_BUFF_IDX_CARET_POSTMATCH
-         )
-         && !(rx->extflags & RXf_PMf_KEEPCOPY)
-    )
-        goto ret_undef;
+       )
+    {
+        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+        if (!keepcopy) {
+            /* on something like
+             *    $r = qr/.../;
+             *    /$qr/p;
+             * the KEEPCOPY is set on the PMOP rather than the regex */
+            if (PL_curpm && r == PM_GETRE(PL_curpm))
+                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+        }
+        if (!keepcopy)
+            goto ret_undef;
+    }
 
     if (!rx->subbeg)
         goto ret_undef;
@@ -6801,13 +6810,27 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
 
     PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH;
 
+    if (   paren == RX_BUFF_IDX_CARET_PREMATCH
+        || paren == RX_BUFF_IDX_CARET_FULLMATCH
+        || paren == RX_BUFF_IDX_CARET_POSTMATCH
+    )
+    {
+        bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY);
+        if (!keepcopy) {
+            /* on something like
+             *    $r = qr/.../;
+             *    /$qr/p;
+             * the KEEPCOPY is set on the PMOP rather than the regex */
+            if (PL_curpm && r == PM_GETRE(PL_curpm))
+                 keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY);
+        }
+        if (!keepcopy)
+            goto warn_undef;
+    }
+
     /* Some of this code was originally in C<Perl_magic_len> in F<mg.c> */
     switch (paren) {
       case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */
-         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
-            goto warn_undef;
-        /*FALLTHROUGH*/
-
       case RX_BUFF_IDX_PREMATCH:       /* $` */
         if (rx->offs[0].start != -1) {
                        i = rx->offs[0].start;
@@ -6820,8 +6843,6 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
         return 0;
 
       case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */
-         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
-            goto warn_undef;
       case RX_BUFF_IDX_POSTMATCH:       /* $' */
            if (rx->offs[0].end != -1) {
                        i = rx->sublen - rx->offs[0].end;
@@ -6833,13 +6854,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv,
            }
         return 0;
 
-      case RX_BUFF_IDX_CARET_FULLMATCH: /* ${^MATCH} */
-         if (!(rx->extflags & RXf_PMf_KEEPCOPY))
-            goto warn_undef;
-        /*FALLTHROUGH*/
-
-      /* $& / ${^MATCH}, $1, $2, ... */
-      default:
+      default: /* $& / ${^MATCH}, $1, $2, ... */
            if (paren <= (I32)rx->nparens &&
             (s1 = rx->offs[paren].start) != -1 &&
             (t1 = rx->offs[paren].end) != -1)
@@ -9176,8 +9191,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                             *flagp = RESTART_UTF8;
                             return NULL;
                         }
-                        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
-                              (UV) flags);
+                        FAIL2("panic: regbranch returned NULL, flags=%#X",
+                              flags);
                     } else
                         REGTAIL(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0));
                    c = *nextchar(pRExC_state);
@@ -9192,8 +9207,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                                 *flagp = RESTART_UTF8;
                                 return NULL;
                             }
-                            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"",
-                                  (UV) flags);
+                            FAIL2("panic: regbranch returned NULL, flags=%#X",
+                                  flags);
                         }
                         REGTAIL(pRExC_state, ret, lastbr);
                        if (flags&HASWIDTH)
@@ -9281,7 +9296,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
             *flagp = RESTART_UTF8;
             return NULL;
         }
-        FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+        FAIL2("panic: regbranch returned NULL, flags=%#X", flags);
     }
     if (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
@@ -9326,7 +9341,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
                 *flagp = RESTART_UTF8;
                 return NULL;
             }
-            FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regbranch returned NULL, flags=%#X", flags);
         }
         REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
        lastbr = br;
@@ -9531,7 +9546,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
                 *flagp = RESTART_UTF8;
                 return NULL;
             }
-            FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regpiece returned NULL, flags=%#X", flags);
        }
        else if (ret == NULL)
            ret = latest;
@@ -9601,7 +9616,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
        if (flags & (TRYAGAIN|RESTART_UTF8))
            *flagp |= flags & (TRYAGAIN|RESTART_UTF8);
         else
-            FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags);
+            FAIL2("panic: regatom returned NULL, flags=%#X", flags);
        return(NULL);
     }
 
@@ -9655,16 +9670,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
                 ret = reg_node(pRExC_state, OPFAIL);
                 return ret;
             }
-            else if (max == 0) {    /* replace {0} with a nothing node */
-                if (SIZE_ONLY) {
-                    RExC_size = PREVOPER(RExC_size) - regarglen[(U8)NOTHING];
-                }
-                else {
-                    RExC_emit = orig_emit;
-                }
-                ret = reg_node(pRExC_state, NOTHING);
-                return ret;
-            }
 
        do_curly:
            if ((flags&SIMPLE)) {
@@ -10043,8 +10048,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I
                 *flagp = RESTART_UTF8;
                 return FALSE;
             }
-            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"",
-                  (UV) flags);
+            FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#X",
+                  flags);
         } 
        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
 
@@ -10329,8 +10334,8 @@ tryagain:
         if (ret == NULL) {
             if (*flagp & RESTART_UTF8)
                 return NULL;
-            FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
-                  (UV) *flagp);
+            FAIL2("panic: regclass returned NULL to regatom, flags=%#X",
+                  *flagp);
         }
        nextchar(pRExC_state);
         Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */
@@ -10352,7 +10357,7 @@ tryagain:
                     *flagp = RESTART_UTF8;
                     return NULL;
                 }
-                FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"", (UV) flags);
+                FAIL2("panic: reg returned NULL to regatom, flags=%#X", flags);
        }
        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
        break;
@@ -10547,8 +10552,8 @@ tryagain:
                 /* regclass() can only return RESTART_UTF8 if multi-char folds
                    are allowed.  */
                 if (!ret)
-                    FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"",
-                          (UV) *flagp);
+                    FAIL2("panic: regclass returned NULL to regatom, flags=%#X",
+                          *flagp);
 
                RExC_parse--;
 
@@ -10961,6 +10966,12 @@ tryagain:
                        if (PL_encoding && ender < 0x100)
                            goto recode_encoding;
                        break;
+                    case '8': case '9': /* These are illegal unless backrefs */
+                        if (atoi(p) <= RExC_npar) {
+                            --p;   /* backup to backslash; handle as backref */
+                            goto loopdone;
+                        }
+                        goto unrecognized;
                    recode_encoding:
                        if (! RExC_override_recoding) {
                            SV* enc = PL_encoding;
@@ -10975,6 +10986,7 @@ tryagain:
                            FAIL("Trailing \\");
                        /* FALL THROUGH */
                    default:
+                    unrecognized:
                        if (!SIZE_ONLY&& isALPHANUMERIC(*p)) {
                            /* Include any { following the alpha to emphasize
                             * that it could be part of an escape at some point
@@ -11671,8 +11683,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                                   FALSE, /* don't allow multi-char folds */
                                   TRUE, /* silence non-portable warnings. */
                                   &current))
-                        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"",
-                              (UV) *flagp);
+                        FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X",
+                              *flagp);
 
                     /* function call leaves parse pointing to the ']', except
                      * if we faked it */
@@ -11740,7 +11752,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
      * been parsed and evaluated to a single operand (or else is a syntax
      * error), and is handled as a regular operand */
 
-    stack = newAV();
+    sv_2mortal((SV *)(stack = newAV()));
 
     while (RExC_parse < RExC_end) {
         I32 top_index = av_tindex(stack);
@@ -11837,8 +11849,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                               FALSE, /* don't allow multi-char folds */
                               FALSE, /* don't silence non-portable warnings.  */
                               &current))
-                    FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"",
-                          (UV) *flagp);
+                    FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X",
+                          *flagp);
                 /* regclass() will return with parsing just the \ sequence,
                  * leaving the parse pointer at the next thing to parse */
                 RExC_parse--;
@@ -11860,8 +11872,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                              FALSE, /* don't allow multi-char folds */
                              FALSE, /* don't silence non-portable warnings.  */
                              &current))
-                    FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"",
-                          (UV) *flagp);
+                    FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X",
+                          *flagp);
                 /* function call leaves parse pointing to the ']', except if we
                  * faked it */
                 if (is_posix_class) {
@@ -11912,6 +11924,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                     || IS_OPERAND(lparen)
                     || SvUV(lparen) != '(')
                 {
+                    SvREFCNT_dec(current);
                     RExC_parse++;
                     vFAIL("Unexpected ')'");
                 }
@@ -11930,9 +11943,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                 }
                 else {
                     SV* top = av_pop(stack);
+                    SV *prev = NULL;
                     char current_operator;
 
                     if (IS_OPERAND(top)) {
+                        SvREFCNT_dec_NN(top);
+                        SvREFCNT_dec_NN(current);
                         vFAIL("Operand with no preceding operator");
                     }
                     current_operator = (char) SvUV(top);
@@ -11959,7 +11975,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                             goto handle_operand;
 
                         case '&':
-                            _invlist_intersection(av_pop(stack),
+                            prev = av_pop(stack);
+                            _invlist_intersection(prev,
                                                    current,
                                                    &current);
                             av_push(stack, current);
@@ -11967,12 +11984,14 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
 
                         case '|':
                         case '+':
-                            _invlist_union(av_pop(stack), current, &current);
+                            prev = av_pop(stack);
+                            _invlist_union(prev, current, &current);
                             av_push(stack, current);
                             break;
 
                         case '-':
-                            _invlist_subtract(av_pop(stack), current, &current);
+                            prev = av_pop(stack);;
+                            _invlist_subtract(prev, current, &current);
                             av_push(stack, current);
                             break;
 
@@ -11982,9 +12001,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                             SV* u = NULL;
                             SV* element;
 
-                            element = av_pop(stack);
-                            _invlist_union(element, current, &u);
-                            _invlist_intersection(element, current, &i);
+                            prev = av_pop(stack);
+                            _invlist_union(prev, current, &u);
+                            _invlist_intersection(prev, current, &i);
+                            /* _invlist_subtract will overwrite current
+                                without freeing what it already contains */
+                            element = current;
                             _invlist_subtract(u, i, &current);
                             av_push(stack, current);
                             SvREFCNT_dec_NN(i);
@@ -11997,6 +12019,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
                             Perl_croak(aTHX_ "panic: Unexpected item on '(?[ ])' stack");
                 }
                 SvREFCNT_dec_NN(top);
+                SvREFCNT_dec(prev);
             }
         }
 
@@ -12060,7 +12083,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f
     RExC_end = save_end;
     SvREFCNT_dec_NN(final);
     SvREFCNT_dec_NN(result_string);
-    SvREFCNT_dec_NN(stack);
 
     nextchar(pRExC_state);
     Set_Node_Length(node, RExC_parse - oregcomp_parse + 1); /* MJD */
@@ -12665,12 +12687,21 @@ parseit:
                     }
                     else
 #endif  /* Not isascii(); just use the hard-coded definition for it */
+                    {
                         _invlist_union_maybe_complement_2nd(
                                 posixes,
                                 PL_ASCII,
                                 cBOOL(namedclass % 2), /* Complement if odd
                                                           (NASCII) */
                                 &posixes);
+
+                        /* The code points 128-255 added above will be
+                         * subtracted out below under /d, so the flag needs to
+                         * be set */
+                        if (namedclass == ANYOF_NASCII && DEPENDS_SEMANTICS) {
+                            ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL;
+                        }
+                    }
                 }
                 else {  /* Garden variety class */
 
@@ -13681,6 +13712,7 @@ parseit:
 
     if (ret_invlist) {
         *ret_invlist = cp_list;
+        SvREFCNT_dec(swash);
 
         /* Discard the generated node */
         if (SIZE_ONLY) {