X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2Fre-engine-Hooks.git;a=blobdiff_plain;f=src%2F5018002%2Forig%2Fregcomp.c;fp=src%2F5019000%2Forig%2Fregcomp.c;h=0841f172e59e226d6909ed3ea2df65ee5de8d84d;hp=fb52d47b272351faf68d49720e6eaac50f59ed1c;hb=819b78c9396701a0ef5fe7334e4054dd53c7ef93;hpb=5f4fe0b63e7d03e713a655997310a3875c40b7a8 diff --git a/src/5019000/orig/regcomp.c b/src/5018002/orig/regcomp.c similarity index 99% rename from src/5019000/orig/regcomp.c rename to src/5018002/orig/regcomp.c index fb52d47..0841f17 100644 --- a/src/5019000/orig/regcomp.c +++ b/src/5018002/orig/regcomp.c @@ -5088,16 +5088,15 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, * sv_catsv_nomg(pat, msv); * that allows us to adjust code block indices if * needed */ - STRLEN slen, dlen; + STRLEN dlen; char *dst = SvPV_force_nomg(pat, dlen); - const char *src = SvPV_flags_const(msv, slen, 0); orig_patlen = dlen; if (SvUTF8(msv) && !SvUTF8(pat)) { S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen, n); sv_setpvn(pat, dst, dlen); SvUTF8_on(pat); } - sv_catpvn_nomg(pat, src, slen); + sv_catsv_nomg(pat, msv); rx = msv; } else @@ -5817,7 +5816,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, pRExC_state->num_code_blocks); goto redo_first_pass; } - Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags); + Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#X", flags); } if (code_blocksv) SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */ @@ -5989,7 +5988,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, REGC((U8)REG_MAGIC, (char*) RExC_emit++); if (reg(pRExC_state, 0, &flags,1) == NULL) { ReREFCNT_dec(rx); - Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags); + Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#X", flags); } /* XXXX To minimize changes to RE engine we always allocate 3-units-long substrs field. */ @@ -6689,13 +6688,23 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH; - if ( ( n == RX_BUFF_IDX_CARET_PREMATCH + if ( n == RX_BUFF_IDX_CARET_PREMATCH || n == RX_BUFF_IDX_CARET_FULLMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH - ) - && !(rx->extflags & RXf_PMf_KEEPCOPY) - ) - goto ret_undef; + ) + { + bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY); + if (!keepcopy) { + /* on something like + * $r = qr/.../; + * /$qr/p; + * the KEEPCOPY is set on the PMOP rather than the regex */ + if (PL_curpm && r == PM_GETRE(PL_curpm)) + keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY); + } + if (!keepcopy) + goto ret_undef; + } if (!rx->subbeg) goto ret_undef; @@ -6801,13 +6810,27 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH; + if ( paren == RX_BUFF_IDX_CARET_PREMATCH + || paren == RX_BUFF_IDX_CARET_FULLMATCH + || paren == RX_BUFF_IDX_CARET_POSTMATCH + ) + { + bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY); + if (!keepcopy) { + /* on something like + * $r = qr/.../; + * /$qr/p; + * the KEEPCOPY is set on the PMOP rather than the regex */ + if (PL_curpm && r == PM_GETRE(PL_curpm)) + keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY); + } + if (!keepcopy) + goto warn_undef; + } + /* Some of this code was originally in C in F */ switch (paren) { case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */ - if (!(rx->extflags & RXf_PMf_KEEPCOPY)) - goto warn_undef; - /*FALLTHROUGH*/ - case RX_BUFF_IDX_PREMATCH: /* $` */ if (rx->offs[0].start != -1) { i = rx->offs[0].start; @@ -6820,8 +6843,6 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, return 0; case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */ - if (!(rx->extflags & RXf_PMf_KEEPCOPY)) - goto warn_undef; case RX_BUFF_IDX_POSTMATCH: /* $' */ if (rx->offs[0].end != -1) { i = rx->sublen - rx->offs[0].end; @@ -6833,13 +6854,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, } return 0; - case RX_BUFF_IDX_CARET_FULLMATCH: /* ${^MATCH} */ - if (!(rx->extflags & RXf_PMf_KEEPCOPY)) - goto warn_undef; - /*FALLTHROUGH*/ - - /* $& / ${^MATCH}, $1, $2, ... */ - default: + default: /* $& / ${^MATCH}, $1, $2, ... */ if (paren <= (I32)rx->nparens && (s1 = rx->offs[paren].start) != -1 && (t1 = rx->offs[paren].end) != -1) @@ -9176,8 +9191,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", - (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", + flags); } else REGTAIL(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0)); c = *nextchar(pRExC_state); @@ -9192,8 +9207,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", - (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", + flags); } REGTAIL(pRExC_state, ret, lastbr); if (flags&HASWIDTH) @@ -9281,7 +9296,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", flags); } if (*RExC_parse == '|') { if (!SIZE_ONLY && RExC_extralen) { @@ -9326,7 +9341,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", flags); } REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */ lastbr = br; @@ -9531,7 +9546,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regpiece returned NULL, flags=%#X", flags); } else if (ret == NULL) ret = latest; @@ -9601,7 +9616,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (flags & (TRYAGAIN|RESTART_UTF8)) *flagp |= flags & (TRYAGAIN|RESTART_UTF8); else - FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regatom returned NULL, flags=%#X", flags); return(NULL); } @@ -9655,16 +9670,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ret = reg_node(pRExC_state, OPFAIL); return ret; } - else if (max == 0) { /* replace {0} with a nothing node */ - if (SIZE_ONLY) { - RExC_size = PREVOPER(RExC_size) - regarglen[(U8)NOTHING]; - } - else { - RExC_emit = orig_emit; - } - ret = reg_node(pRExC_state, NOTHING); - return ret; - } do_curly: if ((flags&SIMPLE)) { @@ -10043,8 +10048,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I *flagp = RESTART_UTF8; return FALSE; } - FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"", - (UV) flags); + FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#X", + flags); } *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); @@ -10329,8 +10334,8 @@ tryagain: if (ret == NULL) { if (*flagp & RESTART_UTF8) return NULL; - FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to regatom, flags=%#X", + *flagp); } nextchar(pRExC_state); Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */ @@ -10352,7 +10357,7 @@ tryagain: *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: reg returned NULL to regatom, flags=%#X", flags); } *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); break; @@ -10547,8 +10552,8 @@ tryagain: /* regclass() can only return RESTART_UTF8 if multi-char folds are allowed. */ if (!ret) - FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to regatom, flags=%#X", + *flagp); RExC_parse--; @@ -10961,6 +10966,12 @@ tryagain: if (PL_encoding && ender < 0x100) goto recode_encoding; break; + case '8': case '9': /* These are illegal unless backrefs */ + if (atoi(p) <= RExC_npar) { + --p; /* backup to backslash; handle as backref */ + goto loopdone; + } + goto unrecognized; recode_encoding: if (! RExC_override_recoding) { SV* enc = PL_encoding; @@ -10975,6 +10986,7 @@ tryagain: FAIL("Trailing \\"); /* FALL THROUGH */ default: + unrecognized: if (!SIZE_ONLY&& isALPHANUMERIC(*p)) { /* Include any { following the alpha to emphasize * that it could be part of an escape at some point @@ -11671,8 +11683,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f FALSE, /* don't allow multi-char folds */ TRUE, /* silence non-portable warnings. */ ¤t)) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X", + *flagp); /* function call leaves parse pointing to the ']', except * if we faked it */ @@ -11740,7 +11752,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f * been parsed and evaluated to a single operand (or else is a syntax * error), and is handled as a regular operand */ - stack = newAV(); + sv_2mortal((SV *)(stack = newAV())); while (RExC_parse < RExC_end) { I32 top_index = av_tindex(stack); @@ -11837,8 +11849,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f FALSE, /* don't allow multi-char folds */ FALSE, /* don't silence non-portable warnings. */ ¤t)) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X", + *flagp); /* regclass() will return with parsing just the \ sequence, * leaving the parse pointer at the next thing to parse */ RExC_parse--; @@ -11860,8 +11872,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f FALSE, /* don't allow multi-char folds */ FALSE, /* don't silence non-portable warnings. */ ¤t)) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X", + *flagp); /* function call leaves parse pointing to the ']', except if we * faked it */ if (is_posix_class) { @@ -11912,6 +11924,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f || IS_OPERAND(lparen) || SvUV(lparen) != '(') { + SvREFCNT_dec(current); RExC_parse++; vFAIL("Unexpected ')'"); } @@ -11930,9 +11943,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f } else { SV* top = av_pop(stack); + SV *prev = NULL; char current_operator; if (IS_OPERAND(top)) { + SvREFCNT_dec_NN(top); + SvREFCNT_dec_NN(current); vFAIL("Operand with no preceding operator"); } current_operator = (char) SvUV(top); @@ -11959,7 +11975,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f goto handle_operand; case '&': - _invlist_intersection(av_pop(stack), + prev = av_pop(stack); + _invlist_intersection(prev, current, ¤t); av_push(stack, current); @@ -11967,12 +11984,14 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f case '|': case '+': - _invlist_union(av_pop(stack), current, ¤t); + prev = av_pop(stack); + _invlist_union(prev, current, ¤t); av_push(stack, current); break; case '-': - _invlist_subtract(av_pop(stack), current, ¤t); + prev = av_pop(stack);; + _invlist_subtract(prev, current, ¤t); av_push(stack, current); break; @@ -11982,9 +12001,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f SV* u = NULL; SV* element; - element = av_pop(stack); - _invlist_union(element, current, &u); - _invlist_intersection(element, current, &i); + prev = av_pop(stack); + _invlist_union(prev, current, &u); + _invlist_intersection(prev, current, &i); + /* _invlist_subtract will overwrite current + without freeing what it already contains */ + element = current; _invlist_subtract(u, i, ¤t); av_push(stack, current); SvREFCNT_dec_NN(i); @@ -11997,6 +12019,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f Perl_croak(aTHX_ "panic: Unexpected item on '(?[ ])' stack"); } SvREFCNT_dec_NN(top); + SvREFCNT_dec(prev); } } @@ -12060,7 +12083,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f RExC_end = save_end; SvREFCNT_dec_NN(final); SvREFCNT_dec_NN(result_string); - SvREFCNT_dec_NN(stack); nextchar(pRExC_state); Set_Node_Length(node, RExC_parse - oregcomp_parse + 1); /* MJD */ @@ -12665,12 +12687,21 @@ parseit: } else #endif /* Not isascii(); just use the hard-coded definition for it */ + { _invlist_union_maybe_complement_2nd( posixes, PL_ASCII, cBOOL(namedclass % 2), /* Complement if odd (NASCII) */ &posixes); + + /* The code points 128-255 added above will be + * subtracted out below under /d, so the flag needs to + * be set */ + if (namedclass == ANYOF_NASCII && DEPENDS_SEMANTICS) { + ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL; + } + } } else { /* Garden variety class */ @@ -13681,6 +13712,7 @@ parseit: if (ret_invlist) { *ret_invlist = cp_list; + SvREFCNT_dec(swash); /* Discard the generated node */ if (SIZE_ONLY) {