X-Git-Url: http://git.vpit.fr/?p=perl%2Fmodules%2Fre-engine-Hooks.git;a=blobdiff_plain;f=src%2F5018002%2Fregcomp.c;fp=src%2F5019000%2Fregcomp.c;h=79751403cc8044d41bbe9c8c6425db2787ebc273;hp=cc7b914802bf76ece9d10e3a04328259f2bd107c;hb=819b78c9396701a0ef5fe7334e4054dd53c7ef93;hpb=5f4fe0b63e7d03e713a655997310a3875c40b7a8 diff --git a/src/5019000/regcomp.c b/src/5018002/regcomp.c similarity index 99% rename from src/5019000/regcomp.c rename to src/5018002/regcomp.c index cc7b914..7975140 100644 --- a/src/5019000/regcomp.c +++ b/src/5018002/regcomp.c @@ -5089,16 +5089,15 @@ S_concat_pat(pTHX_ RExC_state_t * const pRExC_state, * sv_catsv_nomg(pat, msv); * that allows us to adjust code block indices if * needed */ - STRLEN slen, dlen; + STRLEN dlen; char *dst = SvPV_force_nomg(pat, dlen); - const char *src = SvPV_flags_const(msv, slen, 0); orig_patlen = dlen; if (SvUTF8(msv) && !SvUTF8(pat)) { S_pat_upgrade_to_utf8(aTHX_ pRExC_state, &dst, &dlen, n); sv_setpvn(pat, dst, dlen); SvUTF8_on(pat); } - sv_catpvn_nomg(pat, src, slen); + sv_catsv_nomg(pat, msv); rx = msv; } else @@ -5818,7 +5817,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, pRExC_state->num_code_blocks); goto redo_first_pass; } - Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#"UVxf"", (UV) flags); + Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for sizing pass, flags=%#X", flags); } if (code_blocksv) SvLEN_set(code_blocksv,0); /* no you can't have it, sv_clear */ @@ -5991,7 +5990,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, REGC((U8)REG_MAGIC, (char*) RExC_emit++); if (reg(pRExC_state, 0, &flags,1) == NULL) { ReREFCNT_dec(rx); - Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#"UVxf"", (UV) flags); + Perl_croak(aTHX_ "panic: reg returned NULL to re_op_compile for generation pass, flags=%#X", flags); } /* XXXX To minimize changes to RE engine we always allocate 3-units-long substrs field. */ @@ -6691,13 +6690,23 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH; - if ( ( n == RX_BUFF_IDX_CARET_PREMATCH + if ( n == RX_BUFF_IDX_CARET_PREMATCH || n == RX_BUFF_IDX_CARET_FULLMATCH || n == RX_BUFF_IDX_CARET_POSTMATCH - ) - && !(rx->extflags & RXf_PMf_KEEPCOPY) ) - goto ret_undef; + { + bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY); + if (!keepcopy) { + /* on something like + * $r = qr/.../; + * /$qr/p; + * the KEEPCOPY is set on the PMOP rather than the regex */ + if (PL_curpm && r == PM_GETRE(PL_curpm)) + keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY); + } + if (!keepcopy) + goto ret_undef; + } if (!rx->subbeg) goto ret_undef; @@ -6803,13 +6812,27 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_LENGTH; + if ( paren == RX_BUFF_IDX_CARET_PREMATCH + || paren == RX_BUFF_IDX_CARET_FULLMATCH + || paren == RX_BUFF_IDX_CARET_POSTMATCH + ) + { + bool keepcopy = cBOOL(rx->extflags & RXf_PMf_KEEPCOPY); + if (!keepcopy) { + /* on something like + * $r = qr/.../; + * /$qr/p; + * the KEEPCOPY is set on the PMOP rather than the regex */ + if (PL_curpm && r == PM_GETRE(PL_curpm)) + keepcopy = cBOOL(PL_curpm->op_pmflags & PMf_KEEPCOPY); + } + if (!keepcopy) + goto warn_undef; + } + /* Some of this code was originally in C in F */ switch (paren) { case RX_BUFF_IDX_CARET_PREMATCH: /* ${^PREMATCH} */ - if (!(rx->extflags & RXf_PMf_KEEPCOPY)) - goto warn_undef; - /*FALLTHROUGH*/ - case RX_BUFF_IDX_PREMATCH: /* $` */ if (rx->offs[0].start != -1) { i = rx->offs[0].start; @@ -6822,8 +6845,6 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, return 0; case RX_BUFF_IDX_CARET_POSTMATCH: /* ${^POSTMATCH} */ - if (!(rx->extflags & RXf_PMf_KEEPCOPY)) - goto warn_undef; case RX_BUFF_IDX_POSTMATCH: /* $' */ if (rx->offs[0].end != -1) { i = rx->sublen - rx->offs[0].end; @@ -6835,13 +6856,7 @@ Perl_reg_numbered_buff_length(pTHX_ REGEXP * const r, const SV * const sv, } return 0; - case RX_BUFF_IDX_CARET_FULLMATCH: /* ${^MATCH} */ - if (!(rx->extflags & RXf_PMf_KEEPCOPY)) - goto warn_undef; - /*FALLTHROUGH*/ - - /* $& / ${^MATCH}, $1, $2, ... */ - default: + default: /* $& / ${^MATCH}, $1, $2, ... */ if (paren <= (I32)rx->nparens && (s1 = rx->offs[paren].start) != -1 && (t1 = rx->offs[paren].end) != -1) @@ -9178,8 +9193,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", - (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", + flags); } else REGTAIL(pRExC_state, br, reganode(pRExC_state, LONGJMP, 0)); c = *nextchar(pRExC_state); @@ -9194,8 +9209,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", - (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", + flags); } REGTAIL(pRExC_state, ret, lastbr); if (flags&HASWIDTH) @@ -9283,7 +9298,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", flags); } if (*RExC_parse == '|') { if (!SIZE_ONLY && RExC_extralen) { @@ -9328,7 +9343,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regbranch returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regbranch returned NULL, flags=%#X", flags); } REGTAIL(pRExC_state, lastbr, br); /* BRANCH -> BRANCH. */ lastbr = br; @@ -9533,7 +9548,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth) *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: regpiece returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regpiece returned NULL, flags=%#X", flags); } else if (ret == NULL) ret = latest; @@ -9603,7 +9618,7 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) if (flags & (TRYAGAIN|RESTART_UTF8)) *flagp |= flags & (TRYAGAIN|RESTART_UTF8); else - FAIL2("panic: regatom returned NULL, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: regatom returned NULL, flags=%#X", flags); return(NULL); } @@ -9657,16 +9672,6 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth) ret = reg_node(pRExC_state, OPFAIL); return ret; } - else if (max == 0) { /* replace {0} with a nothing node */ - if (SIZE_ONLY) { - RExC_size = PREVOPER(RExC_size) - regarglen[(U8)NOTHING]; - } - else { - RExC_emit = orig_emit; - } - ret = reg_node(pRExC_state, NOTHING); - return ret; - } do_curly: if ((flags&SIMPLE)) { @@ -10045,8 +10050,8 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** node_p, UV *valuep, I *flagp = RESTART_UTF8; return FALSE; } - FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#"UVxf"", - (UV) flags); + FAIL2("panic: reg returned NULL to grok_bslash_N, flags=%#X", + flags); } *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); @@ -10331,8 +10336,8 @@ tryagain: if (ret == NULL) { if (*flagp & RESTART_UTF8) return NULL; - FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to regatom, flags=%#X", + *flagp); } nextchar(pRExC_state); Set_Node_Length(ret, RExC_parse - oregcomp_parse + 1); /* MJD */ @@ -10354,7 +10359,7 @@ tryagain: *flagp = RESTART_UTF8; return NULL; } - FAIL2("panic: reg returned NULL to regatom, flags=%#"UVxf"", (UV) flags); + FAIL2("panic: reg returned NULL to regatom, flags=%#X", flags); } *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED); break; @@ -10549,8 +10554,8 @@ tryagain: /* regclass() can only return RESTART_UTF8 if multi-char folds are allowed. */ if (!ret) - FAIL2("panic: regclass returned NULL to regatom, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to regatom, flags=%#X", + *flagp); RExC_parse--; @@ -10963,6 +10968,12 @@ tryagain: if (PL_encoding && ender < 0x100) goto recode_encoding; break; + case '8': case '9': /* These are illegal unless backrefs */ + if (atoi(p) <= RExC_npar) { + --p; /* backup to backslash; handle as backref */ + goto loopdone; + } + goto unrecognized; recode_encoding: if (! RExC_override_recoding) { SV* enc = PL_encoding; @@ -10977,6 +10988,7 @@ tryagain: FAIL("Trailing \\"); /* FALL THROUGH */ default: + unrecognized: if (!SIZE_ONLY&& isALPHANUMERIC(*p)) { /* Include any { following the alpha to emphasize * that it could be part of an escape at some point @@ -11673,8 +11685,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f FALSE, /* don't allow multi-char folds */ TRUE, /* silence non-portable warnings. */ ¤t)) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X", + *flagp); /* function call leaves parse pointing to the ']', except * if we faked it */ @@ -11742,7 +11754,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f * been parsed and evaluated to a single operand (or else is a syntax * error), and is handled as a regular operand */ - stack = newAV(); + sv_2mortal((SV *)(stack = newAV())); while (RExC_parse < RExC_end) { I32 top_index = av_tindex(stack); @@ -11839,8 +11851,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f FALSE, /* don't allow multi-char folds */ FALSE, /* don't silence non-portable warnings. */ ¤t)) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X", + *flagp); /* regclass() will return with parsing just the \ sequence, * leaving the parse pointer at the next thing to parse */ RExC_parse--; @@ -11862,8 +11874,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f FALSE, /* don't allow multi-char folds */ FALSE, /* don't silence non-portable warnings. */ ¤t)) - FAIL2("panic: regclass returned NULL to handle_sets, flags=%#"UVxf"", - (UV) *flagp); + FAIL2("panic: regclass returned NULL to handle_sets, flags=%#X", + *flagp); /* function call leaves parse pointing to the ']', except if we * faked it */ if (is_posix_class) { @@ -11914,6 +11926,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f || IS_OPERAND(lparen) || SvUV(lparen) != '(') { + SvREFCNT_dec(current); RExC_parse++; vFAIL("Unexpected ')'"); } @@ -11932,9 +11945,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f } else { SV* top = av_pop(stack); + SV *prev = NULL; char current_operator; if (IS_OPERAND(top)) { + SvREFCNT_dec_NN(top); + SvREFCNT_dec_NN(current); vFAIL("Operand with no preceding operator"); } current_operator = (char) SvUV(top); @@ -11961,7 +11977,8 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f goto handle_operand; case '&': - _invlist_intersection(av_pop(stack), + prev = av_pop(stack); + _invlist_intersection(prev, current, ¤t); av_push(stack, current); @@ -11969,12 +11986,14 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f case '|': case '+': - _invlist_union(av_pop(stack), current, ¤t); + prev = av_pop(stack); + _invlist_union(prev, current, ¤t); av_push(stack, current); break; case '-': - _invlist_subtract(av_pop(stack), current, ¤t); + prev = av_pop(stack);; + _invlist_subtract(prev, current, ¤t); av_push(stack, current); break; @@ -11984,9 +12003,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f SV* u = NULL; SV* element; - element = av_pop(stack); - _invlist_union(element, current, &u); - _invlist_intersection(element, current, &i); + prev = av_pop(stack); + _invlist_union(prev, current, &u); + _invlist_intersection(prev, current, &i); + /* _invlist_subtract will overwrite current + without freeing what it already contains */ + element = current; _invlist_subtract(u, i, ¤t); av_push(stack, current); SvREFCNT_dec_NN(i); @@ -11999,6 +12021,7 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f Perl_croak(aTHX_ "panic: Unexpected item on '(?[ ])' stack"); } SvREFCNT_dec_NN(top); + SvREFCNT_dec(prev); } } @@ -12062,7 +12085,6 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, SV** return_invlist, I32 *f RExC_end = save_end; SvREFCNT_dec_NN(final); SvREFCNT_dec_NN(result_string); - SvREFCNT_dec_NN(stack); nextchar(pRExC_state); Set_Node_Length(node, RExC_parse - oregcomp_parse + 1); /* MJD */ @@ -12667,12 +12689,21 @@ parseit: } else #endif /* Not isascii(); just use the hard-coded definition for it */ + { _invlist_union_maybe_complement_2nd( posixes, PL_ASCII, cBOOL(namedclass % 2), /* Complement if odd (NASCII) */ &posixes); + + /* The code points 128-255 added above will be + * subtracted out below under /d, so the flag needs to + * be set */ + if (namedclass == ANYOF_NASCII && DEPENDS_SEMANTICS) { + ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_LATIN1_ALL; + } + } } else { /* Garden variety class */ @@ -13683,6 +13714,7 @@ parseit: if (ret_invlist) { *ret_invlist = cp_list; + SvREFCNT_dec(swash); /* Discard the generated node */ if (SIZE_ONLY) {