- if (deltanext == I32_MAX || -counted * deltanext + (minnext + deltanext) * maxcount - minnext * mincount >= I32_MAX - data->pos_delta)
- data->pos_delta = I32_MAX;
- else
- data->pos_delta += - counted * deltanext +
- (minnext + deltanext) * maxcount - minnext * mincount;
- if (mincount != maxcount) {
- /* Cannot extend fixed substrings found inside
- the group. */
- SCAN_COMMIT(pRExC_state,data,minlenp);
- if (mincount && last_str) {
- SV * const sv = data->last_found;
- MAGIC * const mg = SvUTF8(sv) && SvMAGICAL(sv) ?
- mg_find(sv, PERL_MAGIC_utf8) : NULL;
-
- if (mg)
- mg->mg_len = -1;
- sv_setsv(sv, last_str);
- data->last_end = data->pos_min;
- data->last_start_min =
- data->pos_min - CHR_SVLEN(last_str);
- data->last_start_max = is_inf
- ? I32_MAX
- : data->pos_min + data->pos_delta
- - CHR_SVLEN(last_str);
- }
- data->longest = &(data->longest_float);
- }
- SvREFCNT_dec(last_str);
- }
- if (data && (fl & SF_HAS_EVAL))
- data->flags |= SF_HAS_EVAL;
- optimize_curly_tail:
- if (OP(oscan) != CURLYX) {
- while (PL_regkind[OP(next = regnext(oscan))] == NOTHING
- && NEXT_OFF(next))
- NEXT_OFF(oscan) += NEXT_OFF(next);
- }
- continue;
- default: /* REF, and CLUMP only? */
- if (flags & SCF_DO_SUBSTR) {
- SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
- data->longest = &(data->longest_float);
- }
- is_inf = is_inf_internal = 1;
- if (flags & SCF_DO_STCLASS_OR)
- cl_anything(pRExC_state, data->start_class);
- flags &= ~SCF_DO_STCLASS;
- break;
- }
- }
- else if (OP(scan) == LNBREAK) {
- if (flags & SCF_DO_STCLASS) {
- int value = 0;
- CLEAR_SSC_EOS(data->start_class); /* No match on empty */
- if (flags & SCF_DO_STCLASS_AND) {
- for (value = 0; value < 256; value++)
- if (!is_VERTWS_cp(value))
- ANYOF_BITMAP_CLEAR(data->start_class, value);
- }
- else {
- for (value = 0; value < 256; value++)
- if (is_VERTWS_cp(value))
- ANYOF_BITMAP_SET(data->start_class, value);
- }
- if (flags & SCF_DO_STCLASS_OR)
- cl_and(data->start_class, and_withp);
- flags &= ~SCF_DO_STCLASS;
- }
- min++;
- delta++; /* Because of the 2 char string cr-lf */
- if (flags & SCF_DO_SUBSTR) {
- SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
- data->pos_min += 1;
- data->pos_delta += 1;
- data->longest = &(data->longest_float);
- }
- }
- else if (REGNODE_SIMPLE(OP(scan))) {
- int value = 0;
-
- if (flags & SCF_DO_SUBSTR) {
- SCAN_COMMIT(pRExC_state,data,minlenp);
- data->pos_min++;
- }
- min++;
- if (flags & SCF_DO_STCLASS) {
- int loop_max = 256;
- CLEAR_SSC_EOS(data->start_class); /* No match on empty */
-
- /* Some of the logic below assumes that switching
- locale on will only add false positives. */
- switch (PL_regkind[OP(scan)]) {
- U8 classnum;
-
- case SANY:
- default:
-#ifdef DEBUGGING
- Perl_croak(aTHX_ "panic: unexpected simple REx opcode %d", OP(scan));
-#endif
- do_default:
- if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
- cl_anything(pRExC_state, data->start_class);
- break;
- case REG_ANY:
- if (OP(scan) == SANY)
- goto do_default;
- if (flags & SCF_DO_STCLASS_OR) { /* Everything but \n */
- value = (ANYOF_BITMAP_TEST(data->start_class,'\n')
- || ANYOF_CLASS_TEST_ANY_SET(data->start_class));
- cl_anything(pRExC_state, data->start_class);
- }
- if (flags & SCF_DO_STCLASS_AND || !value)
- ANYOF_BITMAP_CLEAR(data->start_class,'\n');
- break;
- case ANYOF:
- if (flags & SCF_DO_STCLASS_AND)
- cl_and(data->start_class,
- (struct regnode_charclass_class*)scan);
- else
- cl_or(pRExC_state, data->start_class,
- (struct regnode_charclass_class*)scan);
- break;
- case POSIXA:
- loop_max = 128;
- /* FALL THROUGH */
- case POSIXL:
- case POSIXD:
- case POSIXU:
- classnum = FLAGS(scan);
- if (flags & SCF_DO_STCLASS_AND) {
- if (!(data->start_class->flags & ANYOF_LOCALE)) {
- ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum) + 1);
- for (value = 0; value < loop_max; value++) {
- if (! _generic_isCC(UNI_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_CLEAR(data->start_class, UNI_TO_NATIVE(value));
- }
- }
- }
- }
- else {
- if (data->start_class->flags & ANYOF_LOCALE) {
- ANYOF_CLASS_SET(data->start_class, classnum_to_namedclass(classnum));
- }
- else {
-
- /* Even if under locale, set the bits for non-locale
- * in case it isn't a true locale-node. This will
- * create false positives if it truly is locale */
- for (value = 0; value < loop_max; value++) {
- if (_generic_isCC(UNI_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_SET(data->start_class, UNI_TO_NATIVE(value));
- }
- }
- }
- }
- break;
- case NPOSIXA:
- loop_max = 128;
- /* FALL THROUGH */
- case NPOSIXL:
- case NPOSIXU:
- case NPOSIXD:
- classnum = FLAGS(scan);
- if (flags & SCF_DO_STCLASS_AND) {
- if (!(data->start_class->flags & ANYOF_LOCALE)) {
- ANYOF_CLASS_CLEAR(data->start_class, classnum_to_namedclass(classnum));
- for (value = 0; value < loop_max; value++) {
- if (_generic_isCC(UNI_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_CLEAR(data->start_class, UNI_TO_NATIVE(value));
- }
- }
- }
- }
- else {
- if (data->start_class->flags & ANYOF_LOCALE) {
- ANYOF_CLASS_SET(data->start_class, classnum_to_namedclass(classnum) + 1);
- }
- else {
-
- /* Even if under locale, set the bits for non-locale in
- * case it isn't a true locale-node. This will create
- * false positives if it truly is locale */
- for (value = 0; value < loop_max; value++) {
- if (! _generic_isCC(UNI_TO_NATIVE(value), classnum)) {
- ANYOF_BITMAP_SET(data->start_class, UNI_TO_NATIVE(value));
- }
- }
- if (PL_regkind[OP(scan)] == NPOSIXD) {
- data->start_class->flags |= ANYOF_NON_UTF8_LATIN1_ALL;
- }
- }
- }
- break;
- }
- if (flags & SCF_DO_STCLASS_OR)
- cl_and(data->start_class, and_withp);
- flags &= ~SCF_DO_STCLASS;
- }
- }
- else if (PL_regkind[OP(scan)] == EOL && flags & SCF_DO_SUBSTR) {
- data->flags |= (OP(scan) == MEOL
- ? SF_BEFORE_MEOL
- : SF_BEFORE_SEOL);
- SCAN_COMMIT(pRExC_state, data, minlenp);
-
- }
- else if ( PL_regkind[OP(scan)] == BRANCHJ
- /* Lookbehind, or need to calculate parens/evals/stclass: */
- && (scan->flags || data || (flags & SCF_DO_STCLASS))
- && (OP(scan) == IFMATCH || OP(scan) == UNLESSM)) {
- if ( OP(scan) == UNLESSM &&
- scan->flags == 0 &&
- OP(NEXTOPER(NEXTOPER(scan))) == NOTHING &&
- OP(regnext(NEXTOPER(NEXTOPER(scan)))) == SUCCEED
- ) {
- regnode *opt;
- regnode *upto= regnext(scan);
- DEBUG_PARSE_r({
- SV * const mysv_val=sv_newmortal();
- DEBUG_STUDYDATA("OPFAIL",data,depth);
-
- /*DEBUG_PARSE_MSG("opfail");*/
- regprop(RExC_rx, mysv_val, upto);
- PerlIO_printf(Perl_debug_log, "~ replace with OPFAIL pointed at %s (%"IVdf") offset %"IVdf"\n",
- SvPV_nolen_const(mysv_val),
- (IV)REG_NODE_NUM(upto),
- (IV)(upto - scan)
- );
- });
- OP(scan) = OPFAIL;
- NEXT_OFF(scan) = upto - scan;
- for (opt= scan + 1; opt < upto ; opt++)
- OP(opt) = OPTIMIZED;
- scan= upto;
- continue;
- }
- if ( !PERL_ENABLE_POSITIVE_ASSERTION_STUDY
- || OP(scan) == UNLESSM )
- {
- /* Negative Lookahead/lookbehind
- In this case we can't do fixed string optimisation.
- */
-
- I32 deltanext, minnext, fake = 0;
- regnode *nscan;
- struct regnode_charclass_class intrnl;
- int f = 0;
-
- data_fake.flags = 0;
- if (data) {
- data_fake.whilem_c = data->whilem_c;
- data_fake.last_closep = data->last_closep;
- }
- else
- data_fake.last_closep = &fake;
- data_fake.pos_delta = delta;
- if ( flags & SCF_DO_STCLASS && !scan->flags
- && OP(scan) == IFMATCH ) { /* Lookahead */
- cl_init(pRExC_state, &intrnl);
- data_fake.start_class = &intrnl;
- f |= SCF_DO_STCLASS_AND;
- }
- if (flags & SCF_WHILEM_VISITED_POS)
- f |= SCF_WHILEM_VISITED_POS;
- next = regnext(scan);
- nscan = NEXTOPER(NEXTOPER(scan));
- minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
- last, &data_fake, stopparen, recursed, NULL, f, depth+1);
- if (scan->flags) {
- if (deltanext) {
- FAIL("Variable length lookbehind not implemented");
- }
- else if (minnext > (I32)U8_MAX) {
- FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
- }
- scan->flags = (U8)minnext;
- }
- if (data) {
- if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
- pars++;
- if (data_fake.flags & SF_HAS_EVAL)
- data->flags |= SF_HAS_EVAL;
- data->whilem_c = data_fake.whilem_c;
- }
- if (f & SCF_DO_STCLASS_AND) {
- if (flags & SCF_DO_STCLASS_OR) {
- /* OR before, AND after: ideally we would recurse with
- * data_fake to get the AND applied by study of the
- * remainder of the pattern, and then derecurse;
- * *** HACK *** for now just treat as "no information".
- * See [perl #56690].
- */
- cl_init(pRExC_state, data->start_class);
- } else {
- /* AND before and after: combine and continue */
- const int was = TEST_SSC_EOS(data->start_class);
-
- cl_and(data->start_class, &intrnl);
- if (was)
- SET_SSC_EOS(data->start_class);
- }
- }
- }
-#if PERL_ENABLE_POSITIVE_ASSERTION_STUDY
- else {
- /* Positive Lookahead/lookbehind
- In this case we can do fixed string optimisation,
- but we must be careful about it. Note in the case of
- lookbehind the positions will be offset by the minimum
- length of the pattern, something we won't know about
- until after the recurse.
- */
- I32 deltanext, fake = 0;
- regnode *nscan;
- struct regnode_charclass_class intrnl;
- int f = 0;
- /* We use SAVEFREEPV so that when the full compile
- is finished perl will clean up the allocated
- minlens when it's all done. This way we don't
- have to worry about freeing them when we know
- they wont be used, which would be a pain.
- */
- I32 *minnextp;
- Newx( minnextp, 1, I32 );
- SAVEFREEPV(minnextp);
-
- if (data) {
- StructCopy(data, &data_fake, scan_data_t);
- if ((flags & SCF_DO_SUBSTR) && data->last_found) {
- f |= SCF_DO_SUBSTR;
- if (scan->flags)
- SCAN_COMMIT(pRExC_state, &data_fake,minlenp);
- data_fake.last_found=newSVsv(data->last_found);
- }
- }
- else
- data_fake.last_closep = &fake;
- data_fake.flags = 0;
- data_fake.pos_delta = delta;
- if (is_inf)
- data_fake.flags |= SF_IS_INF;
- if ( flags & SCF_DO_STCLASS && !scan->flags
- && OP(scan) == IFMATCH ) { /* Lookahead */
- cl_init(pRExC_state, &intrnl);
- data_fake.start_class = &intrnl;
- f |= SCF_DO_STCLASS_AND;
- }
- if (flags & SCF_WHILEM_VISITED_POS)
- f |= SCF_WHILEM_VISITED_POS;
- next = regnext(scan);
- nscan = NEXTOPER(NEXTOPER(scan));
-
- *minnextp = study_chunk(pRExC_state, &nscan, minnextp, &deltanext,
- last, &data_fake, stopparen, recursed, NULL, f,depth+1);
- if (scan->flags) {
- if (deltanext) {
- FAIL("Variable length lookbehind not implemented");
- }
- else if (*minnextp > (I32)U8_MAX) {
- FAIL2("Lookbehind longer than %"UVuf" not implemented", (UV)U8_MAX);
- }
- scan->flags = (U8)*minnextp;
- }
-
- *minnextp += min;
-
- if (f & SCF_DO_STCLASS_AND) {
- const int was = TEST_SSC_EOS(data.start_class);
-
- cl_and(data->start_class, &intrnl);
- if (was)
- SET_SSC_EOS(data->start_class);
- }
- if (data) {
- if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
- pars++;
- if (data_fake.flags & SF_HAS_EVAL)
- data->flags |= SF_HAS_EVAL;
- data->whilem_c = data_fake.whilem_c;
- if ((flags & SCF_DO_SUBSTR) && data_fake.last_found) {
- if (RExC_rx->minlen<*minnextp)
- RExC_rx->minlen=*minnextp;
- SCAN_COMMIT(pRExC_state, &data_fake, minnextp);
- SvREFCNT_dec_NN(data_fake.last_found);
-
- if ( data_fake.minlen_fixed != minlenp )
- {
- data->offset_fixed= data_fake.offset_fixed;
- data->minlen_fixed= data_fake.minlen_fixed;
- data->lookbehind_fixed+= scan->flags;
- }
- if ( data_fake.minlen_float != minlenp )
- {
- data->minlen_float= data_fake.minlen_float;
- data->offset_float_min=data_fake.offset_float_min;
- data->offset_float_max=data_fake.offset_float_max;
- data->lookbehind_float+= scan->flags;
- }
- }
- }
- }
-#endif
- }
- else if (OP(scan) == OPEN) {
- if (stopparen != (I32)ARG(scan))
- pars++;
- }
- else if (OP(scan) == CLOSE) {
- if (stopparen == (I32)ARG(scan)) {
- break;
- }
- if ((I32)ARG(scan) == is_par) {
- next = regnext(scan);
-
- if ( next && (OP(next) != WHILEM) && next < last)
- is_par = 0; /* Disable optimization */
- }
- if (data)
- *(data->last_closep) = ARG(scan);
- }
- else if (OP(scan) == EVAL) {
- if (data)
- data->flags |= SF_HAS_EVAL;
- }
- else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
- if (flags & SCF_DO_SUBSTR) {
- SCAN_COMMIT(pRExC_state,data,minlenp);
- flags &= ~SCF_DO_SUBSTR;
- }
- if (data && OP(scan)==ACCEPT) {
- data->flags |= SCF_SEEN_ACCEPT;
- if (stopmin > min)
- stopmin = min;
- }
- }
- else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
- {
- if (flags & SCF_DO_SUBSTR) {
- SCAN_COMMIT(pRExC_state,data,minlenp);
- data->longest = &(data->longest_float);
- }
- is_inf = is_inf_internal = 1;
- if (flags & SCF_DO_STCLASS_OR) /* Allow everything */
- cl_anything(pRExC_state, data->start_class);
- flags &= ~SCF_DO_STCLASS;
- }
- else if (OP(scan) == GPOS) {
- if (!(RExC_rx->extflags & RXf_GPOS_FLOAT) &&
- !(delta || is_inf || (data && data->pos_delta)))
- {
- if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR))
- RExC_rx->extflags |= RXf_ANCH_GPOS;
- if (RExC_rx->gofs < (U32)min)
- RExC_rx->gofs = min;
- } else {
- RExC_rx->extflags |= RXf_GPOS_FLOAT;
- RExC_rx->gofs = 0;
- }
- }
-#ifdef TRIE_STUDY_OPT
-#ifdef FULL_TRIE_STUDY
- else if (PL_regkind[OP(scan)] == TRIE) {
- /* NOTE - There is similar code to this block above for handling
- BRANCH nodes on the initial study. If you change stuff here
- check there too. */
- regnode *trie_node= scan;
- regnode *tail= regnext(scan);
- reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
- I32 max1 = 0, min1 = I32_MAX;
- struct regnode_charclass_class accum;
-
- if (flags & SCF_DO_SUBSTR) /* XXXX Add !SUSPEND? */
- SCAN_COMMIT(pRExC_state, data,minlenp); /* Cannot merge strings after this. */
- if (flags & SCF_DO_STCLASS)
- cl_init_zero(pRExC_state, &accum);
-
- if (!trie->jump) {
- min1= trie->minlen;
- max1= trie->maxlen;
- } else {
- const regnode *nextbranch= NULL;
- U32 word;
-
- for ( word=1 ; word <= trie->wordcount ; word++)
- {
- I32 deltanext=0, minnext=0, f = 0, fake;
- struct regnode_charclass_class this_class;
-
- data_fake.flags = 0;
- if (data) {
- data_fake.whilem_c = data->whilem_c;
- data_fake.last_closep = data->last_closep;
- }
- else
- data_fake.last_closep = &fake;
- data_fake.pos_delta = delta;
- if (flags & SCF_DO_STCLASS) {
- cl_init(pRExC_state, &this_class);
- data_fake.start_class = &this_class;
- f = SCF_DO_STCLASS_AND;
- }
- if (flags & SCF_WHILEM_VISITED_POS)
- f |= SCF_WHILEM_VISITED_POS;
-
- if (trie->jump[word]) {
- if (!nextbranch)
- nextbranch = trie_node + trie->jump[0];
- scan= trie_node + trie->jump[word];
- /* We go from the jump point to the branch that follows
- it. Note this means we need the vestigal unused branches
- even though they arent otherwise used.
- */
- minnext = study_chunk(pRExC_state, &scan, minlenp,
- &deltanext, (regnode *)nextbranch, &data_fake,
- stopparen, recursed, NULL, f,depth+1);
- }
- if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
- nextbranch= regnext((regnode*)nextbranch);
-
- if (min1 > (I32)(minnext + trie->minlen))
- min1 = minnext + trie->minlen;
- if (deltanext == I32_MAX) {
- is_inf = is_inf_internal = 1;
- max1 = I32_MAX;
- } else if (max1 < (I32)(minnext + deltanext + trie->maxlen))
- max1 = minnext + deltanext + trie->maxlen;
-
- if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
- pars++;
- if (data_fake.flags & SCF_SEEN_ACCEPT) {
- if ( stopmin > min + min1)
- stopmin = min + min1;
- flags &= ~SCF_DO_SUBSTR;
- if (data)
- data->flags |= SCF_SEEN_ACCEPT;
- }
- if (data) {
- if (data_fake.flags & SF_HAS_EVAL)
- data->flags |= SF_HAS_EVAL;
- data->whilem_c = data_fake.whilem_c;
- }
- if (flags & SCF_DO_STCLASS)
- cl_or(pRExC_state, &accum, &this_class);
- }
- }
- if (flags & SCF_DO_SUBSTR) {
- data->pos_min += min1;
- data->pos_delta += max1 - min1;
- if (max1 != min1 || is_inf)
- data->longest = &(data->longest_float);
- }
- min += min1;
- delta += max1 - min1;
- if (flags & SCF_DO_STCLASS_OR) {
- cl_or(pRExC_state, data->start_class, &accum);
- if (min1) {
- cl_and(data->start_class, and_withp);
- flags &= ~SCF_DO_STCLASS;
- }
- }
- else if (flags & SCF_DO_STCLASS_AND) {
- if (min1) {
- cl_and(data->start_class, &accum);
- flags &= ~SCF_DO_STCLASS;
- }
- else {
- /* Switch to OR mode: cache the old value of
- * data->start_class */
- INIT_AND_WITHP;
- StructCopy(data->start_class, and_withp,
- struct regnode_charclass_class);
- flags &= ~SCF_DO_STCLASS_AND;
- StructCopy(&accum, data->start_class,
- struct regnode_charclass_class);
- flags |= SCF_DO_STCLASS_OR;
- SET_SSC_EOS(data->start_class);
- }
- }
- scan= tail;
- continue;
- }
-#else
- else if (PL_regkind[OP(scan)] == TRIE) {
- reg_trie_data *trie = (reg_trie_data*)RExC_rxi->data->data[ ARG(scan) ];
- U8*bang=NULL;
-
- min += trie->minlen;
- delta += (trie->maxlen - trie->minlen);
- flags &= ~SCF_DO_STCLASS; /* xxx */
- if (flags & SCF_DO_SUBSTR) {
- SCAN_COMMIT(pRExC_state,data,minlenp); /* Cannot expect anything... */
- data->pos_min += trie->minlen;
- data->pos_delta += (trie->maxlen - trie->minlen);
- if (trie->maxlen != trie->minlen)
- data->longest = &(data->longest_float);
- }
- if (trie->jump) /* no more substrings -- for now /grr*/
- flags &= ~SCF_DO_SUBSTR;
- }
-#endif /* old or new */
-#endif /* TRIE_STUDY_OPT */
-
- /* Else: zero-length, ignore. */
- scan = regnext(scan);