Skip to content

Commit 3808a68

Browse files
committedMay 29, 2011
RT #91032: formline: bugs with non-string formats
When the format SV used by formline isn't a simple POK (such as ties, overloads, or stringified refs), many many things go wrong, and SEGVs ensue. Originally, pp_formline forced the SV to a PV, and then assumed it could rely on the resulting SvPVX value. Recent commits fixed this to skip the force (good), but then broke things such as: * in the absence of POK or pPOK, $^A was grown by 0 bytes rather than the length of the format, so the buffer overran; * the compiled format stored indexes into the original format string to refer to chunks of content text and the like. If there's no real SvPVX around, that's bad. * Stuff like tie and overload could return different format strings on each get, but the format would not be re-compiled (but would index into the new string anyway) Also, the format compiler would convert strings like '~~' into blanks in the original format SV. The easiest way to fix all these is to save a copy of the original string at the time it is compiled. This can conveniently be stored in the mg_obj slot of the fm magic (the compiled format already goes in mg_ptr). This way we're always guaranteed to have an unadulterated copy of the string to mess with. Also, the ~~ self-modification now happens to the copy rather than the original. Now each time formline is called, we also compare the current value of the SV with the stored copy, and if it's changed (e.g. tie with a FETCH that returns different values each time), then we recompile. Note that the recompile test is currently defeated by the ~~ modification, so re-compiles unnecessarily (but safely) in that case. A fix for that is coming next.
1 parent 554bc0f commit 3808a68

File tree

2 files changed

+95
-28
lines changed

2 files changed

+95
-28
lines changed
 

‎pp_ctl.c

+47-27
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,7 @@ PP(pp_formline)
523523
{
524524
dVAR; dSP; dMARK; dORIGMARK;
525525
register SV * const tmpForm = *++MARK;
526+
SV *formsv;
526527
register U32 *fpc;
527528
register char *t;
528529
const char *f;
@@ -538,35 +539,30 @@ PP(pp_formline)
538539
NV value;
539540
bool gotsome = FALSE;
540541
STRLEN len;
541-
const STRLEN fudge = SvPOKp(tmpForm)
542-
? (SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1) : 0;
542+
STRLEN fudge;
543543
bool item_is_utf8 = FALSE;
544544
bool targ_is_utf8 = FALSE;
545545
SV * nsv = NULL;
546546
const char *fmt;
547547
MAGIC *mg = NULL;
548548

549-
if (SvTYPE(tmpForm) >= SVt_PVMG) {
550-
/* This might, of course, still return NULL. */
551-
mg = mg_find(tmpForm, PERL_MAGIC_fm);
552-
} else {
553-
sv_upgrade(tmpForm, SVt_PVMG);
554-
}
549+
mg = doparseform(tmpForm);
555550

556-
if(!mg) {
557-
mg = doparseform(tmpForm);
558-
assert(mg);
559-
}
560551
fpc = (U32*)mg->mg_ptr;
552+
/* the actual string the format was compiled from.
553+
* with overload etc, this may not match tmpForm */
554+
formsv = mg->mg_obj;
555+
561556

562557
SvPV_force(PL_formtarget, len);
563-
if (SvTAINTED(tmpForm))
558+
if (SvTAINTED(tmpForm) || SvTAINTED(formsv))
564559
SvTAINTED_on(PL_formtarget);
565560
if (DO_UTF8(PL_formtarget))
566561
targ_is_utf8 = TRUE;
562+
fudge = (SvCUR(formsv) * (IN_BYTES ? 1 : 3) + 1);
567563
t = SvGROW(PL_formtarget, len + fudge + 1); /* XXX SvCUR bad */
568564
t += len;
569-
f = SvPV_const(tmpForm, len);
565+
f = SvPV_const(formsv, len);
570566

571567
for (;;) {
572568
DEBUG_f( {
@@ -607,15 +603,15 @@ PP(pp_formline)
607603

608604
case FF_LITERAL:
609605
arg = *fpc++;
610-
if (targ_is_utf8 && !SvUTF8(tmpForm)) {
606+
if (targ_is_utf8 && !SvUTF8(formsv)) {
611607
SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
612608
*t = '\0';
613609
sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv);
614610
t = SvEND(PL_formtarget);
615611
f += arg;
616612
break;
617613
}
618-
if (!targ_is_utf8 && DO_UTF8(tmpForm)) {
614+
if (!targ_is_utf8 && DO_UTF8(formsv)) {
619615
SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget));
620616
*t = '\0';
621617
sv_utf8_upgrade_flags_grow(PL_formtarget, SV_GMAGIC, fudge + 1);
@@ -4921,7 +4917,7 @@ S_doparseform(pTHX_ SV *sv)
49214917
{
49224918
STRLEN len;
49234919
register char *s = SvPV(sv, len);
4924-
register char * const send = s + len;
4920+
register char *send;
49254921
register char *base = NULL;
49264922
register I32 skipspaces = 0;
49274923
bool noblank = FALSE;
@@ -4934,13 +4930,43 @@ S_doparseform(pTHX_ SV *sv)
49344930
bool ischop;
49354931
bool unchopnum = FALSE;
49364932
int maxops = 12; /* FF_LINEMARK + FF_END + 10 (\0 without preceding \n) */
4937-
MAGIC *mg;
4933+
MAGIC *mg = NULL;
4934+
SV *sv_copy;
49384935

49394936
PERL_ARGS_ASSERT_DOPARSEFORM;
49404937

49414938
if (len == 0)
49424939
Perl_croak(aTHX_ "Null picture in formline");
49434940

4941+
if (SvTYPE(sv) >= SVt_PVMG) {
4942+
/* This might, of course, still return NULL. */
4943+
mg = mg_find(sv, PERL_MAGIC_fm);
4944+
} else {
4945+
sv_upgrade(sv, SVt_PVMG);
4946+
}
4947+
4948+
if (mg) {
4949+
/* still the same as previously-compiled string? */
4950+
SV *old = mg->mg_obj;
4951+
if ( !(!!SvUTF8(old) ^ !!SvUTF8(sv))
4952+
&& len == SvCUR(old)
4953+
&& strnEQ(SvPVX(old), SvPVX(sv), len)
4954+
)
4955+
return mg;
4956+
4957+
Safefree(mg->mg_ptr);
4958+
mg->mg_ptr = NULL;
4959+
SvREFCNT_dec(old);
4960+
mg->mg_obj = NULL;
4961+
}
4962+
else
4963+
mg = sv_magicext(sv, NULL, PERL_MAGIC_fm, &PL_vtbl_fm, NULL, 0);
4964+
4965+
sv_copy = newSVpvn_utf8(s, len, SvUTF8(sv));
4966+
s = SvPV(sv_copy, len); /* work on the copy, not the original */
4967+
send = s + len;
4968+
4969+
49444970
/* estimate the buffer size needed */
49454971
for (base = s; s <= send; s++) {
49464972
if (*s == '\n' || *s == '@' || *s == '^')
@@ -5121,16 +5147,10 @@ S_doparseform(pTHX_ SV *sv)
51215147
assert (fpc <= fops + maxops); /* ensure our buffer estimate was valid */
51225148
arg = fpc - fops;
51235149

5124-
/* If we pass the length in to sv_magicext() it will copy the buffer for us.
5125-
We don't need that, so by setting the length on return we "donate" the
5126-
buffer to the magic, avoiding an allocation. We could realloc() the
5127-
buffer to the exact size used, but that feels like it's not worth it
5128-
(particularly if the rumours are true and some realloc() implementations
5129-
don't shrink blocks). However, set the true length used in mg_len so that
5130-
mg_dup only allocates and copies what's actually needed. */
5131-
mg = sv_magicext(sv, NULL, PERL_MAGIC_fm, &PL_vtbl_fm,
5132-
(const char *const) fops, 0);
5150+
mg->mg_ptr = (char *) fops;
51335151
mg->mg_len = arg * sizeof(U32);
5152+
mg->mg_obj = sv_copy;
5153+
mg->mg_flags |= MGf_REFCOUNTED;
51345154

51355155
if (unchopnum && repeat)
51365156
Perl_die(aTHX_ "Repeated format line will never terminate (~~ and @#)");

‎t/op/write.t

+48-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ for my $tref ( @NumTests ){
6161
my $bas_tests = 20;
6262

6363
# number of tests in section 3
64-
my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 2 + 1 + 1;
64+
my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 6 + 2 + 1 + 1;
6565

6666
# number of tests in section 4
6767
my $hmb_tests = 35;
@@ -610,6 +610,53 @@ close STDOUT_DUP;
610610
*CmT = *{$::{Comment}}{FORMAT};
611611
ok defined *{$::{CmT}}{FORMAT}, "glob assign";
612612

613+
614+
# RT #91032: Check that "non-real" strings like tie and overload work,
615+
# especially that they re-compile the pattern on each FETCH, and that
616+
# they don't overrun the buffer
617+
618+
619+
{
620+
package RT91032;
621+
622+
sub TIESCALAR { bless [] }
623+
my $i = 0;
624+
sub FETCH { $i++; "A$i @> Z\n" }
625+
626+
use overload '""' => \&FETCH;
627+
628+
tie my $f, 'RT91032';
629+
630+
formline $f, "a";
631+
formline $f, "bc";
632+
::is $^A, "A1 a Z\nA2 bc Z\n", "RT 91032: tied";
633+
$^A = '';
634+
635+
my $g = bless []; # has overloaded stringify
636+
formline $g, "de";
637+
formline $g, "f";
638+
::is $^A, "A3 de Z\nA4 f Z\n", "RT 91032: overloaded";
639+
$^A = '';
640+
641+
my $h = [];
642+
formline $h, "junk1";
643+
formline $h, "junk2";
644+
::is ref($h), 'ARRAY', "RT 91032: array ref still a ref";
645+
::like "$h", qr/^ARRAY\(0x[0-9a-f]+\)$/, "RT 91032: array stringifies ok";
646+
::is $^A, "$h$h","RT 91032: stringified array";
647+
$^A = '';
648+
649+
# used to overwrite the ~~ in the *original SV with spaces. Naughty!
650+
651+
my $orig = my $format = "^<<<<< ~~\n";
652+
my $abc = "abc";
653+
formline $format, $abc;
654+
$^A ='';
655+
::is $format, $orig, "RT91032: don't overwrite orig format string";
656+
657+
}
658+
659+
613660
SKIP: {
614661
skip_if_miniperl('miniperl does not support scalario');
615662
my $buf = "";

0 commit comments

Comments
 (0)
Please sign in to comment.