Take the following program: #include <stdio.h> int main (int argc, char **argv) { printf ("argc: %d\n", argc); return 0; } Compiled against glibc-2.21.90-21.fc23.s390x with gcc-5.1.1-4.fc23.s390x Running this will give: ==42061== Conditional jump or move depends on uninitialised value(s) ==42061== at 0x40AE366: vfprintf@@GLIBC_2.4 (vfprintf.c:1630) ==42061== by 0x40B5F7B: printf@@GLIBC_2.4 (printf.c:33) ==42061== by 0x800006C3: main (ptest.c:6) ==42061== Uninitialised value was created by a stack allocation ==42061== at 0x40ACF7A: vfprintf@@GLIBC_2.4 (vfprintf.c:1235) Unfortunately that is in code that uses lots of defines: 0x00000000040ae366 in _IO_vfprintf_internal (s=0x421ca00 <_IO_2_1_stdout_>, format=0x40008a6 "free", ap=0x4029990 <_rtld_local+2448>, ap@entry=0xffeffff90) at vfprintf.c:1630 1630 process_arg (((struct printf_spec *) NULL)); (gdb) list 1625 JUMP (*++f, step4_jumps); 1626 1627 /* Process current format. */ 1628 while (1) 1629 { 1630 process_arg (((struct printf_spec *) NULL)); 1631 process_string_arg (((struct printf_spec *) NULL)); 1632 1633 LABEL (form_unknown): 1634 if (spec == L_('\0')) The disassembly for that part of the code is huge: 1626 1627 /* Process current format. */ 1628 while (1) 1629 { 1630 process_arg (((struct printf_spec *) NULL)); 0x00000000040adb1a <+2994>: lg %r1,40(%r7) 0x00000000040adb20 <+3000>: clg %r1,48(%r7) 0x00000000040adb26 <+3006>: jhe 0x40af19a <_IO_vfprintf_internal+8754> 0x00000000040adb2a <+3010>: la %r2,1(%r1) 0x00000000040adb2e <+3014>: stg %r2,40(%r7) 0x00000000040adb34 <+3020>: mvi 0(%r1),37 0x00000000040adb38 <+3024>: cfi %r10,2147483647 0x00000000040adb3e <+3030>: je 0x40ad6d0 <_IO_vfprintf_internal+1896> [...] 0x00000000040ae34c <+5092>: l %r1,272(%r11) 0x00000000040ae350 <+5096>: ltr %r1,%r1 0x00000000040ae352 <+5098>: je 0x40ae378 <_IO_vfprintf_internal+5136> 0x00000000040ae356 <+5102>: lhi %r9,0 0x00000000040ae35a <+5106>: j 0x40adf02 <_IO_vfprintf_internal+3994> 0x00000000040ae35e <+5110>: l %r1,304(%r11) 0x00000000040ae362 <+5114>: tmll %r1,48 => 0x00000000040ae366 <+5118>: jnh 0x40adc9a <_IO_vfprintf_internal+3378> 0x00000000040ae36a <+5122>: ltr %r9,%r9 0x00000000040ae36c <+5124>: jh 0x40ae30a <_IO_vfprintf_internal+5026> 0x00000000040ae370 <+5128>: lhi %r9,0 0x00000000040ae374 <+5132>: j 0x40adc9a <_IO_vfprintf_internal+3378> [...] Reproducible: Always
------------------------ Front end ------------------------ l %r1,304(%r11) ------ IMark(0x40AE35E, 4, 0) ------ t0 = Add64(Add64(0x130:I64,GET:I64(280)),0x0:I64) PUT(204) = LDbe:I32(t0) PUT(336) = 0x40AE362:I64 tmll %r1,48 ------ IMark(0x40AE362, 4, 0) ------ t1 = GET:I16(206) t2 = 0x30:I16 PUT(352) = 0x13:I64 PUT(360) = 16Uto64(t1) PUT(368) = 16Uto64(t2) PUT(376) = 0x0:I64 PUT(336) = 0x40AE366:I64 jnh .-1740 ------ IMark(0x40AE366, 4, 0) ------ t3 = s390_calculate_cond[mcx=0x13]{0x8001352d0}(0xD:I64,GET:I64(352),GET:I64(360),GET:I64(368),GET:I64(376)):I32 if (CmpNE32(t3,0x0:I32)) { PUT(336) = 0x40ADC9A:I64; exit-Boring } PUT(336) = 0x40AE36A:I64 PUT(336) = GET:I64(336); exit-Boring GuestBytes 40AE35E 12 58 10 B1 30 A7 11 00 30 A7 D4 FC 9A 000381CA VexExpansionRatio 12 384 320 :10
------------------------ After tree-building ------------------------ IRSB { t0:I64 t1:I16 t2:I16 t3:I32 t4:I64 t5:I64 t6:I64 t7:I32 t8:I64 t9:I64 t10:I64 t11:I64 t12:I64 t13:I64 t14:I32 t15:I1 t16:I64 t17:I32 t18:I64 t19:I32 t20:I32 t21:I64 t22:I64 t23:I64 t24:I32 t25:I64 t26:I32 t27:I32 t28:I1 t29:I64 t30:I64 t31:I32 t32:I32 t33:I16 t34:I32 t35:I64 t36:I64 t37:I32 t38:I32 t39:I32 t40:I32 t41:I64 t42:I64 t43:I64 t44:I64 t45:I64 t46:I32 t47:I32 t48:I32 t49:I1 t50:I32 t51:I1 t52:I64 t53:I1 t54:I1 t55:I64 ------ IMark(0x40AE35E, 4, 0) ------ t5 = Add64(0x130:I64,GET:I64(280)) t25 = DIRTY 1:I1 ::: MC_(helperc_b_load4)[rp=1]{0x800012208}(t5) DIRTY CmpNEZ64(GET:I64(712)) RdFX-gst(312,8) RdFX-gst(336,8) ::: MC_(helperc_value_check8_fail_w_o)[rp=1]{0x800011820}(32Uto64(GET:I32(1144))) t31 = DIRTY 1:I1 RdFX-gst(312,8) RdFX-gst(336,8) ::: MC_(helperc_LOADV32be)[rp=1]{0x800010d00}(t5) PUT(1064) = 64to32(t25) PUT(636) = t31 PUT(204) = LDbe:I32(t5) ------ IMark(0x40AE362, 4, 0) ------ t32 = GET:I32(1064) PUT(352) = 0x13:I64 t36 = 16Uto64(GET:I16(638)) t8 = 16Uto64(GET:I16(206)) PUT(1224) = t32 PUT(792) = t36 PUT(360) = t8 PUT(1232) = 0x0:I32 PUT(800) = 0x0:I64 PUT(368) = 0x30:I64 PUT(376) = 0x0:I64 PUT(336) = 0x40AE366:I64 ------ IMark(0x40AE366, 4, 0) ------ DIRTY CmpNEZ64(t36) RdFX-gst(312,8) RdFX-gst(336,8) ::: MC_(helperc_value_check0_fail_w_o)[rp=1]{0x800011798}(32Uto64(t32)) if (CmpNE32(s390_calculate_cond[mcx=0x13]{0x8001352d0}(0xD:I64,0x13:I64,t8,0x30:I64,0x0:I64):I32,0x0:I32)) { PUT(336) = 0x40ADC9A:I64; exit-Boring } PUT(336) = 0x40AE36A:I64; exit-Boring } VexExpansionRatio 12 384 320 :10
In theory, could be that the bits of r1 we're interested in are undefined. But since the contents of r1 is the link register in teh guest state, that is probably unlikely. Perhaps the spec helper has a bug. The patch below disables it: Index: VEX/priv/guest_s390_helpers.c =================================================================== --- VEX/priv/guest_s390_helpers.c (revision 3182) +++ VEX/priv/guest_s390_helpers.c (working copy) @@ -2139,7 +2139,7 @@ if (cc_op == S390_CC_OP_TEST_UNDER_MASK_16) { ULong mask16; UInt msb; - + goto missed; if (! isC64(cc_dep2)) goto missed; mask16 = cc_dep2->Iex.Const.con->Ico.U64;
With --vgdb-shadow-registers=yes and stepi I can see r1 being fully defined, but then after the tmll %r1,48 $r1s1 is 0xffffff00. Disabling the spec helper doesn't seem to change that. But maybe I am misinterpreting what is going on.
S390_CC_OP_TEST_UNDER_MASK_16 is called with mask16 = 0x30 and cond = 13 so none of the cases match and we goto missed.
You're right. It's not the spec helper. Which means that the call to s390_calculate_cond is executed. Which brings us to guest_s390_helpers.c line 1567. I don't see anything wrong there. I do not understand how r1's V bits are changed by tmll as that insn does not modify its register argument.
For now it seems it only occurs in this one particular function on s390x. Adding the following suppression produces good regtest results: { s390-vfprintf Memcheck:Cond fun:vfprintf@@GLIBC_2.4 }
./vg-in-place --vex-guest-chase-thresh=0 --trace-flags=10001000 --tool=none --tracenotbelow=1586 ./ptest ==== SB 1591 (evchecks 11789) [tid 1] 0x409a35e vfprintf@@GLIBC_2.4+5110 /usr/lib64/libc-2.21.90.so+0x5735e ------------------------ Front end ------------------------ l %r1,304(%r11) ------ IMark(0x409A35E, 4, 0) ------ t0 = Add64(Add64(0x130:I64,GET:I64(280)),0x0:I64) PUT(204) = LDbe:I32(t0) PUT(336) = 0x409A362:I64 tmll %r1,48 ------ IMark(0x409A362, 4, 0) ------ t1 = GET:I16(206) t2 = 0x30:I16 PUT(352) = 0x13:I64 PUT(360) = 16Uto64(t1) PUT(368) = 16Uto64(t2) PUT(376) = 0x0:I64 PUT(336) = 0x409A366:I64 jnh .-1740 ------ IMark(0x409A366, 4, 0) ------ t3 = s390_calculate_cond[mcx=0x13]{0x8000ff648}(0xD:I64,GET:I64(352),GET:I64(360),GET:I64(368),GET:I64(376)):I32 if (CmpNE32(t3,0x0:I32)) { PUT(336) = 0x4099C9A:I64; exit-Boring } PUT(336) = 0x409A36A:I64 PUT(336) = GET:I64(336); exit-Boring GuestBytes 409A35E 12 58 10 B1 30 A7 11 00 30 A7 D4 FC 9A 000381CA ------------------------ After tree-building ------------------------ IRSB { t0:I64 t1:I16 t2:I16 t3:I32 t4:I64 t5:I64 t6:I64 t7:I32 t8:I64 t9:I64 t10:I64 t11:I64 t12:I64 t13:I64 t14:I32 t15:I1 t16:I64 ------ IMark(0x409A35E, 4, 0) ------ PUT(204) = LDbe:I32(Add64(0x130:I64,GET:I64(280))) ------ IMark(0x409A362, 4, 0) ------ PUT(352) = 0x13:I64 t8 = 16Uto64(GET:I16(206)) PUT(360) = t8 PUT(368) = 0x30:I64 PUT(376) = 0x0:I64 PUT(336) = 0x409A366:I64 ------ IMark(0x409A366, 4, 0) ------ if (CmpNE32(s390_calculate_cond[mcx=0x13]{0x8000ff648}(0xD:I64,0x13:I64,t8,0x30:I64,0x0:I64):I32,0x0:I32)) { PUT(336) = 0x4099C9A:I64; exit-Boring } PUT(336) = 0x409A36A:I64; exit-Boring } VexExpansionRatio 12 200 166 :10
If I had to guess (because this problem has happened many times before for the amd64 and x86 ports), the problem happens because the final optimised IR still contains a call to s390_calculate_cond: if (CmpNE32(s390_calculate_cond[mcx=0x13]{0x8001352d0}(0xD:I64,0x13:I64,t8,0x30:I64,0x0:I64):I32,0x0:I32)) { the mcx mask tells Memcheck that the result of this function call depends on t8 and 0x30 (dep1 and dep2) and no others. What I would guess is that, because of the other arguments, it doesn't depend on _all_ the bits of t8 and at least one of the bits that it doesn't depend on is undefined. So Memcheck complains. The "fix" for this in the other ports has been to enhance the spechelper function so as to translate this call into pure IR. Then Memcheck can see the dataflow more accurately (generally speaking) and the complaint goes away. In other words, Memcheck's instrumentation of the helper function is too crude and pessimistic, because it doesn't really know what happens inside it. So if we "inline" it by causing the spechelper to convert it to IR, the instrumentation can be more accurate after that. So .. is it possible to write a spechelper case for 0x13 (S390_CC_OP_TEST_UNDER_MASK_16) ?
Mark, can you try this? Does it help ? Index: VEX/priv/guest_s390_helpers.c =================================================================== --- VEX/priv/guest_s390_helpers.c (revision 3185) +++ VEX/priv/guest_s390_helpers.c (working copy) @@ -2151,6 +2151,8 @@ return mkU32(0); } + if (cond == 15) return mkU32(1); + if (cond == 8) { return unop(Iop_1Uto32, binop(Iop_CmpEQ64, binop(Iop_And64, cc_dep1, cc_dep2), @@ -2227,6 +2229,17 @@ binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0))); } + if (cond == 13) { /* cc == 0 || cc == 1 || cc == 3 */ + IRExpr *c01, *c3; + + c01 = binop(Iop_CmpEQ64, binop(Iop_And64, cc_dep1, mkU64(msb)), + mkU64(0)); + c3 = binop(Iop_CmpEQ64, binop(Iop_And64, cc_dep1, cc_dep2), + mkU64(mask16)); + return binop(Iop_Or32, unop(Iop_1Uto32, c01), + unop(Iop_1Uto32, c3)); + } + // fixs390: handle cond = 5,6,9,10 (the missing cases) // vex_printf("TUM mask = 0x%llx\n", mask16); goto missed; } @@ -2365,7 +2378,7 @@ goto missed; } - /* --------- Specialising "s390_calculate_cond" --------- */ + /* --------- Specialising "s390_calculate_cc" --------- */ if (vex_streq(function_name, "s390_calculate_cc")) { IRExpr *cc_op_expr, *cc_dep1;
(In reply to Florian Krohm from comment #10) > Mark, can you try this? Does it help ? Yes, no more uninitialised value warnings. Thanks.
I checked this patch in as VEX r3186