A testcase that exercises the Iop_CmpNE32 Iop triggers an uninitialised value error on the MIPS64 target. Reproducible: Always Steps to Reproduce: Build the following testcase on an Octeon target (or maybe a testcase that will exercise Iop_CmpNE32 on a MIPS64 target) and run it on valgrind. /* saa.c */ 1#include <stdio.h> 2 3void saa() { 4 unsigned int rt = 0x11111111; 5 unsigned int out = 0, out_pre = 0; 6 unsigned int offset = 4; 7 unsigned int base[] = {0x22222222, 0x33333333}; 8 9 __asm__ volatile ( 10 "move $t0, %2" "\n\t" /* mem */ 11 "move $t1, %3" "\n\t" /* offset */ 12 "daddu $t0, $t1, $t0""\n\t" /* mem + offset */ 13 "lw %1, 0($t0)" "\n\t" /* out_pre = *(mem + offset) */ 14 "move $t2, %4" "\n\t" /* t2 = rt */ 15 "saa $t2, ($t0)" "\n\t" 16 "lw %0, 0($t0)" "\n\t" /* out = *(mem + offset) */ 17 : "=&r" (out), "=&r" (out_pre) 18 : "r" (base), "r" (offset), "r" (rt) 19 : "$12", "$13", "$14", "cc", "memory" 20 ); 21 22 printf("saa rt: %x out: %x out_pre: %x\n", rt, out, out_pre); 23} 24 25int main() { 26 saa(); 27 28 return 0; 29} Actual Results: valgrind --track-origins=yes --quiet ./saa ==2587== Conditional jump or move depends on uninitialised value(s) ==2587== at 0x10000B4C: saa (saa.c:9) ==2587== by 0x10000BD8: main (saa.c:26) ==2587== saa rt: 11111111 out: 44444444 out_pre: 33333333 Expected Results: valgrind --track-origins=yes --quiet ./saa ==2587== saa rt: 11111111 out: 44444444 out_pre: 33333333 The following patch helps to get rid of the above valgrind error. --- a/priv/guest_mips_toIR.c +++ b/priv/guest_mips_toIR.c @@ -2203,7 +2203,10 @@ static void mips_irgen_load_and_add32(IRTemp op1addr, IRTemp n /* If old_mem contains the expected value, then the CAS succeeded. Otherwise, it did not */ - jump_back(binop(Iop_CmpNE32, mkexpr(old_mem), mkexpr(expd))); + jump_back(binop(Iop_CmpNE64, + mkWidenFrom32(Ity_I64, mkexpr(old_mem), True), + mkWidenFrom32(Ity_I64, mkexpr(expd), True))); + if (putIntoRd) putIReg(rd, mkWidenFrom32(Ity_I64, mkexpr(old_mem), True)); }
Petar, any comments on the proposed fix?
(In reply to Julian Seward from comment #1) > Petar, any comments on the proposed fix? It seems to me that this change will only hide the real problem. We should first check why Iop_CmpNE32 is generating a false warning on MIPS64.
I investigated this attempting a better fix. It seems that helperc_LOADV32be from memcheck returns a 64bit result with the "upper" bits as all "undefined". Those bits should be ignored by the generated code but this does not seem to happen on mips. The saa instruction is interpreted like this: 0x100000C4: 0x718E0018 saa r14, (r12)^M ------ IMark(0x100000C4, 4, 0) ------^M t6 = GET:I64(96)^M t9 = GET:I64(96)^M t10 = Add32(LDbe:I32(t9),64to32(GET:I64(112)))^M t12 = LDbe:I32(t9)^M t11 = CASle(t9::t12->t10)^M if (CmpNE32(t11,t12)) { PUT(256) = 0x100000C4:I64; exit-Yield } ^M PUT(256) = 0x100000C8:I64^M The CASle is odd but irrelevant. The post-instrumented IR tree looks like this: ------ IMark(0x100000C4, 4, 0) ------ t91 = DIRTY 1:I1 RdFX-gst(232,8) RdFX-gst(256,8) ::: MC_(helperc_LOADV32be)[rp=1]{0x38045838}(t45)^M t98 = DIRTY 1:I1 RdFX-gst(232,8) RdFX-gst(256,8) ::: MC_(helperc_LOADV32be)[rp=1]{0x38045838}(t45)^M t12 = LDbe:I32(t45)^M t103 = DIRTY 1:I1 RdFX-gst(232,8) RdFX-gst(256,8) ::: MC_(helperc_LOADV32le)[rp=1]{0x38045940}(t45)^M t11 = CASle(t45::t12->Add32(LDbe:I32(t45),0x11:I32))^M DIRTY CasCmpEQ32(t12,t11) RdFX-gst(232,8) RdFX-gst(256,8) ::: MC_(helperc_STOREV32le)[rp=2]{0x38045b88}(t45,32Uto64(Left32(t91)))^M DIRTY CmpNEZ32(Or32(t103,t98)) RdFX-gst(232,8) RdFX-gst(256,8) ::: MC_(helperc_value_check0_fail_no_o){0x38046708}()^M if (CmpNE32(t11,t12)) { PUT(256) = 0x100000C4:I64; exit-Yield } ^M The problem is that helperc_value_check0_fail_no_o is incorrectly called. This happens if CmpNEZ32(Or32(t103,t98)) where t103 and t98 come directly from helperc_LOADV32be. That function returns a 64bit result and I suspect that either Or32 or CmpNEZ32 is supposed to ignore the upper bits but that's not happening. The generated mips assembly operates on 64bit quantities. I stepped through the generated code in gdb and the registers that end up holding t103 and t98 do contain 0xFFFFFFFF00000000 and this ends up triggering a false positive. 
Here's the real generated mips asm snippet: 0x803271180: lui s2,0x1000 0x803271184: ori s2,s2,0xc4 0x803271188: sd s2,256(s7) 0x80327118c: or a0,s1,s1 0x803271190: lui t9,0x3804 0x803271194: ori t9,t9,0x5838 0x803271198: jalr t9 0x80327119c: nop 0x8032711a0: or s2,v0,v0 0x8032711a4: or a0,s1,s1 0x8032711a8: lui t9,0x3804 0x8032711ac: ori t9,t9,0x5838 0x8032711b0: jalr t9 0x8032711b4: nop 0x8032711b8: or s3,v0,v0 0x8032711bc: lw s4,0(s1) 0x8032711c0: or a0,s1,s1 0x8032711c4: lui t9,0x3804 0x8032711c8: ori t9,t9,0x5940 0x8032711cc: jalr t9 0x8032711d0: nop 0x8032711d4: or s5,v0,v0 0x8032711d8: lw s6,0(s1) 0x8032711dc: addiu t8,s6,17 0x8032711e0: ll s6,0(s1) 0x8032711e4: bne s6,s4,0x8032711f8 0x8032711e8: nop 0x8032711ec: addiu s6,s6,1 0x8032711f0: sc t8,0(s1) 0x8032711f4: movn s6,s4,t8 0x8032711f8: negu t8,s2 0x8032711fc: or t8,t8,s2 0x803271200: dsll32 s2,t8,0x0 0x803271204: dsrl32 s2,s2,0x0 0x803271208: xor t8,s4,s6 0x80327120c: sltiu t8,t8,1 0x803271210: sw t8,612(s7) 0x803271214: xor t8,s4,s6 0x803271218: sltiu t8,t8,1 0x80327121c: or a0,s1,s1 0x803271220: or a1,s2,s2 0x803271224: sd t8,1928(s7) 0x803271228: ld s1,1928(s7) 0x80327122c: blez s1,0x803271244 0x803271230: nop 0x803271234: lui t9,0x3804 0x803271238: ori t9,t9,0x5b88 0x80327123c: jalr t9 0x803271240: nop 0x803271244: or s1,s5,s3 0x803271248: xor s2,s1,zero 0x80327124c: sltu s2,zero,s2 0x803271250: sw s2,612(s7) 0x803271254: or s1,s5,s3 0x803271258: xor s2,s1,zero 0x80327125c: sltu s2,zero,s2 0x803271260: blez s2,0x803271278 0x803271264: nop 0x803271268: lui t9,0x3804 0x80327126c: ori t9,t9,0x6708 => 0x803271270: jalr t9 0x803271274: nop 0x803271278: xor s1,s6,s4 0x80327127c: sltu s1,zero,s1 0x803271280: sw s1,612(s7) 0x803271284: lui s1,0x1000 0x803271288: ori s1,s1,0xc4 0x80327128c: lw a5,612(s7) 0x803271290: beq zero,a5,0x8032712d0 0x803271294: nop 0x803271298: sd s1,256(s7) 0x80327129c: lui s7,0x0 0x8032712a0: ori s7,s7,0x0 0x8032712a4: dsll s7,s7,0x10 0x8032712a8: ori s7,s7,0x0 0x8032712ac: dsll 
s7,s7,0x10 0x8032712b0: ori s7,s7,0x43 0x8032712b4: lui a5,0x0 0x8032712b8: ori a5,a5,0x0 0x8032712bc: dsll a5,a5,0x10 0x8032712c0: ori a5,a5,0x380c 0x8032712c4: dsll a5,a5,0x10 0x8032712c8: ori a5,a5,0xbb10 0x8032712cc: jalr a5 0x8032712d0: nop
You can see the generated insns more easily using --trace-flags=00000100 I think. It's OK that the back end uses 64 bit insns to implement Or32. See the comment at host_mips_isel.c:769. But for CmpNEZ32 it does need to only look at the lower 32 bits. So maybe that's wrong? I don't know mips assembly so I can't tell from the code in comment 3.
This is very strange. It seems that MIPSInstr_Cmp takes a sz32 argument and assigns it to MIPSInstr.Min.Cmp.sz32. Apparently it's never used when actually generating code? On mips64 the slt{i,}{u,} instructions always operate on the full 64-bit register. This could be hacked to narrow for MIPScc_EQ and MIPScc_NE by inserting an "sll r_dst, r_dst, 0". But I think that stuff like checking if the lower 32 bits of one register are less than the lower 32 bits of another register requires actual temporaries. So if stuff like CmpLE32U needs to do implicit narrowing then it won't work correctly on mips. I attempted to handle MIPSInstr.Min.Cmp.sz32 inside emit_MIPSInstr but broke other things.
Are you saying that mips64 doesn't have an efficient way to handle 32 bit arithmetic operations? All other 64 bit archs I know of always have a way to do (eg) compare just the lower halves of two registers. They need to have that so as to generate efficient code for C (etc) that is primarily targeted at 32 bit platforms. That said, I also see, in the Min_Cmp case for emit_MIPSInstr, that sz32 is unused. This doesn't seem right. emit_MIPSInstr seems to me the right place to fix it.
Here's a minimal patch which just fixes CmpNEZ32 by explicitly narrowing the src operand. It does fix the issue originally reported. --- a/priv/host_mips_isel.c +++ b/priv/host_mips_isel.c @@ -1639,8 +1639,16 @@ static HReg iselWordExpr_R_wrk(ISelEnv * env, IRExpr * e) HReg r_dst = newVRegI(env); HReg r_src = iselWordExpr_R(env, e->Iex.Unop.arg); - addInstr(env, MIPSInstr_Cmp(False, True, r_dst, r_src, - hregMIPS_GPR0(mode64), MIPScc_NE)); + if (mode64) { + HReg tmp = newVRegI(env); + addInstr(env, MIPSInstr_Shft(Mshft_SLL, True /*!32bit shift */, + tmp, r_src, MIPSRH_Imm(True, 0))); + addInstr(env, MIPSInstr_Cmp(False, True, r_dst, tmp, + hregMIPS_GPR0(mode64), MIPScc_NE)); + } else { + addInstr(env, MIPSInstr_Cmp(False, True, r_dst, r_src, + hregMIPS_GPR0(mode64), MIPScc_NE)); + } return r_dst; } Here is also a bigger patch which attempts to implement MIPSInstr_Cmp.sz32 instead. That patch breaks a bunch of stuff, for example I can no longer load libstdc++. It's not clear why it does that and debugging would be difficult. I also don't think that the LT/LO/LE/LS cases can be emitted for sz32 without using additional temporaries. Perhaps the correct fix would be to remove sz32 completely and always narrow to 32 bits at isel time as appropriate. --- a/priv/host_mips_defs.c +++ b/priv/host_mips_defs.c @@ -1221,7 +1221,8 @@ void ppMIPSInstr(const MIPSInstr * i, Bool mode64) case Min_Cmp: { vex_printf("word_compare "); ppHRegMIPS(i->Min.Cmp.dst, mode64); - vex_printf(" = %s ( ", showMIPSCondCode(i->Min.Cmp.cond)); + vex_printf(" = %s%s ( ", showMIPSCondCode(i->Min.Cmp.cond), + (mode64 && i->Min.Cmp.sz32) ? 
"32" : ""); ppHRegMIPS(i->Min.Cmp.srcL, mode64); vex_printf(", "); ppHRegMIPS(i->Min.Cmp.srcR, mode64); @@ -2750,39 +2751,50 @@ Int emit_MIPSInstr ( /*MB_MOD*/Bool* is_profInc, UInt r_srcL = iregNo(i->Min.Cmp.srcL, mode64); UInt r_srcR = iregNo(i->Min.Cmp.srcR, mode64); UInt r_dst = iregNo(i->Min.Cmp.dst, mode64); + Bool sz32 = i->Min.Cmp.sz32; switch (i->Min.Cmp.cond) { case MIPScc_EQ: /* xor r_dst, r_srcL, r_srcR + if (mode64 && sz32) sll r_dst, r_dst, 0 sltiu r_dst, r_dst, 1 */ p = mkFormR(p, 0, r_srcL, r_srcR, r_dst, 0, 38); + if (mode64 && sz32) + p = mkFormS(p, 0, r_dst, 0, r_dst, 0, 0); p = mkFormI(p, 11, r_dst, r_dst, 1); break; case MIPScc_NE: /* xor r_dst, r_srcL, r_srcR + if (mode64 && sz32) sll r_dst, r_dst, 0 sltu r_dst, zero, r_dst */ p = mkFormR(p, 0, r_srcL, r_srcR, r_dst, 0, 38); + if (mode64 && sz32) + p = mkFormS(p, 0, r_dst, 0, r_dst, 0, 0); p = mkFormR(p, 0, 0, r_dst, r_dst, 0, 43); break; case MIPScc_LT: /* slt r_dst, r_srcL, r_srcR */ p = mkFormR(p, 0, r_srcL, r_srcR, r_dst, 0, 42); + vassert(!(mode64 && sz32)); break; case MIPScc_LO: /* sltu r_dst, r_srcL, r_srcR */ p = mkFormR(p, 0, r_srcL, r_srcR, r_dst, 0, 43); + vassert(!(mode64 && sz32)); break; case MIPScc_LE: /* slt r_dst, r_srcR, r_srcL xori r_dst, r_dst, 1 */ p = mkFormR(p, 0, r_srcR, r_srcL, r_dst, 0, 42); p = mkFormI(p, 14, r_dst, r_dst, 1); + vassert(!(mode64 && sz32)); break; case MIPScc_LS: /* sltu r_dst, rsrcR, r_srcL xori r_dsr, r_dst, 1 */ p = mkFormR(p, 0, r_srcR, r_srcL, r_dst, 0, 43); p = mkFormI(p, 14, r_dst, r_dst, 1); + vassert(!(mode64 && sz32)); break; default: goto bad; Somebody who knows more about mips64 should take a look at this.
I have committed a change (VEX r3304) that should resolve this issue. Let me know if you have any additional concerns.
We should close this issue now.