Bug 368120 - x86_linux asm _start functions do not keep 16-byte aligned stack pointer
Summary: x86_linux asm _start functions do not keep 16-byte aligned stack pointer
Status: RESOLVED FIXED
Alias: None
Product: valgrind
Classification: Developer tools
Component: memcheck (show other bugs)
Version: 3.11.0
Platform: Android Other
: NOR normal
Target Milestone: ---
Assignee: Julian Seward
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2016-09-01 20:02 UTC by chh
Modified: 2016-10-19 06:01 UTC (History)
2 users (show)

See Also:
Latest Commit:
Version Fixed In:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description chh 2016-09-01 20:02:04 UTC
When valgrind is optimized by clang/llvm for x86 targets, clang/llvm requires 16-byte aligned stack pointers from the caller. The valgrind assembly functions _start,  vgModuleLocal_call_on_new_stack_0_1, and do_syscall_clone_x86_linux do not keep stack pointer aligned before calling other functions.

This is working with gcc because it does not require such stack pointer alignment.
When compiled with clang/llvm, memcheck aborted with segmentation fault.

The following diff should fixed the problem for Android x86 target:

@@ -2925,12 +2925,13 @@ asm("\n"
     "\tmovl  $vgPlain_interim_stack, %eax\n"
     "\taddl  $"VG_STRINGIFY(VG_STACK_GUARD_SZB)", %eax\n"
     "\taddl  $"VG_STRINGIFY(VG_DEFAULT_STACK_ACTIVE_SZB)", %eax\n"
+    /* allocate at least 16 bytes on the new stack, and aligned */
     "\tsubl  $16, %eax\n"
     "\tandl  $~15, %eax\n"
     /* install it, and collect the original one */
     "\txchgl %eax, %esp\n"
     /* call _start_in_C_linux, passing it the startup %esp */
-    "\tpushl %eax\n"
+    "\tmovl  %eax, (%esp)\n"
     "\tcall  _start_in_C_linux\n"
     "\thlt\n"
     ".previous\n"
diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c
index 24d7dc1..233886d 100644
--- a/coregrind/m_syswrap/syswrap-x86-linux.c
+++ b/coregrind/m_syswrap/syswrap-x86-linux.c
@@ -83,8 +83,9 @@ asm(
 ".globl vgModuleLocal_call_on_new_stack_0_1\n"
 "vgModuleLocal_call_on_new_stack_0_1:\n"
 "   movl %esp, %esi\n"     // remember old stack pointer
-"   movl 4(%esi), %esp\n"  // set stack
-"   pushl 16(%esi)\n"      // arg1 to stack
+"   movl 4(%esi), %esp\n"  // set stack, assume %esp is now 16-byte aligned
+"   subl $12, %esp\n"      // skip 12 bytes
+"   pushl 16(%esi)\n"      // arg1 to stack, %esp is 16-byte aligned
 "   pushl  8(%esi)\n"      // retaddr to stack
 "   pushl 12(%esi)\n"      // f to stack
 "   movl $0, %eax\n"       // zero all GP regs
@@ -150,7 +151,8 @@ asm(
 "        movl     4+"FSZ"(%esp), %ecx\n"    /* syscall arg2: child stack */
 "        movl    12+"FSZ"(%esp), %ebx\n"    /* fn arg */
 "        movl     0+"FSZ"(%esp), %eax\n"    /* fn */
-"        lea     -8(%ecx), %ecx\n"          /* make space on stack */
+"        andl    $-16, %ecx\n"              /* align to 16-byte */
+"        lea     -20(%ecx), %ecx\n"         /* allocate 16*n+4 bytes on stack */
 "        movl    %ebx, 4(%ecx)\n"           /*   fn arg */
 "        movl    %eax, 0(%ecx)\n"           /*   fn */
 
@@ -165,7 +167,7 @@ asm(
 "        jnz     1f\n"
 
          /* CHILD - call thread function */
-"        popl    %eax\n"
+"        popl    %eax\n"                    /* child %esp is 16-byte aligned */
 "        call    *%eax\n"                   /* call fn */
 
          /* exit with result */


 

Reproducible: Always

Steps to Reproduce:
1. Build valgrind with clang/llvm for x86 linux
2. Run valgrind to debug a 32-bit x86 program 


Actual Results:  
Segmentation fault in x86 32-bit memcheck.



See Android patch in https://android-review.googlesource.com/#/c/264837
Comment 1 Mark Wielaard 2016-09-22 12:24:20 UTC
Note that this is also true for gcc x86[-32] now. The abi used to say, and older gcc only guaranteed/relied upon, the stack being 4 byte aligned. But a couple of years ago the abi and gcc got changed to align the stack at a 16 byte boundary. See e.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=40838
With gcc generated code it only seems an issue if the called function uses any sse instructions.
Comment 2 Julian Seward 2016-10-19 06:01:02 UTC
Committed, r16075.  Thanks for the patch.