#if 0 gcc -g -O0 valgrind-fork-sigsegv.c || exit $? ulimit -u 128 valgrind -q ./a.out exit $? #endif #include <stdio.h> #include <stdlib.h> #include <signal.h> #include <unistd.h> int main() { int i; if (SIG_ERR == signal(SIGCHLD, SIG_IGN)) return EXIT_FAILURE; for (i = 0; i < 0x100; i++) { pid_t pid = fork(); if (pid == 0) { sleep(4); return EXIT_SUCCESS; } if (0 < pid) continue; fprintf(stderr, "%d\n", 0); return EXIT_FAILURE; } return EXIT_SUCCESS; } crashes valgrind, haven't managed so far to find out where.
I can reproduce this. I can't figure out why the segfault is happening. It is happening in vex-generated code. FTR: Dump of assembler code from 0x4038d4700 to 0x4038d47ff: 0x00000004038d4700: lea 0x2000(%r13),%r12 0x00000004038d4707: mov %r12,%rdi 0x00000004038d470a: mov $0x0,%esi 0x00000004038d470f: mov $0x38018c00,%r11 0x00000004038d4716: callq *%r11 0x00000004038d4719: mov $0xfffffffffbad8004,%r14 0x00000004038d4720: mov %r14d,0x0(%r12) 0x00000004038d4725: movq $0x4e73d07,0xa8(%rbp) -- guest_RIP = 0x4e73d07 0x00000004038d4730: lea 0x2088(%r13),%r12 0x00000004038d4737: mov %r12,%rdi 0x00000004038d473a: mov $0x0,%esi 0x00000004038d473f: mov $0x38018e00,%r11 0x00000004038d4746: callq *%r11 0x00000004038d4749: movq $0x0,0x0(%r12) 0x00000004038d4752: movq $0x4e73d13,0xa8(%rbp) -- guest_RIP = 0x4e73d13 0x00000004038d475d: lea 0x20d8(%r13),%r12 0x00000004038d4764: mov %r12,%rdi 0x00000004038d4767: mov $0x0,%esi 0x00000004038d476c: mov $0x38018e00,%r11 0x00000004038d4773: callq *%r11 0x00000004038d4776: movq $0x517bbe0,0x0(%r12) 0x00000004038d477f: movq $0x4e73d1b,0xa8(%rbp) -- guest_RIP = 0x4e73d1b 0x00000004038d478a: mov %rbx,%r12 0x00000004038d478d: neg %r12 0x00000004038d4790: or %rbx,%r12 0x00000004038d4793: sub $0x8,%r13 0x00000004038d4797: mov %r12,0x280(%rbp) 0x00000004038d479e: mov %r13,%rdi 0x00000004038d47a1: mov $0x3800a2b0,%r11 0x00000004038d47a8: callq *%r11 0x00000004038d47ab: mov %r13,0x20(%rbp) 0x00000004038d47af: cmp $0x0,%r12 0x00000004038d47b3: je 0x4038d47bf 0x00000004038d47b5: mov $0x38018450,%r11 0x00000004038d47bc: callq *%r11 -- set up for the shadow store 0x00000004038d47bf: mov %r13,%rdi -- arg1 (r13 = store address) 0x00000004038d47c2: mov $0x0,%esi -- arg2 ("all bits defined") 0x00000004038d47c7: mov $0x38018e00,%r11 -- &vgMemCheck_helperc_STOREV64le 0x00000004038d47ce: callq *%r11 -- vgMemCheck_helperc_STOREV64le(arg1,arg2) 0x00000004038d47d1: movq $0x4e73d20,0x0(%r13) -- the real store --- SEGFAULT 0x00000004038d47d9: sub $0x80,%r13 
0x00000004038d47e0: mov %r13,%rdi 0x00000004038d47e3: mov $0x80,%esi 0x00000004038d47e8: mov $0x0,%edx 0x00000004038d47ed: mov $0x38010400,%r11 0x00000004038d47f4: callq *%r11 0x00000004038d47f7: mov $0x4e6ed60,%rax 0x00000004038d47fe: mov $0x380647f0,%rdx #0 0x00000004038d47d1 in ?? () #1 0x000000000001372b in ?? () #2 0x0000000038dcaa40 in vgPlain_threads () #3 0x0000000000001880 in ?? () #4 0x0000000038dcaa30 in vgPlain_threads () #5 0x0000000000001870 in ?? () #6 0x0000000000000001 in ?? () #7 0x0000000404f6d000 in ?? () #8 0x000000040502fc18 in ?? () #9 0x0000000000000000 in ?? () (gdb) p/x $r13 $1 = 0x7feffd328 So we know three guest RIP values leading up to the crash: 0x4e73d07 0x4e73d13 0x4e73d1b One possibility is to find out what these instructions are (presumably part of ld.so or libc.so). A better possibility is to edit VG_(helperc_STOREV64le). Add a check: if store address == 0x7feffd328 then print a stack trace by calling VG_(get_and_pp_StackTrace). That will at least give us a stack trace (in the guest) at the point where it segfaulted. What's strange about this, though, is that if this store address is invalid (as it obviously is) then the call to VG_(helperc_STOREV64le) should have printed an error message, but it didn't.
I can no longer replicate with valgrind > 3.6.0. I could replicate it with valgrind 3.5.0.
I take that back: I have now replicated it against valgrind-3.8.0-1.fc18.x86_64.
Well, I have been looking for a huge issue that has been puzzling me for 2 days now, and I found this thread. I have greatly simplified the issue so maybe someone can figure it out (could it be an alignment issue?). Also, it seems that it will only trigger from inside a signal handler, which makes it more sneaky. binf@SINGULAR:~/BY$ gcc -v Reading specs from /usr/lib64/gcc/x86_64-slackware-linux/4.7.1/specs COLLECT_GCC=gcc COLLECT_LTO_WRAPPER=/usr/libexec/gcc/x86_64-slackware-linux/4.7.1/lto-wrapper Target: x86_64-slackware-linux Configured with: ../gcc-4.7.1/configure --prefix=/usr --libdir=/usr/lib64 --mandir=/usr/man --infodir=/usr/info --enable-shared --enable-bootstrap --enable-languages=ada,c,c++,fortran,go,java,lto,objc --enable-threads=posix --enable-checking=release --enable-objc-gc --with-system-zlib --with-python-dir=/lib64/python2.7/site-packages --disable-libunwind-exceptions --enable-__cxa_atexit --enable-libssp --enable-lto --with-gnu-ld --verbose --enable-java-home --with-java-home=/usr/lib64/jvm/jre --with-jvm-root-dir=/usr/lib64/jvm --with-jvm-jar-dir=/usr/lib64/jvm/jvm-exports --with-arch-directory=amd64 --with-antlr-jar=/slack/TMPTMPTMP/gcc-round-two/antlr-runtime-3.4.jar --enable-java-awt=gtk --disable-gtktest --disable-multilib --target=x86_64-slackware-linux --build=x86_64-slackware-linux --host=x86_64-slackware-linux Thread model: posix gcc version 4.7.1 (GCC) binf@SINGULAR:~/BY$ valgrind --version valgrind-3.8.1 <CODE> #include <signal.h> #include <stdio.h> #include <sys/types.h> #include <unistd.h> double some_function_crash(double a,double b) { double pct = 1.2; return pct; } long double some_function(long double a,long double b) { long double pct = 2.1; return pct; } static void sighand(int signal) { //printf("uncomment Will crash [%llu] \n",some_function_crash(1,2)); printf("Will not crash [%llu] \n",some_function(2,1)); exit(1); } int main(int argc,char **argv) { signal(SIGINT, sighand); sleep(300); return 0; } </CODE> binf@SINGULAR:~/BY$ 
valgrind ./t ==1424== Memcheck, a memory error detector ==1424== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al. ==1424== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info ==1424== Command: ./t ==1424== ^CWill not crash [34342956376] ==1424== ==1424== HEAP SUMMARY: ==1424== in use at exit: 0 bytes in 0 blocks ==1424== total heap usage: 0 allocs, 0 frees, 0 bytes allocated ==1424== ==1424== All heap blocks were freed -- no leaks are possible ==1424== ==1424== For counts of detected and suppressed errors, rerun with: -v ==1424== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) binf@SINGULAR:~/BY$ valgrind ./t ==1431== Memcheck, a memory error detector ==1431== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al. ==1431== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info ==1431== Command: ./t ==1431== ^C==1431== ==1431== Process terminating with default action of signal 11 (SIGSEGV) ==1431== General Protection Fault ==1431== at 0x4E849A4: printf (in /lib64/libc-2.15.so) ==1431== by 0x400793: sighand (in /home/binf/BY/t) ==1431== by 0x4E68A9F: ??? (in /lib64/libc-2.15.so) ==1431== by 0x4E68CDF: sigprocmask (in /lib64/libc-2.15.so) ==1431== by 0x4EF037E: sleep (in /lib64/libc-2.15.so) ==1431== by 0x4007CA: main (in /home/binf/BY/t) ==1431== ==1431== HEAP SUMMARY: ==1431== in use at exit: 0 bytes in 0 blocks ==1431== total heap usage: 0 allocs, 0 frees, 0 bytes allocated ==1431== ==1431== All heap blocks were freed -- no leaks are possible ==1431== ==1431== For counts of detected and suppressed errors, rerun with: -v ==1431== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0) Cheers, -elz
Please don't add unrelated comments to bugs - your program does not seem to relate in any way to the problem being discussed here other than that both lead to a segfault. If you need help using valgrind then the mailing lists are the place to go and if you think you've found a bug in valgrind then please open a new bug. Anyway your problem is almost certainly that you're using printf from a signal handler, which is not safe - very few functions are safe to use in a signal handler.
I will open a new bug, since, judging by your response, I guess you didn't even try to reproduce it.
This was finally closed downstream: https://bugzilla.redhat.com/show_bug.cgi?id=587817