I am posting this bug report on recommendation of Tom Hughes and Julian Seward, see http://article.gmane.org/gmane.comp.debugging.valgrind/5388 . To demonstrate the problem, I provide a test program here: >>>>> --- valgrind-test.c --- #include <stdio.h> #include <stdlib.h> #include <errno.h> #include <signal.h> #include <sys/mman.h> #include <limits.h> /* for PAGESIZE */ #ifndef PAGESIZE #define PAGESIZE 4096 #endif #define FAULT_HANDLER_ARGUMENTS \ int signum, struct siginfo *siginfo, void *ctx #define GET_FAULT_ADDRESS siginfo->si_addr typedef void (* SIGACTION_TYPE)(int, struct siginfo *, void *); struct sigaction act, segv_oact, bus_oact; #define FAULT_HANDLER_REMOVE_HANDLER char *p; static int run_number = 0; void designate_modified (void *addr) { fprintf (stderr, "[WRITEBARRIER] designate_modified: 0x%X\n", (int) addr); } void remove_fault_handler () { fprintf (stderr, "[FAULTHANDLER] remove_fault_handler\n"); sigaction (SIGSEGV, &segv_oact, 0); sigaction (SIGBUS, &bus_oact, 0); } void fault_handler (FAULT_HANDLER_ARGUMENTS) { fprintf (stderr, "\n[FAULTHANDLER] fault_handler: caught signal "); fprintf (stderr, "%d for 0x%X.\n", signum, GET_FAULT_ADDRESS); if ((GET_FAULT_ADDRESS >= (void *) p) && (GET_FAULT_ADDRESS < (void *)p + PAGESIZE)) { designate_modified (GET_FAULT_ADDRESS); fprintf (stderr, "[FAULTHANDLER] Un-protect the page.\n"); if (mprotect (p, PAGESIZE, PROT_READ | PROT_WRITE)) { perror ("Couldn't mprotect"); exit (errno); } #ifdef FAULT_HANDLER_REINSTALL_HANDLER initialize_fault_handler (); #endif } else /* default sigsegv handler */ { char *signal_name; if (signum == SIGSEGV) signal_name = "SIGSEGV"; else if (signum == SIGBUS) signal_name = "SIGBUS"; else abort (); /* something weird happened: wrong signal caught */ fprintf (stderr, "\n\n[FAULTHANDLER] Fatal Error: "); fprintf (stderr, "Received %s (%d) for address 0x%x\n", signal_name, signum, (int) GET_FAULT_ADDRESS); #ifdef FAULT_HANDLER_REMOVE_HANDLER remove_fault_handler (); #endif } } void 
initialize_fault_handler () { fprintf (stderr, "[FAULTHANDLER] initialize_fault_handler\n"); memset (&act, sizeof(sigaction), 0); act.sa_sigaction = (SIGACTION_TYPE) fault_handler; sigemptyset (&act.sa_mask); act.sa_flags = SA_SIGINFO | SA_RESTART; sigaction (SIGSEGV, &act, &segv_oact); sigaction (SIGBUS, &act, &bus_oact); } void test_vdb (void) { char c; char *p_local; fprintf (stderr, "[TESTVDB] *****< test run no. %d >*****\n", ++run_number); /* Allocate a buffer; it will have the default protection PROT_READ | PROT_WRITE. */ p_local = malloc (PAGESIZE + PAGESIZE - 1); /* Align to a multiple of PAGESIZE, assumed to be a power of two */ p = (char *)(((int) p_local + PAGESIZE - 1) & ~(PAGESIZE - 1)); fprintf (stderr, "[TESTVDB] Allocate p: [0x%X--0x%X], length %d\n", (int) p, (int) (p + PAGESIZE), (int) PAGESIZE); /* Test read. */ fprintf (stderr, "[TESTVDB] Attempt to read p[666]... "); c = p[666]; fprintf (stderr, "read ok.\n"); /* Test write. */ fprintf (stderr, "[TESTVDB] Attempt to write 42 to p[666]... "); p[666] = 42; fprintf (stderr, "write ok, p[666] = %d\n", p[666]); /* Mark the buffer read-only and set environemnt for write-barrier. */ fprintf (stderr, "[TESTVDB] Write-protect the page.\n"); if (mprotect (p, PAGESIZE, PROT_READ)) { perror ("Couldn't mprotect"); exit (errno); } /* Test write-barrier read. */ fprintf (stderr, "[TESTVDB] Attempt to read p[666]... "); c = p[666]; fprintf (stderr, "read ok.\n"); /* Test write-barrier write, program receives SIGSEGV. */ fprintf (stderr, "[TESTVDB] Attempt to write 23 to p[666] (0x%X)... ", &p[666]); p[666] = 23; fprintf (stderr, "[TESTVDB] Wrote p[666] = %d\n", p[666]); /* Stop write-barrier mode. 
*/ if (mprotect (p, PAGESIZE, PROT_READ | PROT_WRITE)) { perror ("Couldn't mprotect"); exit (errno); } fprintf (stderr, "[TESTVDB] Free p: [0x%X--0x%X], length %d\n", (int) p, (int) (p + PAGESIZE), (int) PAGESIZE); free (p_local); } void test_segfault (void) { char *q = 0; q = (void *) 0xdeadbeef; fprintf (stderr, "[TSTSEGV] *****< test run no. %d >*****\n", ++run_number); fprintf (stderr, "[TSTSEGV] Now provoke real SEGFAULT at 0x%X...\n", q); q[0] = 23; } int main (void) { initialize_fault_handler (); test_vdb (); test_vdb (); test_vdb (); test_segfault (); /* This is never reached */ exit (0); } <<<<< --- valgrind-test.c --- Running without valgrind on > uname -a Linux zoiby 2.6.11.4-21.11-smp #1 SMP Thu Feb 2 20:54:26 UTC 2006 i686 i686 i386 GNU/Linux leads to the expected results: >>>>> --- gcc -o valgrind-test valgrind-test.c && ./valgrind-test --- [FAULTHANDLER] initialize_fault_handler [TESTVDB] *****< test run no. 1 >***** [TESTVDB] Allocate p: [0x804B000--0x804C000], length 4096 [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 42 to p[666]... write ok, p[666] = 42 [TESTVDB] Write-protect the page. [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 23 to p[666] (0x804B29A)... [FAULTHANDLER] fault_handler: caught signal 11 for 0x804B29A. [WRITEBARRIER] designate_modified: 0x804B29A [FAULTHANDLER] Un-protect the page. [TESTVDB] Wrote p[666] = 23 [TESTVDB] Free p: [0x804B000--0x804C000], length 4096 [TESTVDB] *****< test run no. 2 >***** [TESTVDB] Allocate p: [0x804B000--0x804C000], length 4096 [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 42 to p[666]... write ok, p[666] = 42 [TESTVDB] Write-protect the page. [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 23 to p[666] (0x804B29A)... [FAULTHANDLER] fault_handler: caught signal 11 for 0x804B29A. [WRITEBARRIER] designate_modified: 0x804B29A [FAULTHANDLER] Un-protect the page. 
[TESTVDB] Wrote p[666] = 23 [TESTVDB] Free p: [0x804B000--0x804C000], length 4096 [TESTVDB] *****< test run no. 3 >***** [TESTVDB] Allocate p: [0x804B000--0x804C000], length 4096 [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 42 to p[666]... write ok, p[666] = 42 [TESTVDB] Write-protect the page. [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 23 to p[666] (0x804B29A)... [FAULTHANDLER] fault_handler: caught signal 11 for 0x804B29A. [WRITEBARRIER] designate_modified: 0x804B29A [FAULTHANDLER] Un-protect the page. [TESTVDB] Wrote p[666] = 23 [TESTVDB] Free p: [0x804B000--0x804C000], length 4096 [TSTSEGV] *****< test run no. 4 >***** [TSTSEGV] Now provoke real SEGFAULT at 0xDEADBEEF... [FAULTHANDLER] fault_handler: caught signal 11 for 0xDEADBEEF. [FAULTHANDLER] Fatal Error: Received SIGSEGV (11) for address 0xdeadbeef [FAULTHANDLER] remove_fault_handler Speicherzugriffsfehler (core dumped) <<<<< --- gcc -o valgrind-test valgrind-test.c && ./valgrind-test --- Running with valgrind does *not* give the expected result: >>>>> --- valgrind -v --trace-signals=yes ./valgrind-test --- ==23822== Memcheck, a memory error detector. ==23822== Copyright (C) 2002-2005, and GNU GPL'd, by Julian Seward et al. ==23822== Using LibVEX rev 1471, a library for dynamic binary translation. ==23822== Copyright (C) 2004-2005, and GNU GPL'd, by OpenWorks LLP. ==23822== Using valgrind-3.1.0, a dynamic binary instrumentation framework. ==23822== Copyright (C) 2000-2005, and GNU GPL'd, by Julian Seward et al. 
==23822== --23822-- Command line --23822-- ./valgrind-test --23822-- Startup, with flags: --23822-- -v --23822-- --trace-signals=yes --23822-- Contents of /proc/version: --23822-- Linux version 2.6.11.4-21.11-smp (geeko@buildhost) (gcc version 3.3.5 20050117 (prerelease) (SUSE Linux)) #1 SMP Thu Feb 2 20:54:26 UTC 2006 --23822-- Arch and subarch: X86, x86-sse2 --23822-- Valgrind library directory: /usr/lib/valgrind --23822-- Reading syms from /lib/ld-2.3.4.so (0x4000000) --23822-- Reading syms from /home/crestani/valgrind-test (0x8048000) --23822-- Reading syms from /usr/lib/valgrind/x86-linux/memcheck (0xB0000000) --23822-- object doesn't have a symbol table --23822-- object doesn't have a dynamic symbol table --23822-- Max kernel-supported signal is 64 --23822-- Reading suppressions file: /usr/lib/valgrind/default.supp --23822-- REDIR: 0x4012B60 (index) redirected to 0xB001B2B2 (???) --23822-- Reading syms from /usr/lib/valgrind/x86-linux/vgpreload_core.so (0x4018000) --23822-- object doesn't have a symbol table --23822-- Reading syms from /usr/lib/valgrind/x86-linux/vgpreload_memcheck.so (0x401B000) --23822-- object doesn't have a symbol table --23822-- REDIR: 0x4012D00 (strlen) redirected to 0x401E010 (strlen) --23822-- signal 11 arrived ... si_code=1, EIP=0x4005915, eip=0x62384E8C --23822-- SIGSEGV: si_code=1 faultaddr=0xBEFFCFD0 tid=1 ESP=0xBEFFCF90 seg=0xBE7FF000-0xBEFFCFFF --23822-- -> extended stack base to 0xBEFFC000 --23822-- Reading syms from /lib/tls/libc.so.6 (0x404D000) --23822-- REDIR: 0x40B4C90 (rindex) redirected to 0x401DC70 (rindex) --23822-- signal 11 arrived ... si_code=1, EIP=0x40863E6, eip=0x623CC155 --23822-- SIGSEGV: si_code=1 faultaddr=0xBEFFB394 tid=1 ESP=0xBEFFB37C seg=0xBE7FF000-0xBEFFBFFF --23822-- -> extended stack base to 0xBEFFB000 --23822-- signal 11 arrived ... 
si_code=1, EIP=0x40868F7, eip=0x623CCB30 --23822-- SIGSEGV: si_code=1 faultaddr=0xBEFFAD78 tid=1 ESP=0xBEFFAD78 seg=0xBE7FF000-0xBEFFAFFF --23822-- -> extended stack base to 0xBEFFA000 --23822-- REDIR: 0x40007A0 (_dl_sysinfo_int80) redirected to 0xB001B2AF (???) [FAULTHANDLER] initialize_fault_handler --23822-- REDIR: 0x40B5A90 (memset) redirected to 0x401E7E0 (memset) ++23822++ sys_sigaction: sigNo 11, new 0xBEFFD9C4, old 0xBEFFD934, new flags 0x14000004 ++23822++ sys_sigaction: sigNo 7, new 0xBEFFD9C4, old 0xBEFFD934, new flags 0x14000004 [TESTVDB] *****< test run no. 1 >***** --23822-- REDIR: 0x40B1520 (malloc) redirected to 0x401C3D2 (malloc) [TESTVDB] Allocate p: [0x4168000--0x4169000], length 4096 [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 42 to p[666]... write ok, p[666] = 42 [TESTVDB] Write-protect the page. [TESTVDB] Attempt to read p[666]... read ok. [TESTVDB] Attempt to write 23 to p[666] (0x416829A)... --23822-- signal 11 arrived ... si_code=2, EIP=0x8048A05, eip=0x623DEF73 --23822-- SIGSEGV: si_code=2 faultaddr=0x416829A tid=1 ESP=0xBEFFDA90 seg=0x4168000-0x4168FFF --23822-- delivering signal 11 (SIGSEGV):2 to thread 1 --23822-- push_signal_frame (thread 1): signal 11 [FAULTHANDLER] fault_handler: caught signal 11 for 0x416829A. [WRITEBARRIER] designate_modified: 0x416829A [FAULTHANDLER] Un-protect the page. --23822-- VG_(signal_return) (thread 1): isRT=1 valid magic; EIP=0x8048A05 ==23822== Invalid write of size 1 ==23822== at 0x8048A05: test_vdb (in /home/crestani/valgrind-test) ==23822== by 0x8048B25: main (in /home/crestani/valgrind-test) ==23822== Address 0x39 is not stack'd, malloc'd or (recently) free'd --23822-- signal 11 arrived ... si_code=1, EIP=0x8048A05, eip=0x623E02AA --23822-- SIGSEGV: si_code=1 faultaddr=0x39 tid=1 ESP=0xBEFFDA90 seg=0x0-0x3FFFFFF --23822-- delivering signal 11 (SIGSEGV):1 to thread 1 --23822-- push_signal_frame (thread 1): signal 11 [FAULTHANDLER] fault_handler: caught signal 11 for 0x39. 
[FAULTHANDLER] Fatal Error: --23822-- REDIR: 0x40B48D0 (strlen) redirected to 0x401DFF0 (strlen) Received SIGSEGV (11) for address 0x39 [FAULTHANDLER] remove_fault_handler ++23822++ sys_sigaction: sigNo 11, new 0xBEFFD3A8, old 0x0, new flags 0x4000000 ++23822++ sys_sigaction: sigNo 7, new 0xBEFFD3A8, old 0x0, new flags 0x4000000 --23822-- VG_(signal_return) (thread 1): isRT=1 valid magic; EIP=0x8048A05 --23822-- signal 11 arrived ... si_code=1, EIP=0x8048A05, eip=0x623E02AA --23822-- SIGSEGV: si_code=1 faultaddr=0x39 tid=1 ESP=0xBEFFDA90 seg=0x0-0x3FFFFFF --23822-- delivering signal 11 (SIGSEGV):1 to thread 1 --23822-- delivering 11 (code 1) to default handler; action: terminate+core ==23822== ==23822== Process terminating with default action of signal 11 (SIGSEGV): dumping core ==23822== Access not within mapped region at address 0x39 ==23822== at 0x8048A05: test_vdb (in /home/crestani/valgrind-test) ==23822== by 0x8048B25: main (in /home/crestani/valgrind-test) ++23822++ sys_sigaction: sigNo 11, new 0x623598C8, old 0x0, new flags 0x0 ++23822++ sys_sigaction: sigNo 7, new 0x623598C4, old 0x0, new flags 0x0 ++23822++ sys_sigaction: sigNo 4, new 0x623598C0, old 0x0, new flags 0x0 ++23822++ sys_sigaction: sigNo 8, new 0x623598DC, old 0x0, new flags 0x0 --23822-- REDIR: 0x40AF640 (free) redirected to 0x401CEFB (free) ==23822== ==23822== ERROR SUMMARY: 2 errors from 1 contexts (suppressed: 13 from 2) ==23822== ==23822== 2 errors in context 1 of 1: ==23822== Invalid write of size 1 ==23822== at 0x8048A05: test_vdb (in /home/crestani/valgrind-test) ==23822== by 0x8048B25: main (in /home/crestani/valgrind-test) ==23822== Address 0x39 is not stack'd, malloc'd or (recently) free'd --23822-- --23822-- supp: 2 strlen/_dl_init_paths/dl_main/_dl_sysdep_start(Cond) --23822-- supp: 11 dl_relocate_object ==23822== ==23822== IN SUMMARY: 2 errors from 1 contexts (suppressed: 13 from 2) ==23822== ==23822== malloc/free: in use at exit: 8,191 bytes in 1 blocks. 
==23822== malloc/free: 1 allocs, 0 frees, 8,191 bytes allocated. ==23822== ==23822== searching for pointers to 1 not-freed blocks. ==23822== checked 58,468 bytes. ==23822== ==23822== LEAK SUMMARY: ==23822== definitely lost: 0 bytes in 0 blocks. ==23822== possibly lost: 0 bytes in 0 blocks. ==23822== still reachable: 8,191 bytes in 1 blocks. ==23822== suppressed: 0 bytes in 0 blocks. ==23822== Reachable blocks (those to which a pointer was found) are not shown. ==23822== To see them, rerun with: --show-reachable=yes --23822-- memcheck: sanity checks: 0 cheap, 1 expensive --23822-- memcheck: auxmaps: 0 auxmap entries (0k, 0M) in use --23822-- memcheck: auxmaps: 0 searches, 0 comparisons --23822-- memcheck: secondaries: 7 issued (448k, 0M) --23822-- memcheck: secondaries: 18 accessible and distinguished (1152k, 1M) --23822-- tt/tc: 3,537 tt lookups requiring 3,574 probes --23822-- tt/tc: 3,537 fast-cache updates, 3 flushes --23822-- translate: new 1,666 (36,422 -> 587,841; ratio 161:10) [0 scs] --23822-- translate: dumped 0 (0 -> ??) --23822-- translate: discarded 9 (217 -> ??) --23822-- scheduler: 27,607 jumps (bb entries). --23822-- scheduler: 0/1,964 major/minor sched events. --23822-- sanity: 1 cheap, 1 expensive checks. --23822-- exectx: 30,011 lists, 10 contexts (avg 0 per list) --23822-- exectx: 16 searches, 6 full compares (375 per 1000) --23822-- exectx: 0 cmp2, 40 cmp4, 0 cmpAll Speicherzugriffsfehler <<<<< --- valgrind -v --trace-signals=yes ./valgrind-test --- After the first invocation of the signal handler things seem to go wrong here: --23822-- SIGSEGV: si_code=1 faultaddr=0x39 Instead of executing the write, that previously caused the first signal, a second SIGSEGV occurs on address 0x39. Where does 0x39 come from?
Passing the flag --vex-iropt-precise-memory-exns=yes to Valgrind "solves" the problem, i.e. it works around it.
Excellent! Thanks for the fast reply; this works for me. Thanks for Valgrind, it's a great tool.