Bug 124035

Summary: Unexpected behaviour with user defined signal handler and memory protection
Product: [Developer tools] valgrind Reporter: Marcus Crestani <marcus>
Component: memcheck Assignee: Julian Seward <jseward>
Status: RESOLVED WORKSFORME    
Severity: normal    
Priority: NOR    
Version First Reported In: 3.1.0   
Target Milestone: ---   
Platform: openSUSE   
OS: Linux   
Latest Commit: Version Fixed/Implemented In:
Sentry Crash Report:

Description Marcus Crestani 2006-03-21 23:15:49 UTC
I am posting this bug report on recommendation of Tom Hughes and Julian Seward,
see http://article.gmane.org/gmane.comp.debugging.valgrind/5388 .

To demonstrate the problem, I provide a test program here:

>>>>> --- valgrind-test.c ---
#include <errno.h>
#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include <limits.h>    /* for PAGESIZE */
#ifndef PAGESIZE
#define PAGESIZE 4096
#endif

/* Parameter list of an SA_SIGINFO-style handler, shared by the definition
   of fault_handler and by SIGACTION_TYPE.  POSIX specifies only the typedef
   name siginfo_t; the `struct siginfo' tag is a libc implementation detail
   that current glibc no longer provides, so the typedef is used here. */
#define FAULT_HANDLER_ARGUMENTS \
   int signum, siginfo_t *siginfo, void *ctx
/* Faulting address; only meaningful inside a function whose parameters
   are declared with FAULT_HANDLER_ARGUMENTS. */
#define GET_FAULT_ADDRESS (siginfo->si_addr)
typedef void (* SIGACTION_TYPE)(int, siginfo_t *, void *);
/* act: action installed for SIGSEGV/SIGBUS; *_oact: the actions that were
   in place before, restored by remove_fault_handler. */
struct sigaction act, segv_oact, bus_oact;
/* When defined, fault_handler uninstalls itself on an unexpected fault. */
#define FAULT_HANDLER_REMOVE_HANDLER

/* Start of the page-aligned, PAGESIZE-long region under test. */
char *p;
/* Counter printed in the banner of each test run. */
static int run_number = 0;

/* Write-barrier hook: report on stderr the address whose write was trapped.
   The address is printed through unsigned long instead of int so it is not
   truncated where pointers are wider than int; the output format is
   unchanged on the 32-bit platform of the original report. */
void designate_modified (void *addr)
{
  fprintf (stderr, "[WRITEBARRIER] designate_modified: 0x%lX\n",
	   (unsigned long) addr);
}

/* Uninstall fault_handler: put back the SIGSEGV and SIGBUS actions that
   initialize_fault_handler saved in segv_oact and bus_oact. */
void remove_fault_handler ()
{
  /* Signals and their saved actions, restored in this order. */
  const int signals[] = { SIGSEGV, SIGBUS };
  struct sigaction *const previous[] = { &segv_oact, &bus_oact };
  size_t i;

  fputs ("[FAULTHANDLER] remove_fault_handler\n", stderr);
  for (i = 0; i < sizeof signals / sizeof signals[0]; i++)
    sigaction (signals[i], previous[i], NULL);
}

/* Declared here because fault_handler may call it (see
   FAULT_HANDLER_REINSTALL_HANDLER) before its definition is seen. */
void initialize_fault_handler (void);

/* SIGSEGV/SIGBUS handler implementing a write barrier for the page at p.

   A fault inside [p, p + PAGESIZE) is treated as a trapped write: it is
   reported through designate_modified and the page is made writable again,
   so the faulting store succeeds when the handler returns.  Any other fault
   is reported as fatal; with FAULT_HANDLER_REMOVE_HANDLER defined the
   previous signal actions are restored before returning.

   Note: fprintf, perror and exit are not async-signal-safe; that is
   tolerated here because this is a small single-threaded reproducer. */
void fault_handler (FAULT_HANDLER_ARGUMENTS)
{
  /* Work on a char * so range checks need no arithmetic on void *. */
  char *fault_addr = (char *) GET_FAULT_ADDRESS;

  fprintf (stderr, "\n[FAULTHANDLER] fault_handler: caught signal ");
  /* Pointers are printed via unsigned long: passing a pointer (or a
     pointer truncated to int) to %X is wrong where pointers are wider
     than int. */
  fprintf (stderr, "%d for 0x%lX.\n", signum, (unsigned long) fault_addr);
  if ((fault_addr >= p) && (fault_addr < p + PAGESIZE))
    {
      designate_modified (fault_addr);
      fprintf (stderr, "[FAULTHANDLER] Un-protect the page.\n");
      if (mprotect (p, PAGESIZE, PROT_READ | PROT_WRITE)) {
	/* perror may itself change errno, so capture it first. */
	int saved_errno = errno;
	perror ("Couldn't mprotect");
	exit (saved_errno);
      }
#ifdef FAULT_HANDLER_REINSTALL_HANDLER
      initialize_fault_handler ();
#endif
    }
  else  /* default sigsegv handler */
    {
      char *signal_name;
      if (signum == SIGSEGV)
	signal_name = "SIGSEGV";
      else if (signum == SIGBUS)
	signal_name = "SIGBUS";
      else
	abort (); /* something weird happened: wrong signal caught */
      fprintf (stderr, "\n\n[FAULTHANDLER] Fatal Error: ");
      fprintf (stderr, "Received %s (%d) for address 0x%lx\n",
	       signal_name, signum, (unsigned long) fault_addr);
#ifdef FAULT_HANDLER_REMOVE_HANDLER
      remove_fault_handler ();
#endif
    }
}

/* Install fault_handler for SIGSEGV and SIGBUS, saving the previously
   installed actions in segv_oact and bus_oact so that
   remove_fault_handler can restore them.  Exits if sigaction fails. */
void initialize_fault_handler (void)
{
  fprintf (stderr, "[FAULTHANDLER] initialize_fault_handler\n");

  /* memset takes (dest, value, length).  The original call had value and
     length swapped and used sizeof(sigaction), i.e. the function, so it
     cleared nothing. */
  memset (&act, 0, sizeof act);
  act.sa_sigaction = (SIGACTION_TYPE) fault_handler;
  sigemptyset (&act.sa_mask);
  /* SA_SIGINFO selects the three-argument handler form that delivers the
     fault address in siginfo. */
  act.sa_flags = SA_SIGINFO | SA_RESTART;
  if (sigaction (SIGSEGV, &act, &segv_oact) != 0
      || sigaction (SIGBUS, &act, &bus_oact) != 0) {
    /* perror may itself change errno, so capture it first. */
    int saved_errno = errno;
    perror ("Couldn't sigaction");
    exit (saved_errno);
  }
}

/* One write-barrier round trip on a freshly allocated page:
   1. allocate a buffer and set the global p to a page-aligned address in it,
   2. check plain read and write access,
   3. write-protect the page and check that reading still works,
   4. write to the page -- this faults, fault_handler un-protects the page,
      and the store is expected to complete when the handler returns,
   5. restore read/write protection and free the buffer.

   NOTE(review): POSIX leaves mprotect unspecified for memory that was not
   obtained from mmap; the native run in this report shows Linux accepts
   it for this malloc'd buffer. */
void
test_vdb (void)
{
  char c;
  char *p_local;
  fprintf (stderr, "[TESTVDB] *****< test run no. %d >*****\n", ++run_number);

  /* Allocate a buffer; it will have the default
     protection PROT_READ | PROT_WRITE.  It is large enough to contain one
     whole page wherever the allocation starts. */
  p_local = malloc (PAGESIZE + PAGESIZE - 1);
  if (p_local == NULL) {
    perror ("Couldn't malloc");
    exit (EXIT_FAILURE);
  }

  /* Align to a multiple of PAGESIZE, assumed to be a power of two.
     uintptr_t keeps the full pointer value; going through int would
     truncate it where pointers are wider than int. */
  p = (char *)(((uintptr_t) p_local + PAGESIZE - 1)
	       & ~((uintptr_t) PAGESIZE - 1));

  fprintf (stderr, "[TESTVDB] Allocate p: [0x%lX--0x%lX], length %d\n",
	   (unsigned long) p, (unsigned long) (p + PAGESIZE), (int) PAGESIZE);

  /* Test read. */
  fprintf (stderr, "[TESTVDB]   Attempt to read p[666]... ");
  c = p[666];
  fprintf (stderr, "read ok.\n");

  /* Test write. */
  fprintf (stderr, "[TESTVDB]   Attempt to write 42 to p[666]... ");
  p[666] = 42;
  fprintf (stderr, "write ok, p[666] = %d\n", p[666]);

  /* Mark the buffer read-only and set environment for write-barrier. */
  fprintf (stderr, "[TESTVDB]  Write-protect the page.\n");
  if (mprotect (p, PAGESIZE, PROT_READ)) {
    /* perror may itself change errno, so capture it first. */
    int saved_errno = errno;
    perror ("Couldn't mprotect");
    exit (saved_errno);
  }

  /* Test write-barrier read. */
  fprintf (stderr, "[TESTVDB]   Attempt to read p[666]... ");
  c = p[666];
  (void) c;  /* only the read access itself matters */
  fprintf (stderr, "read ok.\n");

  /* Test write-barrier write, program receives SIGSEGV. */
  fprintf (stderr, "[TESTVDB]   Attempt to write 23 to p[666] (0x%lX)... ",
	   (unsigned long) &p[666]);
  p[666] = 23;
  fprintf (stderr, "[TESTVDB]   Wrote p[666] = %d\n", p[666]);

  /* Stop write-barrier mode. */
  if (mprotect (p, PAGESIZE, PROT_READ | PROT_WRITE)) {
    int saved_errno = errno;
    perror ("Couldn't mprotect");
    exit (saved_errno);
  }

  fprintf (stderr, "[TESTVDB] Free p: [0x%lX--0x%lX], length %d\n",
	   (unsigned long) p, (unsigned long) (p + PAGESIZE), (int) PAGESIZE);
  free (p_local);
}

/* Provoke a fault outside the protected page by writing through the bogus
   address 0xdeadbeef.  fault_handler should classify it as fatal rather
   than as a write-barrier hit. */
void
test_segfault (void)
{
  char *q = (char *) 0xdeadbeef;
  fprintf (stderr, "[TSTSEGV] *****< test run no. %d >*****\n", ++run_number);
  /* Print via unsigned long: passing a pointer to %X is a format/argument
     mismatch. */
  fprintf (stderr, "[TSTSEGV] Now provoke real SEGFAULT at 0x%lX...\n",
	   (unsigned long) q);
  q[0] = 23;
}

/* Driver: install the fault handler, run the write-barrier test three
   times, then trigger a genuine segmentation fault. */
int
main (void)
{
  int round;

  initialize_fault_handler ();
  for (round = 0; round < 3; round++)
    test_vdb ();
  test_segfault ();
  /* Not expected to be reached: test_segfault ends in a fatal SIGSEGV. */
  exit (0);
}
<<<<< --- valgrind-test.c ---



Running without valgrind on

> uname -a
Linux zoiby 2.6.11.4-21.11-smp #1 SMP Thu Feb 2 20:54:26 UTC 2006 i686 i686 i386
GNU/Linux

leads to the expected results:

>>>>> --- gcc -o valgrind-test valgrind-test.c && ./valgrind-test ---
[FAULTHANDLER] initialize_fault_handler
[TESTVDB] *****< test run no. 1 >*****
[TESTVDB] Allocate p: [0x804B000--0x804C000], length 4096
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 42 to p[666]... write ok, p[666] = 42
[TESTVDB]  Write-protect the page.
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 23 to p[666] (0x804B29A)...
[FAULTHANDLER] fault_handler: caught signal 11 for 0x804B29A.
[WRITEBARRIER] designate_modified: 0x804B29A
[FAULTHANDLER] Un-protect the page.
[TESTVDB]   Wrote p[666] = 23
[TESTVDB] Free p: [0x804B000--0x804C000], length 4096
[TESTVDB] *****< test run no. 2 >*****
[TESTVDB] Allocate p: [0x804B000--0x804C000], length 4096
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 42 to p[666]... write ok, p[666] = 42
[TESTVDB]  Write-protect the page.
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 23 to p[666] (0x804B29A)...
[FAULTHANDLER] fault_handler: caught signal 11 for 0x804B29A.
[WRITEBARRIER] designate_modified: 0x804B29A
[FAULTHANDLER] Un-protect the page.
[TESTVDB]   Wrote p[666] = 23
[TESTVDB] Free p: [0x804B000--0x804C000], length 4096
[TESTVDB] *****< test run no. 3 >*****
[TESTVDB] Allocate p: [0x804B000--0x804C000], length 4096
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 42 to p[666]... write ok, p[666] = 42
[TESTVDB]  Write-protect the page.
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 23 to p[666] (0x804B29A)...
[FAULTHANDLER] fault_handler: caught signal 11 for 0x804B29A.
[WRITEBARRIER] designate_modified: 0x804B29A
[FAULTHANDLER] Un-protect the page.
[TESTVDB]   Wrote p[666] = 23
[TESTVDB] Free p: [0x804B000--0x804C000], length 4096
[TSTSEGV] *****< test run no. 4 >*****
[TSTSEGV] Now provoke real SEGFAULT at 0xDEADBEEF...

[FAULTHANDLER] fault_handler: caught signal 11 for 0xDEADBEEF.


[FAULTHANDLER] Fatal Error: Received SIGSEGV (11) for address 0xdeadbeef
[FAULTHANDLER] remove_fault_handler
Speicherzugriffsfehler (core dumped)
<<<<< --- gcc -o valgrind-test valgrind-test.c && ./valgrind-test ---



Running with valgrind does *not* give the expected result:

>>>>> --- valgrind -v --trace-signals=yes ./valgrind-test ---
==23822== Memcheck, a memory error detector.
==23822== Copyright (C) 2002-2005, and GNU GPL'd, by Julian Seward et al.
==23822== Using LibVEX rev 1471, a library for dynamic binary translation.
==23822== Copyright (C) 2004-2005, and GNU GPL'd, by OpenWorks LLP.
==23822== Using valgrind-3.1.0, a dynamic binary instrumentation framework.
==23822== Copyright (C) 2000-2005, and GNU GPL'd, by Julian Seward et al.
==23822==
--23822-- Command line
--23822--    ./valgrind-test
--23822-- Startup, with flags:
--23822--    -v
--23822--    --trace-signals=yes
--23822-- Contents of /proc/version:
--23822--   Linux version 2.6.11.4-21.11-smp (geeko@buildhost) (gcc version
3.3.5 20050117 (prerelease) (SUSE Linux)) #1 SMP Thu Feb 2 20:54:26 UTC 2006
--23822-- Arch and subarch: X86, x86-sse2
--23822-- Valgrind library directory: /usr/lib/valgrind
--23822-- Reading syms from /lib/ld-2.3.4.so (0x4000000)
--23822-- Reading syms from /home/crestani/valgrind-test (0x8048000)
--23822-- Reading syms from /usr/lib/valgrind/x86-linux/memcheck (0xB0000000)
--23822--    object doesn't have a symbol table
--23822--    object doesn't have a dynamic symbol table
--23822-- Max kernel-supported signal is 64
--23822-- Reading suppressions file: /usr/lib/valgrind/default.supp
--23822-- REDIR: 0x4012B60 (index) redirected to 0xB001B2B2 (???)
--23822-- Reading syms from /usr/lib/valgrind/x86-linux/vgpreload_core.so
(0x4018000)
--23822--    object doesn't have a symbol table
--23822-- Reading syms from /usr/lib/valgrind/x86-linux/vgpreload_memcheck.so
(0x401B000)
--23822--    object doesn't have a symbol table
--23822-- REDIR: 0x4012D00 (strlen) redirected to 0x401E010 (strlen)
--23822-- signal 11 arrived ... si_code=1, EIP=0x4005915, eip=0x62384E8C
--23822-- SIGSEGV: si_code=1 faultaddr=0xBEFFCFD0 tid=1 ESP=0xBEFFCF90
seg=0xBE7FF000-0xBEFFCFFF
--23822--        -> extended stack base to 0xBEFFC000
--23822-- Reading syms from /lib/tls/libc.so.6 (0x404D000)
--23822-- REDIR: 0x40B4C90 (rindex) redirected to 0x401DC70 (rindex)
--23822-- signal 11 arrived ... si_code=1, EIP=0x40863E6, eip=0x623CC155
--23822-- SIGSEGV: si_code=1 faultaddr=0xBEFFB394 tid=1 ESP=0xBEFFB37C
seg=0xBE7FF000-0xBEFFBFFF
--23822--        -> extended stack base to 0xBEFFB000
--23822-- signal 11 arrived ... si_code=1, EIP=0x40868F7, eip=0x623CCB30
--23822-- SIGSEGV: si_code=1 faultaddr=0xBEFFAD78 tid=1 ESP=0xBEFFAD78
seg=0xBE7FF000-0xBEFFAFFF
--23822--        -> extended stack base to 0xBEFFA000
--23822-- REDIR: 0x40007A0 (_dl_sysinfo_int80) redirected to 0xB001B2AF (???)
[FAULTHANDLER] initialize_fault_handler
--23822-- REDIR: 0x40B5A90 (memset) redirected to 0x401E7E0 (memset)
++23822++ sys_sigaction: sigNo 11, new 0xBEFFD9C4, old 0xBEFFD934, new flags
0x14000004
++23822++ sys_sigaction: sigNo 7, new 0xBEFFD9C4, old 0xBEFFD934, new flags
0x14000004
[TESTVDB] *****< test run no. 1 >*****
--23822-- REDIR: 0x40B1520 (malloc) redirected to 0x401C3D2 (malloc)
[TESTVDB] Allocate p: [0x4168000--0x4169000], length 4096
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 42 to p[666]... write ok, p[666] = 42
[TESTVDB]  Write-protect the page.
[TESTVDB]   Attempt to read p[666]... read ok.
[TESTVDB]   Attempt to write 23 to p[666] (0x416829A)... --23822-- signal 11
arrived ... si_code=2, EIP=0x8048A05, eip=0x623DEF73
--23822-- SIGSEGV: si_code=2 faultaddr=0x416829A tid=1 ESP=0xBEFFDA90
seg=0x4168000-0x4168FFF
--23822-- delivering signal 11 (SIGSEGV):2 to thread 1
--23822-- push_signal_frame (thread 1): signal 11

[FAULTHANDLER] fault_handler: caught signal 11 for 0x416829A.
[WRITEBARRIER] designate_modified: 0x416829A
[FAULTHANDLER] Un-protect the page.
--23822-- VG_(signal_return) (thread 1): isRT=1 valid magic; EIP=0x8048A05
==23822== Invalid write of size 1
==23822==    at 0x8048A05: test_vdb (in /home/crestani/valgrind-test)
==23822==    by 0x8048B25: main (in /home/crestani/valgrind-test)
==23822==  Address 0x39 is not stack'd, malloc'd or (recently) free'd
--23822-- signal 11 arrived ... si_code=1, EIP=0x8048A05, eip=0x623E02AA
--23822-- SIGSEGV: si_code=1 faultaddr=0x39 tid=1 ESP=0xBEFFDA90 seg=0x0-0x3FFFFFF
--23822-- delivering signal 11 (SIGSEGV):1 to thread 1
--23822-- push_signal_frame (thread 1): signal 11

[FAULTHANDLER] fault_handler: caught signal 11 for 0x39.


[FAULTHANDLER] Fatal Error: --23822-- REDIR: 0x40B48D0 (strlen) redirected to
0x401DFF0 (strlen)
Received SIGSEGV (11) for address 0x39
[FAULTHANDLER] remove_fault_handler
++23822++ sys_sigaction: sigNo 11, new 0xBEFFD3A8, old 0x0, new flags 0x4000000
++23822++ sys_sigaction: sigNo 7, new 0xBEFFD3A8, old 0x0, new flags 0x4000000
--23822-- VG_(signal_return) (thread 1): isRT=1 valid magic; EIP=0x8048A05
--23822-- signal 11 arrived ... si_code=1, EIP=0x8048A05, eip=0x623E02AA
--23822-- SIGSEGV: si_code=1 faultaddr=0x39 tid=1 ESP=0xBEFFDA90 seg=0x0-0x3FFFFFF
--23822-- delivering signal 11 (SIGSEGV):1 to thread 1
--23822-- delivering 11 (code 1) to default handler; action: terminate+core
==23822==
==23822== Process terminating with default action of signal 11 (SIGSEGV):
dumping core
==23822==  Access not within mapped region at address 0x39
==23822==    at 0x8048A05: test_vdb (in /home/crestani/valgrind-test)
==23822==    by 0x8048B25: main (in /home/crestani/valgrind-test)
++23822++ sys_sigaction: sigNo 11, new 0x623598C8, old 0x0, new flags 0x0
++23822++ sys_sigaction: sigNo 7, new 0x623598C4, old 0x0, new flags 0x0
++23822++ sys_sigaction: sigNo 4, new 0x623598C0, old 0x0, new flags 0x0
++23822++ sys_sigaction: sigNo 8, new 0x623598DC, old 0x0, new flags 0x0
--23822-- REDIR: 0x40AF640 (free) redirected to 0x401CEFB (free)
==23822==
==23822== ERROR SUMMARY: 2 errors from 1 contexts (suppressed: 13 from 2)
==23822==
==23822== 2 errors in context 1 of 1:
==23822== Invalid write of size 1
==23822==    at 0x8048A05: test_vdb (in /home/crestani/valgrind-test)
==23822==    by 0x8048B25: main (in /home/crestani/valgrind-test)
==23822==  Address 0x39 is not stack'd, malloc'd or (recently) free'd
--23822--
--23822-- supp:    2 strlen/_dl_init_paths/dl_main/_dl_sysdep_start(Cond)
--23822-- supp:   11 dl_relocate_object
==23822==
==23822== IN SUMMARY: 2 errors from 1 contexts (suppressed: 13 from 2)
==23822==
==23822== malloc/free: in use at exit: 8,191 bytes in 1 blocks.
==23822== malloc/free: 1 allocs, 0 frees, 8,191 bytes allocated.
==23822==
==23822== searching for pointers to 1 not-freed blocks.
==23822== checked 58,468 bytes.
==23822==
==23822== LEAK SUMMARY:
==23822==    definitely lost: 0 bytes in 0 blocks.
==23822==      possibly lost: 0 bytes in 0 blocks.
==23822==    still reachable: 8,191 bytes in 1 blocks.
==23822==         suppressed: 0 bytes in 0 blocks.
==23822== Reachable blocks (those to which a pointer was found) are not shown.
==23822== To see them, rerun with: --show-reachable=yes
--23822--  memcheck: sanity checks: 0 cheap, 1 expensive
--23822--  memcheck: auxmaps: 0 auxmap entries (0k, 0M) in use
--23822--  memcheck: auxmaps: 0 searches, 0 comparisons
--23822--  memcheck: secondaries: 7 issued (448k, 0M)
--23822--  memcheck: secondaries: 18 accessible and distinguished (1152k, 1M)
--23822--     tt/tc: 3,537 tt lookups requiring 3,574 probes
--23822--     tt/tc: 3,537 fast-cache updates, 3 flushes
--23822-- translate: new        1,666 (36,422 -> 587,841; ratio 161:10) [0 scs]
--23822-- translate: dumped     0 (0 -> ??)
--23822-- translate: discarded  9 (217 -> ??)
--23822-- scheduler: 27,607 jumps (bb entries).
--23822-- scheduler: 0/1,964 major/minor sched events.
--23822--    sanity: 1 cheap, 1 expensive checks.
--23822--    exectx: 30,011 lists, 10 contexts (avg 0 per list)
--23822--    exectx: 16 searches, 6 full compares (375 per 1000)
--23822--    exectx: 0 cmp2, 40 cmp4, 0 cmpAll
Speicherzugriffsfehler
<<<<< --- valgrind -v --trace-signals=yes ./valgrind-test ---


After the first invocation of the signal handler things seem to go wrong here:
--23822-- SIGSEGV: si_code=1 faultaddr=0x39

Instead of re-executing the write that caused the first signal, a second
SIGSEGV occurs at address 0x39.  Where does 0x39 come from?
Comment 1 Julian Seward 2006-03-21 23:57:15 UTC
Giving the flag --vex-iropt-precise-memory-exns=yes to Valgrind
"solves" the problem.
Comment 2 Marcus Crestani 2006-03-22 00:26:14 UTC
Excellent!  Thanks for the fast reply, works for me.
Thanks for valgrind, it's a great tool.