Bug 345077 - linux syscall execveat support (linux 3.19)
Summary: linux syscall execveat support (linux 3.19)
Status: RESOLVED FIXED
Alias: None
Product: valgrind
Classification: Developer tools
Component: memcheck (show other bugs)
Version: unspecified
Platform: Compiled Sources Linux
: NOR wishlist
Target Milestone: ---
Assignee: Julian Seward
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2015-03-12 03:57 UTC by lberk
Modified: 2020-06-09 07:20 UTC (History)
3 users (show)

See Also:
Latest Commit:
Version Fixed In:
Sentry Crash Report:


Attachments
Proposed Patch v1 (9.81 KB, patch)
2015-03-12 03:58 UTC, lberk
Details
patch (17.89 KB, patch)
2020-04-07 11:52 UTC, Alexandra Hajkova
Details
patch (17.93 KB, patch)
2020-04-08 11:27 UTC, Alexandra Hajkova
Details
patch (18.51 KB, patch)
2020-05-22 12:31 UTC, Alexandra Hajkova
Details
patch (19.59 KB, patch)
2020-05-25 09:34 UTC, Alexandra Hajkova
Details

Note You need to log in before you can comment on or make changes to this bug.
Description lberk 2015-03-12 03:57:24 UTC
execveat (2) support was added in Linux 3.19, syswrap functions 

Reproducible: Always

Steps to Reproduce:
1. Write a C program that makes use of execveat
2. run under valgrind
3.

Actual Results:  
execveat support is not found

Expected Results:  
at least minimal execveat support should be seen

I've written a preliminary patch.  I would be happy to improve upon this patch if needed.

 Linux-specific (new in Linux 3.19)
+DECL_TEMPLATE(linux, sys_execveat);
 /* ---------------------------------------------------------------------
    Wrappers for sockets and ipc-ery.  These are split into standalone
    procedures because x86-linux hides them inside multiplexors
Index: coregrind/m_syswrap/syswrap-amd64-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-amd64-linux.c	(revision 14976)
+++ coregrind/m_syswrap/syswrap-amd64-linux.c	(working copy)
@@ -1079,10 +1079,11 @@
 //   LIN__(__NR_renameat2,         sys_ni_syscall),       // 316
 //   LIN__(__NR_seccomp,           sys_ni_syscall),       // 317
    LINXY(__NR_getrandom,         sys_getrandom),        // 318
-   LINXY(__NR_memfd_create,      sys_memfd_create)      // 319
+   LINXY(__NR_memfd_create,      sys_memfd_create),      // 319
 
 //   LIN__(__NR_kexec_file_load,   sys_ni_syscall),       // 320
 //   LIN__(__NR_bpf,               sys_ni_syscall)        // 321
+   LINX_(__NR_execveat,          sys_execveat),         // 322
 };
 
 SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno )
Index: coregrind/m_syswrap/syswrap-linux.c
===================================================================
--- coregrind/m_syswrap/syswrap-linux.c	(revision 14976)
+++ coregrind/m_syswrap/syswrap-linux.c	(working copy)
@@ -41,6 +41,7 @@
 #include "pub_core_xarray.h"
 #include "pub_core_clientstate.h"
 #include "pub_core_debuglog.h"
+#include "pub_core_gdbserver.h"     // VG_(gdbserver)
 #include "pub_core_libcbase.h"
 #include "pub_core_libcassert.h"
 #include "pub_core_libcfile.h"
@@ -55,6 +56,7 @@
 #include "pub_core_signals.h"
 #include "pub_core_syscall.h"
 #include "pub_core_syswrap.h"
+#include "pub_core_ume.h"
 #include "pub_core_inner.h"
 #if defined(ENABLE_INNER_CLIENT_REQUEST)
 #include "pub_core_clreq.h"
@@ -3041,6 +3043,204 @@
    }
 }
 
+// Pre_read a char** argument.
+static void pre_argv_envp(Addr a, ThreadId tid, const HChar* s1, const HChar* s2)
+{
+   while (True) {
+      Addr a_deref;
+      Addr* a_p = (Addr*)a;
+      PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) );
+      a_deref = *a_p;
+      if (0 == a_deref)
+         break;
+      PRE_MEM_RASCIIZ( s2, a_deref );
+      a += sizeof(char*);
+   }
+}
+
+PRE(sys_execveat)
+{
+   HChar*       path = NULL;       /* path to executable */
+   HChar**      envp = NULL;
+   HChar**      argv = NULL;
+   HChar**      arg3copy;
+   HChar*       launcher_basename = NULL;
+   ThreadState* tst;
+   Int          i, j, tot_args;
+   SysRes       res;
+   Bool         setuid_allowed, trace_this_child;
+   PRINT("sys_execveat ( %ld, %#lx(%s), %#lx, %#lx, %ld", ARG1, ARG2, (char*)ARG2, ARG3, ARG4, ARG5);
+   PRE_REG_READ5(vki_off_t, "execveat",
+		 int, fd, char *, filename, char **, argv, char **, envp, int, flags);
+   PRE_MEM_RASCIIZ( "execveat(filename)", ARG2);
+
+   if (ARG3 != 0)
+     pre_argv_envp( ARG3, tid, "execveat(argv)", "execveat(argv[i])" );
+   if (ARG4 != 0)
+     pre_argv_envp( ARG4, tid, "execveat(envp)", "execveat(envp[i])" );
+
+   vg_assert(VG_(is_valid_tid)(tid));
+   tst = VG_(get_ThreadState)(tid);
+
+   if (ARG1 <= 0 && ARG2 == 0) {
+     SET_STATUS_Failure( VKI_EFAULT );
+     return;
+   }
+
+   if (ARG2 == 0 && ARG5 == VKI_AT_EMPTY_PATH) {
+     vg_assert(FAILURE);
+     VG_(message)(Vg_UserMsg, "AT_EMPTY_PATH flag with file descriptor combination"
+			      " not yet supported, exiting.\n");
+     VG_(exit)(101);
+   }
+
+   if (ML_(safe_to_deref)( (void*)ARG2, 1 )
+       && *(Char *)ARG2 != '/'
+       && ((Int)ARG1) != ((Int)VKI_AT_FDCWD)
+       && !ML_(fd_allowed)(ARG1, "execveat", tid, False))
+      SET_STATUS_Failure( VKI_EBADF );
+
+   {
+     const HChar** child_argv = (const HChar**)ARG3;
+     if (child_argv && child_argv[0] == NULL)
+       child_argv = NULL;
+     trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG2, child_argv);
+   }
+
+  setuid_allowed = trace_this_child ? False : True;
+  res = VG_(pre_exec_check)((const HChar *)ARG2, NULL, setuid_allowed);
+  if (sr_isError(res)) {
+    SET_STATUS_Failure( sr_Err(res) );
+    return;
+  }
+
+  if (trace_this_child
+      && (VG_(name_of_launcher) == NULL
+	  || VG_(name_of_launcher)[0] != '/')) {
+    SET_STATUS_Failure( VKI_ECHILD );
+    return;
+  }
+
+  VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG2);
+
+  if (VG_(clo_vgdb)  != Vg_VgdbNo) {
+ 	VG_(gdbserver) (0);
+  }
+
+
+  VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread );
+  VG_(reap_threads)(tid);
+
+  if (trace_this_child) {
+
+    path = VG_(name_of_launcher);
+    vg_assert(path);
+    launcher_basename = VG_(strrchr)(path, '/');
+    if (launcher_basename == NULL || launcher_basename[1] == 0) {
+      launcher_basename = path;
+    } else {
+      launcher_basename++;
+    }
+  } else {
+    path = (HChar*)ARG2;
+  }
+
+  if (ARG4 == 0) {
+    envp = NULL;
+  } else {
+    envp = VG_(env_clone)( (HChar**)ARG4 );
+    if (envp  == NULL) goto hosed;
+    VG_(env_remove_valgrind_env_stuff)( envp );
+  }
+
+  if (trace_this_child) {
+    VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir));
+  }
+
+  if (!trace_this_child) {
+    argv = (HChar**)ARG3;
+  } else {
+    vg_assert( VG_(args_for_valgrind) );
+    vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 );
+    vg_assert( VG_(args_for_valgrind_noexecpass) 
+	       <= VG_(sizeXA)( VG_(args_for_valgrind) ) );
+    tot_args = 1;
+    tot_args += VG_(sizeXA)( VG_(args_for_valgrind) );
+    tot_args -= VG_(args_for_valgrind_noexecpass);
+    tot_args++;
+
+    arg3copy = (HChar**)ARG3;
+    if (arg3copy && arg3copy[0]) {
+      for (i = 1; arg3copy[i]; i++)
+	tot_args++;
+    }
+    // allocate
+      argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1",
+                          (tot_args+1) * sizeof(HChar*) );
+      // copy
+      j = 0;
+      argv[j++] = launcher_basename;
+      for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) {
+         if (i < VG_(args_for_valgrind_noexecpass))
+            continue;
+         argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i );
+      }
+      argv[j++] = (HChar*)ARG2;
+      if (arg3copy && arg3copy[0])
+         for (i = 1; arg3copy[i]; i++)
+            argv[j++] = arg3copy[i];
+      argv[j++] = NULL;
+      // check
+      vg_assert(j == tot_args+1);
+   }
+  
+   /* restore the DATA rlimit for the child */
+   VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data));
+
+     {
+      vki_sigset_t allsigs;
+      vki_siginfo_t info;
+
+      /* What this loop does: it queries SCSS (the signal state that
+         the client _thinks_ the kernel is in) by calling
+         VG_(do_sys_sigaction), and modifies the real kernel signal
+         state accordingly. */
+      for (i = 1; i < VG_(max_signal); i++) {
+         vki_sigaction_fromK_t sa_f;
+         vki_sigaction_toK_t   sa_t;
+         VG_(do_sys_sigaction)(i, NULL, &sa_f);
+         VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t);
+         if (sa_t.ksa_handler == VKI_SIG_IGN)
+            VG_(sigaction)(i, &sa_t, NULL);
+         else {
+            sa_t.ksa_handler = VKI_SIG_DFL;
+            VG_(sigaction)(i, &sa_t, NULL);
+         }
+      }
+
+      VG_(sigfillset)(&allsigs);
+      while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0)
+         ;
+
+      VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL);
+   }
+
+  SET_STATUS_from_SysRes(
+     VG_(do_syscall5)(__NR_execveat, ARG1, (UWord)path, (UWord)argv, (UWord)envp, ARG5)
+  );
+  
+  hosed:
+   vg_assert(FAILURE);
+   VG_(message)(Vg_UserMsg, "execveat( %ld, %#lx(%s), %#lx, %#lx, %ld) failed, errno %ld\n",
+                ARG1, ARG2, (char*)ARG2, ARG3, ARG4, ARG5, ERR);
+   VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from "
+                            "execveat() failing, so I'm dying.\n");
+   VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execveat), "
+                            "or work out how to recover.\n");
+   VG_(exit)(101);
+
+}
+
 /* ---------------------------------------------------------------------
    utime wrapper
    ------------------------------------------------------------------ */
Index: include/vki/vki-amd64-linux.h
===================================================================
--- include/vki/vki-amd64-linux.h	(revision 14976)
+++ include/vki/vki-amd64-linux.h	(working copy)
@@ -291,6 +291,12 @@
 #define VKI_F_LINUX_SPECIFIC_BASE	1024
 
 //----------------------------------------------------------------------
+// From linux-3.19/include/asm-x86_64/fcntl.h
+//----------------------------------------------------------------------
+
+#define VKI_AT_EMPTY_PATH	0x1000
+
+//----------------------------------------------------------------------
 // From linux-2.6.9/include/asm-x86_64/resource.h
 //----------------------------------------------------------------------
 
Index: include/vki/vki-scnums-amd64-linux.h
===================================================================
--- include/vki/vki-scnums-amd64-linux.h	(revision 14976)
+++ include/vki/vki-scnums-amd64-linux.h	(working copy)
@@ -403,6 +403,7 @@
 #define __NR_memfd_create       319
 #define __NR_kexec_file_load    320
 #define __NR_bpf                321
+#define __NR_execveat           322
 
 #endif /* __VKI_SCNUMS_AMD64_LINUX_H */
 
Index: include/vki/vki-scnums-x86-linux.h
===================================================================
--- include/vki/vki-scnums-x86-linux.h	(revision 14976)
+++ include/vki/vki-scnums-x86-linux.h	(working copy)
@@ -392,6 +392,7 @@
 #define __NR_getrandom          355
 #define __NR_memfd_create       356
 #define __NR_bpf                357
+#define __NR_execveat           358
 
 #endif /* __VKI_SCNUMS_X86_LINUX_H */
Comment 1 lberk 2015-03-12 03:58:56 UTC
Created attachment 91539 [details]
Proposed Patch v1

Adding patch as an attachment, for whatever form is preferred.
Comment 2 Tom Hughes 2015-04-28 11:42:09 UTC
It must be possible to do better than that surely - copying and pasting all that code from the existing sys_execve handler seems like a very had idea. We need to find a way to keep as much as possible common to them both I think.
Comment 3 Julian Seward 2015-08-13 15:02:39 UTC
How urgent/important is it to fix this?  3.19 was some time back now
and I don't see a flood of complaints about missing execveat support.
Is it only occasionally used?
Comment 4 Mark Wielaard 2020-01-17 12:34:23 UTC
glibc doesn't have a wrapper for execveat currently, which is most likely why we have never seen it reported.

But glibc since 2.27 implements fexecve using execveat. So it can be seen with something simple like:

#include <unistd.h>
#include <stddef.h>
#include <sys/stat.h>
#include <fcntl.h>

int main()
{
  char* argv[] = { "foobar", "arg1", NULL };
  char* envp[] = { NULL };

  int fd = open ("/bin/echo", O_RDONLY);
  fexecve (fd, argv, envp);
}

$ valgrind -q ./t
--210730-- WARNING: unhandled amd64-linux syscall: 322
--210730-- You may be able to write your own handler.
--210730-- Read the file README_MISSING_SYSCALL_OR_IOCTL.
--210730-- Nevertheless we consider this a bug.  Please report
--210730-- it at http://valgrind.org/support/bug_reports.html.
arg1

Note that it still works because glibc falls back to using /proc/self/[fdnr] if the syscall fails.
Comment 5 Alexandra Hajkova 2020-04-07 11:52:52 UTC
Created attachment 127346 [details]
patch

This new execveat wrapper passes Mark's fexecve test. The regular test is added to the testsuite but should work only when the glibc wrapper would be added. I have the glibc wrapper patch already prepared and should be able to post it soon.
Comment 6 Alexandra Hajkova 2020-04-08 11:27:46 UTC
Created attachment 127385 [details]
patch
Comment 7 Mark Wielaard 2020-04-09 15:28:48 UTC
(In reply to Alexandra Hajkova from comment #6)
> Created attachment 127385 [details]
> patch

We discussed this on irc a bit:

- Nicely split the generic code from the execve and execveat PRE handlers.
- The test for whether it is an absolute PATH is wrong, should be (path[0] != '/').
- abs_path needs to be allocate before use.
- Because abs_path and buf are allocated they need to be VG_(free) at the end after handle_pre_sys_execve returns (which means the syscall failed, otherwise the function doesn't return).
- Because path might now be replaced the am_is_valid_for_client check should be moved from the generic code to the execve and execveat PRE handlers.
- Since glibc doesn't have a wrapper yet, best to have our own syscall wrapper in the testcase (with a prereq check case, then you can unconditionally add/compile the test and you can get rid of the configure.ac tests and HAVE_EXECVEAT conditional).
- Testcase needs to test errors before success case (because success never returns).
Comment 8 Alexandra Hajkova 2020-05-22 12:31:13 UTC
Created attachment 128690 [details]
patch

The new version
Comment 9 Mark Wielaard 2020-05-22 14:36:19 UTC
This looks good. I read it again, we already discussed earlier versions on irc, but I don't have any more feedback. Except that the memcheck/tests/linux/sys-execveat.std{err,out}.exp files are missing.

For other reviewers. Some of the trickery, like check_pathptr, was done so warnings kept being produced in the same order as previous versions (that existing testcases expect for execve), and not to fail the syscall "too early" even if we know the arguments are bogus, so that the syscalls fail for the "right" reason.
Comment 10 Alexandra Hajkova 2020-05-25 09:34:25 UTC
Created attachment 128761 [details]
patch

stderr.exp added, sorry for forgetting about it
Comment 11 Mark Wielaard 2020-06-08 19:00:32 UTC
Thanks, finally pushed as:

commit 6f6ff49ffa10da4e8027220d70791a72437846fd
Author: Alexandra Hájková <ahajkova@redhat.com>
Date:   Thu Apr 9 17:28:18 2020 +0200

    Add support for execveat syscall
    
    Refactor the code to be reusable between execve and
    execveat syscalls.
    
    https://bugs.kde.org/show_bug.cgi?id=345077

I did add a sys-execveat.stdout.exp to make sure the echo command output is as expected and a check_execveat prereq program in case the execveat syscall isn't supported.
Comment 12 Mark Wielaard 2020-06-09 07:20:16 UTC
The original patch only hooked up execveat for x86_64, this followup adds it for all other arches:

commit 0a69a8f5bdb8c9456104dedfd10744a493dfd06a
Author: Mark Wielaard <mark@klomp.org>
Date:   Tue Jun 9 09:02:51 2020 +0200

    Add execveat for arm[64], [nano]mips[32|64], ppc[32|64], s390x and x86.
    
    https://bugs.kde.org/show_bug.cgi?id=345077