execveat (2) support was added in Linux 3.19, syswrap functions Reproducible: Always Steps to Reproduce: 1. Write a C program that makes use of execveat 2. run under valgrind 3. Actual Results: execveat support is not found Expected Results: at least minimal execveat support should be seen I've written a preliminary patch. I would be happy to improve upon this patch if needed. Linux-specific (new in Linux 3.19) +DECL_TEMPLATE(linux, sys_execveat); /* --------------------------------------------------------------------- Wrappers for sockets and ipc-ery. These are split into standalone procedures because x86-linux hides them inside multiplexors Index: coregrind/m_syswrap/syswrap-amd64-linux.c =================================================================== --- coregrind/m_syswrap/syswrap-amd64-linux.c (revision 14976) +++ coregrind/m_syswrap/syswrap-amd64-linux.c (working copy) @@ -1079,10 +1079,11 @@ // LIN__(__NR_renameat2, sys_ni_syscall), // 316 // LIN__(__NR_seccomp, sys_ni_syscall), // 317 LINXY(__NR_getrandom, sys_getrandom), // 318 - LINXY(__NR_memfd_create, sys_memfd_create) // 319 + LINXY(__NR_memfd_create, sys_memfd_create), // 319 // LIN__(__NR_kexec_file_load, sys_ni_syscall), // 320 // LIN__(__NR_bpf, sys_ni_syscall) // 321 + LINX_(__NR_execveat, sys_execveat), // 322 }; SyscallTableEntry* ML_(get_linux_syscall_entry) ( UInt sysno ) Index: coregrind/m_syswrap/syswrap-linux.c =================================================================== --- coregrind/m_syswrap/syswrap-linux.c (revision 14976) +++ coregrind/m_syswrap/syswrap-linux.c (working copy) @@ -41,6 +41,7 @@ #include "pub_core_xarray.h" #include "pub_core_clientstate.h" #include "pub_core_debuglog.h" +#include "pub_core_gdbserver.h" // VG_(gdbserver) #include "pub_core_libcbase.h" #include "pub_core_libcassert.h" #include "pub_core_libcfile.h" @@ -55,6 +56,7 @@ #include "pub_core_signals.h" #include "pub_core_syscall.h" #include "pub_core_syswrap.h" +#include "pub_core_ume.h" #include "pub_core_inner.h" #if defined(ENABLE_INNER_CLIENT_REQUEST) #include "pub_core_clreq.h" @@ -3041,6 +3043,204 @@ } } +// Pre_read a char** argument. +static void pre_argv_envp(Addr a, ThreadId tid, const HChar* s1, const HChar* s2) +{ + while (True) { + Addr a_deref; + Addr* a_p = (Addr*)a; + PRE_MEM_READ( s1, (Addr)a_p, sizeof(Addr) ); + a_deref = *a_p; + if (0 == a_deref) + break; + PRE_MEM_RASCIIZ( s2, a_deref ); + a += sizeof(char*); + } +} + +PRE(sys_execveat) +{ + HChar* path = NULL; /* path to executable */ + HChar** envp = NULL; + HChar** argv = NULL; + HChar** arg3copy; + HChar* launcher_basename = NULL; + ThreadState* tst; + Int i, j, tot_args; + SysRes res; + Bool setuid_allowed, trace_this_child; + PRINT("sys_execveat ( %ld, %#lx(%s), %#lx, %#lx, %ld", ARG1, ARG2, (char*)ARG2, ARG3, ARG4, ARG5); + PRE_REG_READ5(vki_off_t, "execveat", + int, fd, char *, filename, char **, argv, char **, envp, int, flags); + PRE_MEM_RASCIIZ( "execveat(filename)", ARG2); + + if (ARG3 != 0) + pre_argv_envp( ARG3, tid, "execveat(argv)", "execveat(argv[i])" ); + if (ARG4 != 0) + pre_argv_envp( ARG4, tid, "execveat(envp)", "execveat(envp[i])" ); + + vg_assert(VG_(is_valid_tid)(tid)); + tst = VG_(get_ThreadState)(tid); + + if (ARG1 <= 0 && ARG2 == 0) { + SET_STATUS_Failure( VKI_EFAULT ); + return; + } + + if (ARG2 == 0 && ARG5 == VKI_AT_EMPTY_PATH) { + vg_assert(FAILURE); + VG_(message)(Vg_UserMsg, "AT_EMPTY_PATH flag with file descriptor combination" + " not yet supported, exiting.\n"); + VG_(exit)(101); + } + + if (ML_(safe_to_deref)( (void*)ARG2, 1 ) + && *(Char *)ARG2 != '/' + && ((Int)ARG1) != ((Int)VKI_AT_FDCWD) + && !ML_(fd_allowed)(ARG1, "execveat", tid, False)) + SET_STATUS_Failure( VKI_EBADF ); + + { + const HChar** child_argv = (const HChar**)ARG3; + if (child_argv && child_argv[0] == NULL) + child_argv = NULL; + trace_this_child = VG_(should_we_trace_this_child)( (HChar*)ARG2, child_argv); + } + + setuid_allowed = trace_this_child ? False : True; + res = VG_(pre_exec_check)((const HChar *)ARG2, NULL, setuid_allowed); + if (sr_isError(res)) { + SET_STATUS_Failure( sr_Err(res) ); + return; + } + + if (trace_this_child + && (VG_(name_of_launcher) == NULL + || VG_(name_of_launcher)[0] != '/')) { + SET_STATUS_Failure( VKI_ECHILD ); + return; + } + + VG_(debugLog)(1, "syswrap", "Exec of %s\n", (HChar*)ARG2); + + if (VG_(clo_vgdb) != Vg_VgdbNo) { + VG_(gdbserver) (0); + } + + + VG_(nuke_all_threads_except)( tid, VgSrc_ExitThread ); + VG_(reap_threads)(tid); + + if (trace_this_child) { + + path = VG_(name_of_launcher); + vg_assert(path); + launcher_basename = VG_(strrchr)(path, '/'); + if (launcher_basename == NULL || launcher_basename[1] == 0) { + launcher_basename = path; + } else { + launcher_basename++; + } + } else { + path = (HChar*)ARG2; + } + + if (ARG4 == 0) { + envp = NULL; + } else { + envp = VG_(env_clone)( (HChar**)ARG4 ); + if (envp == NULL) goto hosed; + VG_(env_remove_valgrind_env_stuff)( envp ); + } + + if (trace_this_child) { + VG_(env_setenv)( &envp, VALGRIND_LIB, VG_(libdir)); + } + + if (!trace_this_child) { + argv = (HChar**)ARG3; + } else { + vg_assert( VG_(args_for_valgrind) ); + vg_assert( VG_(args_for_valgrind_noexecpass) >= 0 ); + vg_assert( VG_(args_for_valgrind_noexecpass) + <= VG_(sizeXA)( VG_(args_for_valgrind) ) ); + tot_args = 1; + tot_args += VG_(sizeXA)( VG_(args_for_valgrind) ); + tot_args -= VG_(args_for_valgrind_noexecpass); + tot_args++; + + arg3copy = (HChar**)ARG3; + if (arg3copy && arg3copy[0]) { + for (i = 1; arg3copy[i]; i++) + tot_args++; + } + // allocate + argv = VG_(malloc)( "di.syswrap.pre_sys_execve.1", + (tot_args+1) * sizeof(HChar*) ); + // copy + j = 0; + argv[j++] = launcher_basename; + for (i = 0; i < VG_(sizeXA)( VG_(args_for_valgrind) ); i++) { + if (i < VG_(args_for_valgrind_noexecpass)) + continue; + argv[j++] = * (HChar**) VG_(indexXA)( VG_(args_for_valgrind), i ); + } + argv[j++] = (HChar*)ARG2; + if (arg3copy && arg3copy[0]) + for (i = 1; arg3copy[i]; i++) + argv[j++] = arg3copy[i]; + argv[j++] = NULL; + // check + vg_assert(j == tot_args+1); + } + + /* restore the DATA rlimit for the child */ + VG_(setrlimit)(VKI_RLIMIT_DATA, &VG_(client_rlimit_data)); + + { + vki_sigset_t allsigs; + vki_siginfo_t info; + + /* What this loop does: it queries SCSS (the signal state that + the client _thinks_ the kernel is in) by calling + VG_(do_sys_sigaction), and modifies the real kernel signal + state accordingly. */ + for (i = 1; i < VG_(max_signal); i++) { + vki_sigaction_fromK_t sa_f; + vki_sigaction_toK_t sa_t; + VG_(do_sys_sigaction)(i, NULL, &sa_f); + VG_(convert_sigaction_fromK_to_toK)(&sa_f, &sa_t); + if (sa_t.ksa_handler == VKI_SIG_IGN) + VG_(sigaction)(i, &sa_t, NULL); + else { + sa_t.ksa_handler = VKI_SIG_DFL; + VG_(sigaction)(i, &sa_t, NULL); + } + } + + VG_(sigfillset)(&allsigs); + while(VG_(sigtimedwait_zero)(&allsigs, &info) > 0) + ; + + VG_(sigprocmask)(VKI_SIG_SETMASK, &tst->sig_mask, NULL); + } + + SET_STATUS_from_SysRes( + VG_(do_syscall5)(__NR_execveat, ARG1, (UWord)path, (UWord)argv, (UWord)envp, ARG5) + ); + + hosed: + vg_assert(FAILURE); + VG_(message)(Vg_UserMsg, "execveat( %ld, %#lx(%s), %#lx, %#lx, %ld) failed, errno %ld\n", + ARG1, ARG2, (char*)ARG2, ARG3, ARG4, ARG5, ERR); + VG_(message)(Vg_UserMsg, "EXEC FAILED: I can't recover from " + "execveat() failing, so I'm dying.\n"); + VG_(message)(Vg_UserMsg, "Add more stringent tests in PRE(sys_execveat), " + "or work out how to recover.\n"); + VG_(exit)(101); + +} + /* --------------------------------------------------------------------- utime wrapper ------------------------------------------------------------------ */ Index: include/vki/vki-amd64-linux.h =================================================================== --- include/vki/vki-amd64-linux.h (revision 14976) +++ include/vki/vki-amd64-linux.h (working copy) @@ -291,6 +291,12 @@ #define VKI_F_LINUX_SPECIFIC_BASE 1024 //---------------------------------------------------------------------- +// From linux-3.19/include/asm-x86_64/fcntl.h +//---------------------------------------------------------------------- + +#define VKI_AT_EMPTY_PATH 0x1000 + +//---------------------------------------------------------------------- // From linux-2.6.9/include/asm-x86_64/resource.h //---------------------------------------------------------------------- Index: include/vki/vki-scnums-amd64-linux.h =================================================================== --- include/vki/vki-scnums-amd64-linux.h (revision 14976) +++ include/vki/vki-scnums-amd64-linux.h (working copy) @@ -403,6 +403,7 @@ #define __NR_memfd_create 319 #define __NR_kexec_file_load 320 #define __NR_bpf 321 +#define __NR_execveat 322 #endif /* __VKI_SCNUMS_AMD64_LINUX_H */ Index: include/vki/vki-scnums-x86-linux.h =================================================================== --- include/vki/vki-scnums-x86-linux.h (revision 14976) +++ include/vki/vki-scnums-x86-linux.h (working copy) @@ -392,6 +392,7 @@ #define __NR_getrandom 355 #define __NR_memfd_create 356 #define __NR_bpf 357 +#define __NR_execveat 358 #endif /* __VKI_SCNUMS_X86_LINUX_H */
Created attachment 91539 [details] Proposed Patch v1 Adding patch as an attachment, for whatever form is preferred.
It must be possible to do better than that surely - copying and pasting all that code from the existing sys_execve handler seems like a very had idea. We need to find a way to keep as much as possible common to them both I think.
How urgent/important is it to fix this? 3.19 was some time back now and I don't see a flood of complaints about missing execveat support. Is it only occasionally used?
glibc doesn't have a wrapper for execveat currently, which is most likely why we have never seen it reported. But glibc since 2.27 implements fexecve using execveat. So it can be seen with something simple like: #include <unistd.h> #include <stddef.h> #include <sys/stat.h> #include <fcntl.h> int main() { char* argv[] = { "foobar", "arg1", NULL }; char* envp[] = { NULL }; int fd = open ("/bin/echo", O_RDONLY); fexecve (fd, argv, envp); } $ valgrind -q ./t --210730-- WARNING: unhandled amd64-linux syscall: 322 --210730-- You may be able to write your own handler. --210730-- Read the file README_MISSING_SYSCALL_OR_IOCTL. --210730-- Nevertheless we consider this a bug. Please report --210730-- it at http://valgrind.org/support/bug_reports.html. arg1 Note that it still works because glibc falls back to using /proc/self/[fdnr] if the syscall fails.
Created attachment 127346 [details] patch This new execveat wrapper passes Mark's fexecve test. The regular test is added to the testsuite but should work only when the glibc wrapper would be added. I have the glibc wrapper patch already prepared and should be able to post it soon.
Created attachment 127385 [details] patch
(In reply to Alexandra Hajkova from comment #6) > Created attachment 127385 [details] > patch We discussed this on irc a bit: - Nicely split the generic code from the execve and execveat PRE handlers. - The test for whether it is an absolute PATH is wrong, should be (path[0] != '/'). - abs_path needs to be allocate before use. - Because abs_path and buf are allocated they need to be VG_(free) at the end after handle_pre_sys_execve returns (which means the syscall failed, otherwise the function doesn't return). - Because path might now be replaced the am_is_valid_for_client check should be moved from the generic code to the execve and execveat PRE handlers. - Since glibc doesn't have a wrapper yet, best to have our own syscall wrapper in the testcase (with a prereq check case, then you can unconditionally add/compile the test and you can get rid of the configure.ac tests and HAVE_EXECVEAT conditional). - Testcase needs to test errors before success case (because success never returns).
Created attachment 128690 [details] patch The new version
This looks good. I read it again, we already discussed earlier versions on irc, but I don't have any more feedback. Except that the memcheck/tests/linux/sys-execveat.std{err,out}.exp files are missing. For other reviewers. Some of the trickery, like check_pathptr, was done so warnings kept being produced in the same order as previous versions (that existing testcases expect for execve), and not to fail the syscall "too early" even if we know the arguments are bogus, so that the syscalls fail for the "right" reason.
Created attachment 128761 [details] patch stderr.exp added, sorry for forgetting about it
Thanks, finally pushed as: commit 6f6ff49ffa10da4e8027220d70791a72437846fd Author: Alexandra Hájková <ahajkova@redhat.com> Date: Thu Apr 9 17:28:18 2020 +0200 Add support for execveat syscall Refactor the code to be reusable between execve and execveat syscalls. https://bugs.kde.org/show_bug.cgi?id=345077 I did add a sys-execveat.stdout.exp to make sure the echo command output is as expected and a check_execveat prereq program in case the execveat syscall isn't supported.
The original patch only hooked up execveat for x86_64, this followup adds it for all other arches: commit 0a69a8f5bdb8c9456104dedfd10744a493dfd06a Author: Mark Wielaard <mark@klomp.org> Date: Tue Jun 9 09:02:51 2020 +0200 Add execveat for arm[64], [nano]mips[32|64], ppc[32|64], s390x and x86. https://bugs.kde.org/show_bug.cgi?id=345077