Bug 498492 - none/tests/amd64/lzcnt64 crashes on FreeBSD compiled with clang
Summary: none/tests/amd64/lzcnt64 crashes on FreeBSD compiled with clang
Status: RESOLVED FIXED
Alias: None
Product: valgrind
Classification: Developer tools
Component: general (show other bugs)
Version: unspecified
Platform: FreeBSD Ports FreeBSD
Importance: NOR crash
Target Milestone: ---
Assignee: Paul Floyd
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2025-01-10 19:08 UTC by Paul Floyd
Modified: 2025-01-11 12:55 UTC (History)
0 users

See Also:
Latest Commit:
Version Fixed In:
Sentry Crash Report:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Paul Floyd 2025-01-10 19:08:58 UTC
8 void do_lzcnt64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg )
 9 {
10   ULong block[3] = { arg, 0ULL, 0ULL };
11   __asm__ __volatile__(
12     "movabsq $0x5555555555555555, %%r11" "\n\t"
13     "lzcntq 0(%0), %%r11"     "\n\t"
14     "movq %%r11, 8(%0)"       "\n\t"
15     "pushfq"                  "\n\t"
16     "popq %%r11"              "\n\t"
17     "movq %%r11, 16(%0)"      "\n"
18     : : "r"(&block[0]) : "r11","cc","memory"
19   );
20   *res = block[1];
21   *flags = block[2] & 0x8d5;
22 }

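The 'pushfq' on line 15 writes 8 bytes just below RSP. The function never moves RSP down for its locals (it relies on the red zone), and clang keeps the saved 'flags' pointer in exactly that slot, so the push corrupts the flags pointer and the store through it on line 21 causes a segfault.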
Comment 1 Paul Floyd 2025-01-11 08:10:44 UTC
objdump -d:

0000000000201750 <do_lzcnt64>:
  201750: 55                            pushq   %rbp
  201751: 48 89 e5                      movq    %rsp, %rbp
  201754: 48 89 7d f8                   movq    %rdi, -0x8(%rbp)
  201758: 48 89 75 f0                   movq    %rsi, -0x10(%rbp)
  20175c: 48 89 55 e8                   movq    %rdx, -0x18(%rbp)
  201760: 48 8b 45 e8                   movq    -0x18(%rbp), %rax
  201764: 48 89 45 d0                   movq    %rax, -0x30(%rbp)
  201768: 48 c7 45 d8 00 00 00 00       movq    $0x0, -0x28(%rbp)
  201770: 48 c7 45 e0 00 00 00 00       movq    $0x0, -0x20(%rbp)
  201778: 48 8d 45 d0                   leaq    -0x30(%rbp), %rax
  20177c: 49 bb 55 55 55 55 55 55 55 55 movabsq $0x5555555555555555, %r11 # imm = 0x5555555555555555
  201786: f3 4c 0f bd 18                lzcntq  (%rax), %r11
  20178b: 4c 89 58 08                   movq    %r11, 0x8(%rax)
  20178f: 9c                            pushfq
  201790: 41 5b                         popq    %r11
  201792: 4c 89 58 10                   movq    %r11, 0x10(%rax)
  201796: 48 8b 4d d8                   movq    -0x28(%rbp), %rcx
  20179a: 48 8b 45 f0                   movq    -0x10(%rbp), %rax
  20179e: 48 89 08                      movq    %rcx, (%rax)
  2017a1: 48 8b 45 e0                   movq    -0x20(%rbp), %rax
  2017a5: 48 25 d5 08 00 00             andq    $0x8d5, %rax            # imm = 0x8D5
  2017ab: 89 c1                         movl    %eax, %ecx
  2017ad: 48 8b 45 f8                   movq    -0x8(%rbp), %rax
  2017b1: 89 08                         movl    %ecx, (%rax)
  2017b3: 5d                            popq    %rbp
  2017b4: c3                            retq
Comment 2 Paul Floyd 2025-01-11 10:50:03 UTC
GCC/Linux

0000000000001139 <do_lzcnt64>:
    1139:       55                      push   %rbp
    113a:       48 89 e5                mov    %rsp,%rbp
    113d:       48 89 7d d8             mov    %rdi,-0x28(%rbp)
    1141:       48 89 75 d0             mov    %rsi,-0x30(%rbp)
    1145:       48 89 55 c8             mov    %rdx,-0x38(%rbp)
    1149:       48 8b 45 c8             mov    -0x38(%rbp),%rax
    114d:       48 89 45 e0             mov    %rax,-0x20(%rbp)
    1151:       48 c7 45 e8 00 00 00    movq   $0x0,-0x18(%rbp)
    1158:       00
    1159:       48 c7 45 f0 00 00 00    movq   $0x0,-0x10(%rbp)
    1160:       00
    1161:       48 8d 45 e0             lea    -0x20(%rbp),%rax
    1165:       49 bb 55 55 55 55 55    movabs $0x5555555555555555,%r11
    116c:       55 55 55
    116f:       f3 4c 0f bd 18          lzcnt  (%rax),%r11
    1174:       4c 89 58 08             mov    %r11,0x8(%rax)
    1178:       9c                      pushf
    1179:       41 5b                   pop    %r11
    117b:       4c 89 58 10             mov    %r11,0x10(%rax)
    117f:       48 8b 55 e8             mov    -0x18(%rbp),%rdx
    1183:       48 8b 45 d0             mov    -0x30(%rbp),%rax
    1187:       48 89 10                mov    %rdx,(%rax)
    118a:       48 8b 45 f0             mov    -0x10(%rbp),%rax
    118e:       25 d5 08 00 00          and    $0x8d5,%eax
    1193:       89 c2                   mov    %eax,%edx
    1195:       48 8b 45 d8             mov    -0x28(%rbp),%rax
    1199:       89 10                   mov    %edx,(%rax)
    119b:       90                      nop
    119c:       5d                      pop    %rbp
    119d:       c3                      ret

I think that it was working by chance with GCC, which happens to leave the 8 bytes just below the saved frame pointer (-0x8(%rbp)) unused, so the push doesn't overwrite the saved output pointers or the block array.
Comment 3 Paul Floyd 2025-01-11 12:55:38 UTC
commit 5b2fed0f5a4471d87d0763172f29332cf4cc6abe (HEAD -> master, origin/master, origin/HEAD)
Author: Paul Floyd <pjfloyd@wanadoo.fr>
Date:   Sat Jan 11 12:28:00 2025 +0100

    Bug 498492 - none/tests/amd64/lzcnt64 crashes on FreeBSD compiled with clang

    Using push in inline asm is a bit risky. It worked by luck with
    GCC. Fix it by shifting RSP down by 1024 before the test
    and restoring it after.