8 void do_lzcnt64 ( /*OUT*/UInt* flags, /*OUT*/ULong* res, ULong arg ) 9 { 10 ULong block[3] = { arg, 0ULL, 0ULL }; 11 __asm__ __volatile__( 12 "movabsq $0x5555555555555555, %%r11" "\n\t" 13 "lzcntq 0(%0), %%r11" "\n\t" 14 "movq %%r11, 8(%0)" "\n\t" 15 "pushfq" "\n\t" 16 "popq %%r11" "\n\t" 17 "movq %%r11, 16(%0)" "\n" 18 : : "r"(&block[0]) : "r11","cc","memory" 19 ); 20 *res = block[1]; 21 *flags = block[2] & 0x8d5; 22 } The 'pushfq' on line 15 pushes onto the stack and overwrites the saved 'flags' pointer (stored at -0x8(%rbp), directly below the stack pointer, in the disassembly below), so the later store through 'flags' on line 21 causes a segfault.
objdump -d: 0000000000201750 <do_lzcnt64>: 201750: 55 pushq %rbp 201751: 48 89 e5 movq %rsp, %rbp 201754: 48 89 7d f8 movq %rdi, -0x8(%rbp) 201758: 48 89 75 f0 movq %rsi, -0x10(%rbp) 20175c: 48 89 55 e8 movq %rdx, -0x18(%rbp) 201760: 48 8b 45 e8 movq -0x18(%rbp), %rax 201764: 48 89 45 d0 movq %rax, -0x30(%rbp) 201768: 48 c7 45 d8 00 00 00 00 movq $0x0, -0x28(%rbp) 201770: 48 c7 45 e0 00 00 00 00 movq $0x0, -0x20(%rbp) 201778: 48 8d 45 d0 leaq -0x30(%rbp), %rax 20177c: 49 bb 55 55 55 55 55 55 55 55 movabsq $0x5555555555555555, %r11 # imm = 0x5555555555555555 201786: f3 4c 0f bd 18 lzcntq (%rax), %r11 20178b: 4c 89 58 08 movq %r11, 0x8(%rax) 20178f: 9c pushfq 201790: 41 5b popq %r11 201792: 4c 89 58 10 movq %r11, 0x10(%rax) 201796: 48 8b 4d d8 movq -0x28(%rbp), %rcx 20179a: 48 8b 45 f0 movq -0x10(%rbp), %rax 20179e: 48 89 08 movq %rcx, (%rax) 2017a1: 48 8b 45 e0 movq -0x20(%rbp), %rax 2017a5: 48 25 d5 08 00 00 andq $0x8d5, %rax # imm = 0x8D5 2017ab: 89 c1 movl %eax, %ecx 2017ad: 48 8b 45 f8 movq -0x8(%rbp), %rax 2017b1: 89 08 movl %ecx, (%rax) 2017b3: 5d popq %rbp 2017b4: c3 retq
GCC/Linux 0000000000001139 <do_lzcnt64>: 1139: 55 push %rbp 113a: 48 89 e5 mov %rsp,%rbp 113d: 48 89 7d d8 mov %rdi,-0x28(%rbp) 1141: 48 89 75 d0 mov %rsi,-0x30(%rbp) 1145: 48 89 55 c8 mov %rdx,-0x38(%rbp) 1149: 48 8b 45 c8 mov -0x38(%rbp),%rax 114d: 48 89 45 e0 mov %rax,-0x20(%rbp) 1151: 48 c7 45 e8 00 00 00 movq $0x0,-0x18(%rbp) 1158: 00 1159: 48 c7 45 f0 00 00 00 movq $0x0,-0x10(%rbp) 1160: 00 1161: 48 8d 45 e0 lea -0x20(%rbp),%rax 1165: 49 bb 55 55 55 55 55 movabs $0x5555555555555555,%r11 116c: 55 55 55 116f: f3 4c 0f bd 18 lzcnt (%rax),%r11 1174: 4c 89 58 08 mov %r11,0x8(%rax) 1178: 9c pushf 1179: 41 5b pop %r11 117b: 4c 89 58 10 mov %r11,0x10(%rax) 117f: 48 8b 55 e8 mov -0x18(%rbp),%rdx 1183: 48 8b 45 d0 mov -0x30(%rbp),%rax 1187: 48 89 10 mov %rdx,(%rax) 118a: 48 8b 45 f0 mov -0x10(%rbp),%rax 118e: 25 d5 08 00 00 and $0x8d5,%eax 1193: 89 c2 mov %eax,%edx 1195: 48 8b 45 d8 mov -0x28(%rbp),%rax 1199: 89 10 mov %edx,(%rax) 119b: 90 nop 119c: 5d pop %rbp 119d: c3 ret I think that it was working by chance with GCC, which happens to leave the 8 bytes at -0x8(%rbp) unused, so the 'pushf' does not overwrite any of the saved arguments or outputs.
commit 5b2fed0f5a4471d87d0763172f29332cf4cc6abe (HEAD -> master, origin/master, origin/HEAD) Author: Paul Floyd <pjfloyd@wanadoo.fr> Date: Sat Jan 11 12:28:00 2025 +0100 Bug 498492 - none/tests/amd64/lzcnt64 crashes on FreeBSD compiled with clang Using push in inline asm is a bit risky. It worked by luck with GCC. Fix it by shifting RSP down by 1024 before the test and restoring it afterwards.