Skip to content

[x32] Fix BLAKE3 assembly #149617

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open

[x32] Fix BLAKE3 assembly #149617

wants to merge 1 commit into from

Conversation

hvdijk
Copy link
Contributor

@hvdijk hvdijk commented Jul 18, 2025

The x86-64 assembly implementations of BLAKE3 are used in both 64-bit and 32-bit pointer modes, but they only worked in 64-bit pointer mode. This PR makes them work in 32-bit pointer mode (x32) as well.

@llvmbot
Copy link
Member

llvmbot commented Jul 18, 2025

@llvm/pr-subscribers-llvm-support

Author: Harald van Dijk (hvdijk)

Changes

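The x86-64 assembly implementations of BLAKE3 are used in both 64-bit and 32-bit pointer modes, but they only worked in 64-bit pointer mode. This PR makes them work in 32-bit pointer mode (x32) as well. When `_ILP32` is defined, the code zero-extends the 32-bit values passed in `esi` and `edx`, loads each input pointer as a 32-bit value at 4-byte rather than 8-byte offsets, and advances `rdi` by half as many bytes per iteration.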


Full diff: https://github.com/llvm/llvm-project/pull/149617.diff

4 Files Affected:

  • (modified) llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S (+43)
  • (modified) llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S (+91)
  • (modified) llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S (+28)
  • (modified) llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S (+28)
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
index e98893c7ef8b8..f285fe119f4c1 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
@@ -45,6 +45,10 @@ blake3_hash_many_avx2:
         mov     rbp, rsp
         sub     rsp, 680
         and     rsp, 0xFFFFFFFFFFFFFFC0
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         neg     r9d
         vmovd   xmm0, r9d
         vpbroadcastd ymm0, xmm0
@@ -77,6 +81,7 @@ blake3_hash_many_avx2:
         vpbroadcastd ymm5, dword ptr [rcx+0x14]
         vpbroadcastd ymm6, dword ptr [rcx+0x18]
         vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -85,6 +90,16 @@ blake3_hash_many_avx2:
         mov     r13, qword ptr [rdi+0x28]
         mov     r14, qword ptr [rdi+0x30]
         mov     r15, qword ptr [rdi+0x38]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x10]
+        mov     r13d, dword ptr [rdi+0x14]
+        mov     r14d, dword ptr [rdi+0x18]
+        mov     r15d, dword ptr [rdi+0x1c]
+#endif
         movzx   eax, byte ptr [rbp+0x38]
         movzx   ebx, byte ptr [rbp+0x40]
         or      eax, ebx
@@ -1305,7 +1320,11 @@ blake3_hash_many_avx2:
         vmovdqa ymm0, ymmword ptr [rsp+0x260]
         vpsubd  ymm2, ymm0, ymm2
         vmovdqa ymmword ptr [rsp+0x260], ymm2
+#ifndef _ILP32
         add     rdi, 64
+#else
+        add     rdi, 32
+#endif
         add     rbx, 256
         mov     qword ptr [rbp+0x50], rbx
         sub     rsi, 8
@@ -1346,10 +1365,17 @@ blake3_hash_many_avx2:
         vpblendd ymm15, ymm15, ymm12, 0x44
         vmovdqa ymmword ptr [rsp], ymm14
         vmovdqa ymmword ptr [rsp+0x20], ymm15
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1557,7 +1583,11 @@ blake3_hash_many_avx2:
         vmovaps xmmword ptr [rsp+0x240], xmm0
         vmovaps xmmword ptr [rsp+0x260], xmm2
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
 3:
         test    rsi, 0x2
@@ -1573,8 +1603,13 @@ blake3_hash_many_avx2:
         vinserti128 ymm13, ymm13, xmm14, 0x01
         vbroadcasti128 ymm14, xmmword ptr [ROT16+rip]
         vbroadcasti128 ymm15, xmmword ptr [ROT8+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1683,7 +1718,11 @@ blake3_hash_many_avx2:
         vmovaps ymmword ptr [rsp+0x240], ymm0
         vmovaps ymmword ptr [rsp+0x260], ymm2
         add     rbx, 64
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         sub     rsi, 2
 3:
         test    rsi, 0x1
@@ -1695,7 +1734,11 @@ blake3_hash_many_avx2:
         vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         vmovdqa xmm14, xmmword ptr [ROT16+rip]
         vmovdqa xmm15, xmmword ptr [ROT8+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
index b4b14946de10e..709c4752d4084 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
@@ -59,6 +59,10 @@ blake3_hash_many_avx512:
         sub     rsp, 144
         and     rsp, 0xFFFFFFFFFFFFFFC0
         neg     r9
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         kmovw   k1, r9d
         vmovd   xmm0, r8d
         vpbroadcastd ymm0, xmm0
@@ -107,6 +111,7 @@ blake3_hash_many_avx512:
         cmp     rdx, qword ptr [rsp+0x80]
         cmove   eax, ebx
         mov     dword ptr [rsp+0x88], eax
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -115,6 +120,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x48]
         mov     r14, qword ptr [rdi+0x50]
         mov     r15, qword ptr [rdi+0x58]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x20]
+        mov     r13d, dword ptr [rdi+0x24]
+        mov     r14d, dword ptr [rdi+0x28]
+        mov     r15d, dword ptr [rdi+0x2c]
+#endif
         vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
         vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
         vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
@@ -127,6 +142,7 @@ blake3_hash_many_avx512:
         vinserti64x4 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01
         vpunpcklqdq zmm10, zmm18, zmm19
         vpunpckhqdq zmm11, zmm18, zmm19
+#ifndef _ILP32
         mov     r8, qword ptr [rdi+0x20]
         mov     r9, qword ptr [rdi+0x28]
         mov     r10, qword ptr [rdi+0x30]
@@ -135,6 +151,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x68]
         mov     r14, qword ptr [rdi+0x70]
         mov     r15, qword ptr [rdi+0x78]
+#else
+        mov     r8d, dword ptr [rdi+0x10]
+        mov     r9d, dword ptr [rdi+0x14]
+        mov     r10d, dword ptr [rdi+0x18]
+        mov     r11d, dword ptr [rdi+0x1c]
+        mov     r12d, dword ptr [rdi+0x30]
+        mov     r13d, dword ptr [rdi+0x34]
+        mov     r14d, dword ptr [rdi+0x38]
+        mov     r15d, dword ptr [rdi+0x3c]
+#endif
         vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20]
         vinserti64x4 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01
         vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20]
@@ -169,6 +195,7 @@ blake3_hash_many_avx512:
         vmovdqa32 zmm23, zmm19
         vpermt2d zmm19, zmm27, zmm8
         vpermt2d zmm23, zmm31, zmm8
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -177,6 +204,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x48]
         mov     r14, qword ptr [rdi+0x50]
         mov     r15, qword ptr [rdi+0x58]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x20]
+        mov     r13d, dword ptr [rdi+0x24]
+        mov     r14d, dword ptr [rdi+0x28]
+        mov     r15d, dword ptr [rdi+0x2c]
+#endif
         vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
         vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
         vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
@@ -197,6 +234,7 @@ blake3_hash_many_avx512:
         prefetcht0 [r14+rdx+0x80]
         prefetcht0 [r11+rdx+0x80]
         prefetcht0 [r15+rdx+0x80]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi+0x20]
         mov     r9, qword ptr [rdi+0x28]
         mov     r10, qword ptr [rdi+0x30]
@@ -205,6 +243,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x68]
         mov     r14, qword ptr [rdi+0x70]
         mov     r15, qword ptr [rdi+0x78]
+#else
+        mov     r8d, dword ptr [rdi+0x10]
+        mov     r9d, dword ptr [rdi+0x14]
+        mov     r10d, dword ptr [rdi+0x18]
+        mov     r11d, dword ptr [rdi+0x1c]
+        mov     r12d, dword ptr [rdi+0x30]
+        mov     r13d, dword ptr [rdi+0x34]
+        mov     r14d, dword ptr [rdi+0x38]
+        mov     r15d, dword ptr [rdi+0x3c]
+#endif
         vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20]
         vinserti64x4 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01
         vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20]
@@ -1095,7 +1143,11 @@ blake3_hash_many_avx512:
         vpaddd  zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16}
         vmovdqa32 zmmword ptr [rsp], zmm2
         vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1
+#ifndef _ILP32
         add     rdi, 128
+#else
+        add     rdi, 64
+#endif
         add     rbx, 512
         mov     qword ptr [rbp+0x50], rbx
         sub     rsi, 16
@@ -1125,6 +1177,7 @@ blake3_hash_many_avx512:
         vpbroadcastd ymm5, dword ptr [rcx+0x14]
         vpbroadcastd ymm6, dword ptr [rcx+0x18]
         vpbroadcastd ymm7, dword ptr [rcx+0x1C]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
@@ -1133,6 +1186,16 @@ blake3_hash_many_avx512:
         mov     r13, qword ptr [rdi+0x28]
         mov     r14, qword ptr [rdi+0x30]
         mov     r15, qword ptr [rdi+0x38]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+        mov     r12d, dword ptr [rdi+0x10]
+        mov     r13d, dword ptr [rdi+0x14]
+        mov     r14d, dword ptr [rdi+0x18]
+        mov     r15d, dword ptr [rdi+0x1c]
+#endif
         movzx   eax, byte ptr [rbp+0x38]
         movzx   ebx, byte ptr [rbp+0x40]
         or      eax, ebx
@@ -2055,7 +2118,11 @@ blake3_hash_many_avx512:
         vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2
         add     rbx, 256
         mov     qword ptr [rbp+0x50], rbx
+#ifndef _ILP32
         add     rdi, 64
+#else
+        add     rdi, 32
+#endif
         sub     rsi, 8
 3:
         mov     rbx, qword ptr [rbp+0x50]
@@ -2078,10 +2145,17 @@ blake3_hash_many_avx512:
         kmovw   k2, eax
         vpblendmd zmm13 {k2}, zmm13, zmm12
         vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         mov     eax, 43690
         kmovw   k3, eax
         mov     eax, 34952
@@ -2195,7 +2269,11 @@ blake3_hash_many_avx512:
         vmovdqa xmmword ptr [rsp], xmm0
         vmovdqa xmmword ptr [rsp+0x40], xmm2
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
 3:
         test    esi, 0x2
@@ -2209,8 +2287,13 @@ blake3_hash_many_avx512:
         vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1
         vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         vinserti128 ymm13, ymm13, xmm14, 0x01
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -2308,7 +2391,11 @@ blake3_hash_many_avx512:
         vmovdqa xmmword ptr [rsp], xmm0
         vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2
         add     rbx, 64
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         sub     rsi, 2
 3:
         test    esi, 0x1
@@ -2319,7 +2406,11 @@ blake3_hash_many_avx512:
         vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1
         vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
index d69a1706fefe7..85434df927cdd 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
@@ -54,6 +54,10 @@ blake3_hash_many_sse2:
         sub     rsp, 360
         and     rsp, 0xFFFFFFFFFFFFFFC0
         neg     r9d
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         movd    xmm0, r9d
         pshufd  xmm0, xmm0, 0x00
         movdqa  xmmword ptr [rsp+0x130], xmm0
@@ -91,10 +95,17 @@ blake3_hash_many_sse2:
         pshufd  xmm5, xmm7, 0x55
         pshufd  xmm6, xmm7, 0xAA
         pshufd  xmm7, xmm7, 0xFF
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1648,7 +1659,11 @@ blake3_hash_many_sse2:
         psubd   xmm1, xmm0
         movdqa  xmmword ptr [rsp+0x120], xmm1
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
         cmp     rsi, 4
         jnc     2b
@@ -1679,8 +1694,13 @@ blake3_hash_many_sse2:
         movd    xmm13, dword ptr [rsp+0x124]
         punpckldq xmm14, xmm13
         movaps  xmmword ptr [rsp+0x10], xmm14
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1909,7 +1929,11 @@ blake3_hash_many_sse2:
         mov    r11d, dword ptr [rsp+0x120+8*rax]
         mov dword ptr [rsp+0x110], r10d
         mov dword ptr [rsp+0x120], r11d
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         add     rbx, 64
         sub     rsi, 2
 3:
@@ -1920,7 +1944,11 @@ blake3_hash_many_sse2:
         movd    xmm13, dword ptr [rsp+0x110]
         movd    xmm14, dword ptr [rsp+0x120]
         punpckldq xmm13, xmm14
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
index c5b103af61c4f..403773421587c 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
@@ -54,6 +54,10 @@ blake3_hash_many_sse41:
         sub     rsp, 360
         and     rsp, 0xFFFFFFFFFFFFFFC0
         neg     r9d
+#ifdef _ILP32
+        mov     esi, esi
+        mov     edx, edx
+#endif
         movd    xmm0, r9d
         pshufd  xmm0, xmm0, 0x00
         movdqa  xmmword ptr [rsp+0x130], xmm0
@@ -91,10 +95,17 @@ blake3_hash_many_sse41:
         pshufd  xmm5, xmm7, 0x55
         pshufd  xmm6, xmm7, 0xAA
         pshufd  xmm7, xmm7, 0xFF
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
         mov     r10, qword ptr [rdi+0x10]
         mov     r11, qword ptr [rdi+0x18]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+        mov     r10d, dword ptr [rdi+0x8]
+        mov     r11d, dword ptr [rdi+0xc]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1452,7 +1463,11 @@ blake3_hash_many_sse41:
         psubd   xmm1, xmm0
         movdqa  xmmword ptr [rsp+0x120], xmm1
         add     rbx, 128
+#ifndef _ILP32
         add     rdi, 32
+#else
+        add     rdi, 16
+#endif
         sub     rsi, 4
         cmp     rsi, 4
         jnc     2b
@@ -1483,8 +1498,13 @@ blake3_hash_many_sse41:
         pinsrd  xmm14, dword ptr [rsp+0x124], 1
         pinsrd  xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         movaps  xmmword ptr [rsp+0x10], xmm14
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
         mov     r9, qword ptr [rdi+0x8]
+#else
+        mov     r8d, dword ptr [rdi]
+        mov     r9d, dword ptr [rdi+0x4]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx
@@ -1686,7 +1706,11 @@ blake3_hash_many_sse41:
         blendvps xmm2, xmm4, xmm0
         movdqa  xmmword ptr [rsp+0x110], xmm1
         movdqa  xmmword ptr [rsp+0x120], xmm2
+#ifndef _ILP32
         add     rdi, 16
+#else
+        add     rdi, 8
+#endif
         add     rbx, 64
         sub     rsi, 2
 3:
@@ -1699,7 +1723,11 @@ blake3_hash_many_sse41:
         pinsrd  xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2
         movaps  xmm14, xmmword ptr [ROT8+rip]
         movaps  xmm15, xmmword ptr [ROT16+rip]
+#ifndef _ILP32
         mov     r8, qword ptr [rdi]
+#else
+        mov     r8d, dword ptr [rdi]
+#endif
         movzx   eax, byte ptr [rbp+0x40]
         or      eax, r13d
         xor     edx, edx

The x86-64 assembly implementations of BLAKE3 are used in both 64-bit
and 32-bit pointer modes, but they only worked in 64-bit pointer mode.
This PR makes them work in 32-bit pointer mode (x32) as well.
@hvdijk
Copy link
Contributor Author

hvdijk commented Jul 19, 2025

The CI failures are unrelated to this PR; I will rebase after #149611 is merged.

@nikic
Copy link
Contributor

nikic commented Jul 19, 2025

Please also submit this upstream (https://github.com/BLAKE3-team/BLAKE3).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants