Skip to content

Commit 09c27a1

Browse files
robn authored and behlendorf committed
icp: add SHA512 implementation using Intel SHA512 extensions
Generated from crypto/sha/asm/sha512-x86_64.pl in openssl/openssl@241d4826f8.

Sponsored-by: TrueNAS
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Attila Fülöp <attila@fueloep.org>
Signed-off-by: Rob Norris <rob.norris@truenas.com>
Closes #18233
1 parent 3547a35 commit 09c27a1

File tree

2 files changed

+338
-1
lines changed

2 files changed

+338
-1
lines changed

module/icp/algs/sha2/sha512_impl.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
/*
2424
* Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
25+
* Copyright (c) 2026, TrueNAS.
2526
*/
2627

2728
#include <sys/simd.h>
@@ -92,6 +93,20 @@ const sha512_ops_t sha512_avx2_impl = {
9293
};
9394
#endif
9495

96+
#if defined(HAVE_SHA512EXT)
97+
/*
 * Support check for the "sha512ext" implementation: reports true only
 * when both kfpu_allowed() and zfs_sha512ext_available() return true.
 * Installed as the .is_supported hook of sha512_sha512ext_impl.
 */
static boolean_t sha2_have_sha512ext(void)
98+
{
99+
return (kfpu_allowed() && zfs_sha512ext_available());
100+
}
101+
102+
/*
 * NOTE(review): TF() is defined outside this hunk; presumably it declares
 * the assembly routine zfs_sha512_transform_sha512ext and generates the
 * tf_sha512_sha512ext wrapper referenced below -- confirm against the
 * macro definition.
 */
TF(zfs_sha512_transform_sha512ext, tf_sha512_sha512ext);
103+
/*
 * Ops table entry for the SHA512-extension implementation, published
 * under the name "sha512ext".
 */
const sha512_ops_t sha512_sha512ext_impl = {
104+
.is_supported = sha2_have_sha512ext,
105+
.transform = tf_sha512_sha512ext,
106+
.name = "sha512ext"
107+
};
108+
#endif
109+
95110
#elif defined(__aarch64__) || defined(__arm__)
96111
extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t);
97112
const sha512_ops_t sha512_armv7_impl = {
@@ -164,6 +179,9 @@ static const sha512_ops_t *const sha512_impls[] = {
164179
#if defined(__x86_64) && defined(HAVE_AVX2)
165180
&sha512_avx2_impl,
166181
#endif
182+
#if defined(__x86_64) && defined(HAVE_SHA512EXT)
183+
&sha512_sha512ext_impl,
184+
#endif
167185
#if defined(__aarch64__) || defined(__arm__)
168186
&sha512_armv7_impl,
169187
#if defined(__aarch64__)

module/icp/asm-x86_64/sha2/sha512-x86_64.S

Lines changed: 320 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
// SPDX-License-Identifier: Apache-2.0
22
/*
3-
* Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
3+
* Copyright 2004-2025 The OpenSSL Project Authors. All Rights Reserved.
44
*
55
* Licensed under the Apache License, Version 2.0 (the "License");
66
* you may not use this file except in compliance with the License.
@@ -114,6 +114,50 @@ K512:
114114
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
115115
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
116116

117+
/*
 * K512_single: the 80 SHA-512 round constants K[0]..K[79] (FIPS 180-4),
 * stored once each as 40 rows of two 64-bit values (640 bytes), with the
 * table start aligned to 64 bytes.  zfs_sha512_transform_sha512ext reads
 * it through %r9 in 32-byte steps (four constants per vpaddq).
 */
.balign 64
118+
SET_OBJ(K512_single)
119+
K512_single:
120+
.quad 0x428a2f98d728ae22, 0x7137449123ef65cd
121+
.quad 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
122+
.quad 0x3956c25bf348b538, 0x59f111f1b605d019
123+
.quad 0x923f82a4af194f9b, 0xab1c5ed5da6d8118
124+
.quad 0xd807aa98a3030242, 0x12835b0145706fbe
125+
.quad 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
126+
.quad 0x72be5d74f27b896f, 0x80deb1fe3b1696b1
127+
.quad 0x9bdc06a725c71235, 0xc19bf174cf692694
128+
.quad 0xe49b69c19ef14ad2, 0xefbe4786384f25e3
129+
.quad 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
130+
.quad 0x2de92c6f592b0275, 0x4a7484aa6ea6e483
131+
.quad 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
132+
.quad 0x983e5152ee66dfab, 0xa831c66d2db43210
133+
.quad 0xb00327c898fb213f, 0xbf597fc7beef0ee4
134+
.quad 0xc6e00bf33da88fc2, 0xd5a79147930aa725
135+
.quad 0x06ca6351e003826f, 0x142929670a0e6e70
136+
.quad 0x27b70a8546d22ffc, 0x2e1b21385c26c926
137+
.quad 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
138+
.quad 0x650a73548baf63de, 0x766a0abb3c77b2a8
139+
.quad 0x81c2c92e47edaee6, 0x92722c851482353b
140+
.quad 0xa2bfe8a14cf10364, 0xa81a664bbc423001
141+
.quad 0xc24b8b70d0f89791, 0xc76c51a30654be30
142+
.quad 0xd192e819d6ef5218, 0xd69906245565a910
143+
.quad 0xf40e35855771202a, 0x106aa07032bbd1b8
144+
.quad 0x19a4c116b8d2d0c8, 0x1e376c085141ab53
145+
.quad 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
146+
.quad 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
147+
.quad 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
148+
.quad 0x748f82ee5defb2fc, 0x78a5636f43172f60
149+
.quad 0x84c87814a1f0ab72, 0x8cc702081a6439ec
150+
.quad 0x90befffa23631e28, 0xa4506cebde82bde9
151+
.quad 0xbef9a3f7b2c67915, 0xc67178f2e372532b
152+
.quad 0xca273eceea26619c, 0xd186b8c721c0c207
153+
.quad 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
154+
.quad 0x06f067aa72176fba, 0x0a637dc5a2c898a6
155+
.quad 0x113f9804bef90dae, 0x1b710b35131c471b
156+
.quad 0x28db77f523047d84, 0x32caab7b40c72493
157+
.quad 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
158+
.quad 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
159+
.quad 0x5fcb6fab3ad6faec, 0x6c44198c4a475817
160+
117161
ENTRY_ALIGN(zfs_sha512_transform_x64, 16)
118162
.cfi_startproc
119163
ENDBR
@@ -4010,13 +4054,288 @@ ENTRY_ALIGN(zfs_sha512_transform_avx2, 64)
40104054
SET_SIZE(zfs_sha512_transform_avx2)
40114055
STACK_FRAME_NON_STANDARD zfs_sha512_transform_avx2
40124056

4057+
/*
 * zfs_sha512_transform_sha512ext: SHA-512 block transform built on the
 * x86 SHA512 extension instructions.
 *
 * Register use, as read from the code below:
 *   %rdi  64-byte hash state; loaded on entry, written back on exit
 *   %rsi  input data; 128 bytes are consumed per loop iteration
 *   %rdx  number of 128-byte blocks; returns immediately when zero
 *   %r9   base address of K512_single
 *
 * The SHA512 instructions are emitted as raw .byte sequences.  Decoded
 * from their VEX encoding they are:
 *   c4 62 27 cb e0   vsha512rnds2 %xmm0,%ymm11,%ymm12
 *   c4 62 1f cb d8   vsha512rnds2 %xmm0,%ymm12,%ymm11
 *   c4 e2 7f cc XX   vsha512msg1  (ModRM XX: dc, e5, ee, f3)
 *   c4 e2 7f cd XX   vsha512msg2  (ModRM XX: de, e3, ec, f5)
 * NOTE(review): presumably spelled as bytes so that assemblers without
 * SHA512 mnemonics can still build this file -- confirm.
 */
#ifdef HAVE_SHA512EXT
4058+
ENTRY_ALIGN(zfs_sha512_transform_sha512ext, 64)
4059+
.cfi_startproc
4060+
ENDBR
4061+
/* A zero block count skips everything, including the state write-back. */
orq %rdx,%rdx
4062+
je .Lsha512ext_done
4063+
4064+
/*
 * Replicate the 16 bytes at K512+1280 into both halves of %ymm15; it is
 * the vpshufb control applied to every input load below.
 * NOTE(review): presumably the byte-order mask that follows the K512
 * constants -- confirm the offset against the K512 layout.
 */
vbroadcasti128 1280+K512(%rip),%ymm15
4065+
4066+
4067+
4068+
4069+
4070+
4071+
4072+
4073+
4074+
4075+
/*
 * Load state words s[0..7] and reorder them (low lane first):
 *   %ymm13 = { s[5], s[4], s[1], s[0] }
 *   %ymm14 = { s[7], s[6], s[3], s[2] }
 */
vmovdqu 0(%rdi),%ymm0
4076+
vmovdqu 32(%rdi),%ymm1
4077+
4078+
vperm2i128 $0x20,%ymm1,%ymm0,%ymm2
4079+
vperm2i128 $0x31,%ymm1,%ymm0,%ymm3
4080+
4081+
vpermq $0x1b,%ymm2,%ymm13
4082+
vpermq $0x1b,%ymm3,%ymm14
4083+
4084+
4085+
leaq K512_single(%rip),%r9
4086+
4087+
.balign 32
4088+
.Lsha512ext_block_loop:
4089+
4090+
/*
 * Work on copies (%ymm11/%ymm12) so the state at block entry, kept in
 * %ymm13/%ymm14, can be added back after the last round group.
 */
vmovdqa %ymm13,%ymm11
4091+
vmovdqa %ymm14,%ymm12
4092+
4093+
4094+
/*
 * First four groups: each loads 32 input bytes, byte-shuffles them into
 * a message register (%ymm3..%ymm6), adds four constants into %ymm0 and
 * issues two vsha512rnds2, swapping the halves of %ymm0 in between.
 */
vmovdqu 0(%rsi),%ymm0
4095+
vpshufb %ymm15,%ymm0,%ymm3
4096+
vpaddq 0(%r9),%ymm3,%ymm0
4097+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4098+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4099+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4100+
4101+
4102+
vmovdqu 32(%rsi),%ymm0
4103+
vpshufb %ymm15,%ymm0,%ymm4
4104+
vpaddq 32(%r9),%ymm4,%ymm0
4105+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4106+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4107+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4108+
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
4109+
4110+
4111+
vmovdqu 64(%rsi),%ymm0
4112+
vpshufb %ymm15,%ymm0,%ymm5
4113+
vpaddq 64(%r9),%ymm5,%ymm0
4114+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4115+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4116+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4117+
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
4118+
4119+
4120+
vmovdqu 96(%rsi),%ymm0
4121+
vpshufb %ymm15,%ymm0,%ymm6
4122+
vpaddq 96(%r9),%ymm6,%ymm0
4123+
vpermq $0x1b,%ymm6,%ymm8
4124+
vpermq $0x39,%ymm5,%ymm9
4125+
vpblendd $0x3f,%ymm9,%ymm8,%ymm8
4126+
vpaddq %ymm8,%ymm3,%ymm3
4127+
.byte 0xc4,0xe2,0x7f,0xcd,0xde
4128+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4129+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4130+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4131+
.byte 0xc4,0xe2,0x7f,0xcc,0xee
4132+
4133+
/*
 * Remaining groups take their message words from the schedule: the
 * vpermq/vpblendd/vpaddq sequence plus vsha512msg2 extends one of
 * %ymm3..%ymm6 while the rounds consume the previous one, and
 * vsha512msg1 starts the next extension.
 */
vpaddq 128(%r9),%ymm3,%ymm0
4134+
vpermq $0x1b,%ymm3,%ymm8
4135+
vpermq $0x39,%ymm6,%ymm9
4136+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4137+
vpaddq %ymm7,%ymm4,%ymm4
4138+
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
4139+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4140+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4141+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4142+
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
4143+
4144+
vpaddq 160(%r9),%ymm4,%ymm0
4145+
vpermq $0x1b,%ymm4,%ymm8
4146+
vpermq $0x39,%ymm3,%ymm9
4147+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4148+
vpaddq %ymm7,%ymm5,%ymm5
4149+
.byte 0xc4,0xe2,0x7f,0xcd,0xec
4150+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4151+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4152+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4153+
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
4154+
4155+
vpaddq 192(%r9),%ymm5,%ymm0
4156+
vpermq $0x1b,%ymm5,%ymm8
4157+
vpermq $0x39,%ymm4,%ymm9
4158+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4159+
vpaddq %ymm7,%ymm6,%ymm6
4160+
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
4161+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4162+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4163+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4164+
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
4165+
4166+
vpaddq 224(%r9),%ymm6,%ymm0
4167+
vpermq $0x1b,%ymm6,%ymm8
4168+
vpermq $0x39,%ymm5,%ymm9
4169+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4170+
vpaddq %ymm7,%ymm3,%ymm3
4171+
.byte 0xc4,0xe2,0x7f,0xcd,0xde
4172+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4173+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4174+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4175+
.byte 0xc4,0xe2,0x7f,0xcc,0xee
4176+
4177+
vpaddq 256(%r9),%ymm3,%ymm0
4178+
vpermq $0x1b,%ymm3,%ymm8
4179+
vpermq $0x39,%ymm6,%ymm9
4180+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4181+
vpaddq %ymm7,%ymm4,%ymm4
4182+
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
4183+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4184+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4185+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4186+
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
4187+
4188+
vpaddq 288(%r9),%ymm4,%ymm0
4189+
vpermq $0x1b,%ymm4,%ymm8
4190+
vpermq $0x39,%ymm3,%ymm9
4191+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4192+
vpaddq %ymm7,%ymm5,%ymm5
4193+
.byte 0xc4,0xe2,0x7f,0xcd,0xec
4194+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4195+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4196+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4197+
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
4198+
4199+
vpaddq 320(%r9),%ymm5,%ymm0
4200+
vpermq $0x1b,%ymm5,%ymm8
4201+
vpermq $0x39,%ymm4,%ymm9
4202+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4203+
vpaddq %ymm7,%ymm6,%ymm6
4204+
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
4205+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4206+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4207+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4208+
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
4209+
4210+
vpaddq 352(%r9),%ymm6,%ymm0
4211+
vpermq $0x1b,%ymm6,%ymm8
4212+
vpermq $0x39,%ymm5,%ymm9
4213+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4214+
vpaddq %ymm7,%ymm3,%ymm3
4215+
.byte 0xc4,0xe2,0x7f,0xcd,0xde
4216+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4217+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4218+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4219+
.byte 0xc4,0xe2,0x7f,0xcc,0xee
4220+
4221+
vpaddq 384(%r9),%ymm3,%ymm0
4222+
vpermq $0x1b,%ymm3,%ymm8
4223+
vpermq $0x39,%ymm6,%ymm9
4224+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4225+
vpaddq %ymm7,%ymm4,%ymm4
4226+
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
4227+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4228+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4229+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4230+
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
4231+
4232+
vpaddq 416(%r9),%ymm4,%ymm0
4233+
vpermq $0x1b,%ymm4,%ymm8
4234+
vpermq $0x39,%ymm3,%ymm9
4235+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4236+
vpaddq %ymm7,%ymm5,%ymm5
4237+
.byte 0xc4,0xe2,0x7f,0xcd,0xec
4238+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4239+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4240+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4241+
.byte 0xc4,0xe2,0x7f,0xcc,0xdc
4242+
4243+
vpaddq 448(%r9),%ymm5,%ymm0
4244+
vpermq $0x1b,%ymm5,%ymm8
4245+
vpermq $0x39,%ymm4,%ymm9
4246+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4247+
vpaddq %ymm7,%ymm6,%ymm6
4248+
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
4249+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4250+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4251+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4252+
.byte 0xc4,0xe2,0x7f,0xcc,0xe5
4253+
4254+
vpaddq 480(%r9),%ymm6,%ymm0
4255+
vpermq $0x1b,%ymm6,%ymm8
4256+
vpermq $0x39,%ymm5,%ymm9
4257+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4258+
vpaddq %ymm7,%ymm3,%ymm3
4259+
.byte 0xc4,0xe2,0x7f,0xcd,0xde
4260+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4261+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4262+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4263+
.byte 0xc4,0xe2,0x7f,0xcc,0xee
4264+
4265+
vpaddq 512(%r9),%ymm3,%ymm0
4266+
vpermq $0x1b,%ymm3,%ymm8
4267+
vpermq $0x39,%ymm6,%ymm9
4268+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4269+
vpaddq %ymm7,%ymm4,%ymm4
4270+
.byte 0xc4,0xe2,0x7f,0xcd,0xe3
4271+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4272+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4273+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4274+
.byte 0xc4,0xe2,0x7f,0xcc,0xf3
4275+
4276+
4277+
/* From here on no further vsha512msg1 is issued. */
vpaddq 544(%r9),%ymm4,%ymm0
4278+
vpermq $0x1b,%ymm4,%ymm8
4279+
vpermq $0x39,%ymm3,%ymm9
4280+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4281+
vpaddq %ymm7,%ymm5,%ymm5
4282+
.byte 0xc4,0xe2,0x7f,0xcd,0xec
4283+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4284+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4285+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4286+
4287+
4288+
vpaddq 576(%r9),%ymm5,%ymm0
4289+
vpermq $0x1b,%ymm5,%ymm8
4290+
vpermq $0x39,%ymm4,%ymm9
4291+
vpblendd $0x3f,%ymm9,%ymm8,%ymm7
4292+
vpaddq %ymm7,%ymm6,%ymm6
4293+
.byte 0xc4,0xe2,0x7f,0xcd,0xf5
4294+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4295+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4296+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4297+
4298+
4299+
/* Last group (constants at offset 608): rounds only, no schedule work. */
vpaddq 608(%r9),%ymm6,%ymm0
4300+
.byte 0xc4,0x62,0x27,0xcb,0xe0
4301+
vperm2i128 $0x1,%ymm0,%ymm0,%ymm0
4302+
.byte 0xc4,0x62,0x1f,0xcb,0xd8
4303+
4304+
4305+
/* Fold the block's result into the running state and step to the next block. */
vpaddq %ymm12,%ymm14,%ymm14
4306+
vpaddq %ymm11,%ymm13,%ymm13
4307+
addq $128,%rsi
4308+
decq %rdx
4309+
jnz .Lsha512ext_block_loop
4310+
4311+
4312+
4313+
4314+
/* Undo the entry reordering so s[0..7] are stored in their original order. */
vperm2i128 $0x31,%ymm14,%ymm13,%ymm1
4315+
vperm2i128 $0x20,%ymm14,%ymm13,%ymm2
4316+
vpermq $0xb1,%ymm1,%ymm1
4317+
vpermq $0xb1,%ymm2,%ymm2
4318+
vmovdqu %ymm1,0(%rdi)
4319+
vmovdqu %ymm2,32(%rdi)
4320+
4321+
vzeroupper
4322+
.Lsha512ext_done:
4323+
RET
4324+
.cfi_endproc
4325+
SET_SIZE(zfs_sha512_transform_sha512ext)
4326+
STACK_FRAME_NON_STANDARD zfs_sha512_transform_sha512ext
4327+
#endif /* HAVE_SHA512EXT */
4328+
40134329
/*
 * Workaround for missing asm macro in RHEL 8.
 *
 * Taken when HAVE_STACK_FRAME_NON_STANDARD is defined but
 * HAVE_STACK_FRAME_NON_STANDARD_ASM is not: the section entries are
 * written out by hand, one 32-bit self-relative reference ("sym - .")
 * per transform routine.  The sha512ext entry is emitted only when that
 * routine is built (HAVE_SHA512EXT).
 */
40144330
#if defined(__linux__) && defined(HAVE_STACK_FRAME_NON_STANDARD) && \
40154331
! defined(HAVE_STACK_FRAME_NON_STANDARD_ASM)
40164332
.section .discard.func_stack_frame_non_standard, "aw"
40174333
.long zfs_sha512_transform_x64 - .
40184334
.long zfs_sha512_transform_avx - .
40194335
.long zfs_sha512_transform_avx2 - .
4336+
#ifdef HAVE_SHA512EXT
4337+
.long zfs_sha512_transform_sha512ext - .
4338+
#endif
40204339
#endif
40214340

40224341
#if defined(__ELF__)

0 commit comments

Comments
 (0)