Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize Array::concat #111

Merged
merged 2 commits into from
Jan 10, 2025
Merged

Conversation

ericlagergren
Copy link
Contributor

The resulting assembly is much nicer. As a bonus, concat no longer has panicking branches.

The resulting assembly is much nicer. As a bonus, `concat` no longer has
panicking branches.

Signed-off-by: Eric Lagergren <[email protected]>
Signed-off-by: Eric Lagergren <[email protected]>
@ericlagergren
Copy link
Contributor Author

For reference, here is the old assembly when concatenating Array<u8, U256> and Array<u8, U16>:

// sha3_utils::test_concat -- OLD code generation (AArch64, Mach-O style symbols).
//
// Inputs as used by this listing:
//   x0 = address of a 256-byte source (read at offsets 0..255)
//   x1 = address of a 16-byte source  (read as one q register)
//   x8 = address the final 272 bytes are written to (saved in x19,
//        later passed as the memcpy destination)
//
// Stack frame after `sub sp, sp, #864`:
//   sp+8   .. sp+263 : copy of the 256-byte source
//   sp+288 .. sp+303 : copy of the 16-byte source
//   sp+320 .. sp+591 : 272-byte intermediate buffer (first memcpy target)
//   sp+592 .. sp+863 : 272-byte buffer filled one byte at a time by the loop;
//                      its first 40 bytes are reused for the panic arguments
//
// Loop registers:
//   x8  = write index into the sp+592 buffer (stops at 272)
//   x10 = read index into the first copy;  x20 tracks it after each byte
//   x9  = read index into the second copy; x21 tracks it after each byte
//   x11 = sp+8, x12 = sp+288, x13 = sp+592 (buffer base addresses)
//   w14 = 1 while bytes come from the first copy, 0 after the switch
//   w22 = 1 until the first copy is seen to be exhausted, then 0
//
// NOTE(review): `LBB0_5` is a branch target below but is not defined
// anywhere in this listing -- presumably the label was dropped by the
// tool that produced the output. Confirm against the raw compiler output.
sha3_utils::test_concat:
Lfunc_begin0:
	// Prologue: save callee-saved x19-x22, x27, x28 plus FP/LR (64 bytes),
	// set the frame pointer, then reserve 864 bytes of locals.
	stp x28, x27, [sp, #-64]!
	stp x22, x21, [sp, #16]
	stp x20, x19, [sp, #32]
	stp x29, x30, [sp, #48]
	add x29, sp, #48
	sub sp, sp, #864
	// Keep the destination pointer in a callee-saved register; x8 is
	// reused as the loop's write index below.
	mov x19, x8
	// Zero the indices: x21/x20 (tracked read positions), x9/x10 (read
	// indices), x8 (write index).
	mov x21, #0
	mov x20, #0
	mov x9, #0
	mov x10, #0
	mov x8, #0
	// Copy the 256 bytes at [x0] to sp+8 .. sp+263, 32 bytes per ldp.
	// x12 == sp throughout this sequence, so [x12, #N] and [sp, #N]
	// address the same area.
	ldp q0, q1, [x0, #192]
	mov x12, sp
	stur q0, [x12, #200]
	stur q1, [x12, #216]
	ldp q0, q1, [x0, #224]
	stur q0, [x12, #232]
	stur q1, [x12, #248]
	ldp q0, q1, [x0, #128]
	stur q0, [x12, #136]
	stur q1, [x12, #152]
	ldp q0, q1, [x0, #160]
	stur q0, [x12, #168]
	stur q1, [x12, #184]
	ldp q0, q1, [x0, #64]
	stur q0, [sp, #72]
	stur q1, [sp, #88]
	ldp q0, q1, [x0, #96]
	stur q0, [sp, #104]
	stur q1, [sp, #120]
	ldp q0, q1, [x0]
	stur q0, [sp, #8]
	stur q1, [sp, #24]
	ldp q0, q1, [x0, #32]
	stur q0, [sp, #40]
	// x11 = sp+8: base of the first copy (scheduled between the last two
	// stores of the copy).
	add x11, x12, #8
	stur q1, [sp, #56]
	// x12 = sp+288: base of the second copy, filled from [x1] below.
	add x12, x12, #288
	ldr q0, [x1]
	// Initial loop state: first copy not yet exhausted (w22 = 1), output
	// buffer at sp+592, currently reading from the first copy (w14 = 1).
	mov w22, #1
	add x13, sp, #592
	mov w14, #1
	str q0, [sp, #288]
	b LBB0_2
LBB0_1:
	// Emit one byte from the first copy: out[x8] = first[x10], then
	// advance x10 (mirrored into x20) and x8.
	add x20, x10, #1
	add x15, x11, x10
	mov w14, #1
	mov x10, x20
	ldrb w15, [x15]
	strb w15, [x13, x8]
	add x8, x8, #1
	// All 272 output bytes written -> leave the loop.
	cmp x8, #272
	b.eq LBB0_7
LBB0_2:
	// Loop head. If w14 is clear the first copy is already finished and
	// control goes to LBB0_5 (label not present in this listing, see the
	// NOTE in the header).
	tbz w14, #0, LBB0_5
	// Still on the first copy: keep taking bytes until x10 reaches 256.
	cmp x10, #256
	b.ne LBB0_1
	// First copy exhausted: record that in w22 and switch to the second.
	mov w22, #0
	// More output is still required but the second copy has no bytes left
	// (x9 == 16) -> expect_failed path.
	cmp x9, #16
	b.eq LBB0_10
	// Emit one byte from the second copy: out[x8] = second[x9], then
	// advance x9 (mirrored into x21) and x8.
	mov w14, #0
	add x21, x9, #1
	add x15, x12, x9
	mov x9, x21
	ldrb w15, [x15]
	strb w15, [x13, x8]
	add x8, x8, #1
	cmp x8, #272
	b.ne LBB0_2
LBB0_7:
	// Loop done: memcpy(sp+320, sp+592, 272) moves the assembled bytes
	// into the intermediate buffer.
	add x0, sp, #320
	add x1, sp, #592
	mov w2, #272
	bl _memcpy
	// Post-loop checks; either failure goes to the panic_fmt path:
	//   (w22 still set) AND (x20 != 256)  -> LBB0_11
	//   x21 != 16                         -> LBB0_11
	cmp x20, #256
	cset w8, ne
	and w8, w22, w8
	tbnz w8, #0, LBB0_11
	cmp x21, #16
	b.ne LBB0_11
	// Success: memcpy(x19, sp+320, 272) writes the result through the
	// pointer that arrived in x8.
	add x1, sp, #320
	mov x0, x19
	mov w2, #272
	bl _memcpy
	// Epilogue: release the locals and restore the saved registers in the
	// reverse order of the prologue.
	add sp, sp, #864
	ldp x29, x30, [sp, #48]
	ldp x20, x19, [sp, #32]
	ldp x22, x21, [sp, #16]
	ldp x28, x27, [sp], #64
	ret
LBB0_10:
	// Failure path 1: call core::option::expect_failed with
	// x0 = &l___unnamed_1, w1 = 47, x2 = &l___unnamed_2.
	// Presumably x0/w1 are the message pointer and length and x2 the
	// source location, and the call does not return -- confirm.
Lloh0:
	adrp x0, l___unnamed_1@PAGE
Lloh1:
	add x0, x0, l___unnamed_1@PAGEOFF
Lloh2:
	adrp x2, l___unnamed_2@PAGE
Lloh3:
	add x2, x2, l___unnamed_2@PAGEOFF
	mov w1, #47
	bl core::option::expect_failed
LBB0_11:
	// Failure path 2: build a five-word argument record at sp+592 and
	// call core::panicking::panic_fmt(x0 = sp+592, x1 = &l___unnamed_4):
	//   [sp+592] = &l___unnamed_3
	//   [sp+600] = 1
	//   [sp+608] = 8
	//   [sp+616] = 0
	//   [sp+624] = 0
	// Presumably this is a core::fmt::Arguments value with one string
	// piece and no formatted arguments -- confirm.
Lloh4:
	adrp x8, l___unnamed_3@PAGE
Lloh5:
	add x8, x8, l___unnamed_3@PAGEOFF
	mov w9, #1
	str x8, [sp, #592]
	str x9, [sp, #600]
	mov w8, #8
	str xzr, [sp, #624]
	str x8, [sp, #608]
	str xzr, [sp, #616]
Lloh6:
	adrp x1, l___unnamed_4@PAGE
Lloh7:
	add x1, x1, l___unnamed_4@PAGEOFF
	add x0, sp, #592
	bl core::panicking::panic_fmt

And here is the new assembly:

// sha3_utils::test_concat -- NEW code generation (AArch64).
//
// Straight-line copy with no stack frame, no branches and no calls:
//   bytes   0..255 of [x8] <- bytes 0..255 of [x0]  (eight 32-byte ldp/stp pairs)
//   bytes 256..271 of [x8] <- bytes 0..15  of [x1]  (one 16-byte ldr/str)
// Only q0 and q1 are used as scratch; x0, x1 and x8 are read, not modified.
sha3_utils::test_concat:
Lfunc_begin0:
	// First source, offsets 192..255.
	ldp q0, q1, [x0, #192]
	stp q0, q1, [x8, #192]
	ldp q0, q1, [x0, #224]
	stp q0, q1, [x8, #224]
	// First source, offsets 128..191.
	ldp q0, q1, [x0, #128]
	stp q0, q1, [x8, #128]
	ldp q0, q1, [x0, #160]
	stp q0, q1, [x8, #160]
	// First source, offsets 64..127.
	ldp q0, q1, [x0, #64]
	stp q0, q1, [x8, #64]
	ldp q0, q1, [x0, #96]
	stp q0, q1, [x8, #96]
	// First source, offsets 0..63.
	ldp q0, q1, [x0]
	stp q0, q1, [x8]
	ldp q0, q1, [x0, #32]
	stp q0, q1, [x8, #32]
	// Second source: its 16 bytes land directly after the first 256.
	ldr q0, [x1]
	str q0, [x8, #256]
	ret

@tarcieri tarcieri merged commit 03b3c79 into RustCrypto:master Jan 10, 2025
14 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants