Skip to content

Commit 7cfd90a

Browse files
committed
fix: don't clobber saved frame pointer in arm64 assembly functions
The arm64 NEON assembly functions in this repository overwrite the frame pointer saved by their callers, leading to crashes from the Go runtime execution tracer and profilers, which use frame pointer unwinding.

For historical reasons, on arm64 Go functions save the caller's frame pointer register (x29) one word below their stack frame. See go.dev/s/regabi#arm64-architecture. The assembly functions here, translated from C compiler output, save values at the top of their frame, and so overwrite the frame pointer saved by the caller. We can fix this by decrementing the stack pointer past where that frame pointer is saved before saving anything on the stack.

Fixed with this sed script on my macOS laptop, plus manual cleanup to match indentation:

```sed
/stp[\t ]*x29/i\
// The Go ABI saves the frame pointer register one word below the \
// caller's frame. Make room so we don't overwrite it. Needs to stay \
// 16-byte aligned \
SUB $16, RSP
/ldp[\t ]*x29/a\
// Put the stack pointer back where it was \
ADD $16, RSP
```

Ran the script from the root of this repository with:

```
find . -name '*_arm64.s' -exec sed -f fix.sed -i '' {} +
```

Then manually inspected the assembly for missing SUBs/ADDs at the beginning of functions and prior to returns.

Fixes #150
1 parent 6843412 commit 7cfd90a

File tree

8 files changed

+78
-0
lines changed

8 files changed

+78
-0
lines changed

arrow/compute/internal/kernels/cast_numeric_neon_arm64.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@ TEXT ·_cast_type_numeric_neon(SB), $0-40
1010
MOVD len+32(FP), R4
1111

1212

13+
// The Go ABI saves the frame pointer register one word below the
14+
// caller's frame. Make room so we don't overwrite it. Needs to stay
15+
// 16-byte aligned
16+
SUB $16, RSP
1317
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1418
WORD $0x7100181f // cmp w0, #6
1519
WORD $0x910003fd // mov x29, sp
@@ -4447,6 +4451,8 @@ LBB0_892:
44474451
BNE LBB0_892
44484452
LBB0_893:
44494453
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
4454+
// Put the stack pointer back where it was
4455+
ADD $16, RSP
44504456
RET
44514457
LBB0_894:
44524458
WORD $0x927b6909 // and x9, x8, #0xffffffe0

arrow/math/int64_neon_arm64.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ TEXT ·_sum_int64_neon(SB), $0-24
1111
MOVD len+8(FP), R1
1212
MOVD res+16(FP), R2
1313
14+
// The Go ABI saves the frame pointer register one word below the
15+
// caller's frame. Make room so we don't overwrite it. Needs to stay
16+
// 16-byte aligned
17+
SUB $16, RSP
1418
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1519
WORD $0x910003fd // mov x29, sp
1620
CBZ R1, LBB0_3
@@ -23,6 +27,8 @@ LBB0_3:
2327
WORD $0xaa1f03e9 // mov x9, xzr
2428
WORD $0xf9000049 // str x9, [x2]
2529
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
30+
// Put the stack pointer back where it was
31+
ADD $16, RSP
2632
RET
2733
LBB0_4:
2834
WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc
@@ -54,5 +60,7 @@ LBB0_8:
5460
LBB0_9:
5561
WORD $0xf9000049 // str x9, [x2]
5662
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
63+
// Put the stack pointer back where it was
64+
ADD $16, RSP
5765
RET
5866

arrow/math/uint64_neon_arm64.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ TEXT ·_sum_uint64_neon(SB), $0-24
1111
MOVD len+8(FP), R1
1212
MOVD res+16(FP), R2
1313
14+
// The Go ABI saves the frame pointer register one word below the
15+
// caller's frame. Make room so we don't overwrite it. Needs to stay
16+
// 16-byte aligned
17+
SUB $16, RSP
1418
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1519
WORD $0x910003fd // mov x29, sp
1620
CBZ R1, LBB0_3
@@ -23,6 +27,8 @@ LBB0_3:
2327
WORD $0xaa1f03e9 // mov x9, xzr
2428
WORD $0xf9000049 // str x9, [x2]
2529
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
30+
// Put the stack pointer back where it was
31+
ADD $16, RSP
2632
RET
2733
LBB0_4:
2834
WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc
@@ -54,5 +60,7 @@ LBB0_8:
5460
LBB0_9:
5561
WORD $0xf9000049 // str x9, [x2]
5662
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
63+
// Put the stack pointer back where it was
64+
ADD $16, RSP
5765
RET
5866

arrow/memory/memory_neon_arm64.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@ TEXT ·_memset_neon(SB), $0-24
1111
MOVD len+8(FP), R1
1212
MOVD c+16(FP), R2
1313

14+
// The Go ABI saves the frame pointer register one word below the
15+
// caller's frame. Make room so we don't overwrite it. Needs to stay
16+
// 16-byte aligned
17+
SUB $16, RSP
1418
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1519
WORD $0x8b010008 // add x8, x0, x1
1620
WORD $0xeb00011f // cmp x8, x0
@@ -40,4 +44,6 @@ LBB0_6:
4044
BNE LBB0_6
4145
LBB0_7:
4246
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
47+
// Put the stack pointer back where it was
48+
ADD $16, RSP
4349
RET

internal/utils/min_max_neon_arm64.s

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@ TEXT ·_int32_max_min_neon(SB), $0-32
1313
MOVD minout+16(FP), R2
1414
MOVD maxout+24(FP), R3
1515

16+
// The Go ABI saves the frame pointer register one word below the
17+
// caller's frame. Make room so we don't overwrite it. Needs to stay
18+
// 16-byte aligned
19+
SUB $16, RSP
1620
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1721
WORD $0x7100043f // cmp w1, #1
1822
WORD $0x910003fd // mov x29, sp
@@ -32,6 +36,8 @@ LBB0_3:
3236
WORD $0xb900006b // str w11, [x3]
3337
WORD $0xb900004a // str w10, [x2]
3438
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
39+
// Put the stack pointer back where it was
40+
ADD $16, RSP
3541
RET
3642
LBB0_4:
3743
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -76,6 +82,8 @@ LBB0_9:
7682
WORD $0xb900006b // str w11, [x3]
7783
WORD $0xb900004a // str w10, [x2]
7884
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
85+
// Put the stack pointer back where it was
86+
ADD $16, RSP
7987
RET
8088

8189
// func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
@@ -86,6 +94,10 @@ TEXT ·_uint32_max_min_neon(SB), $0-32
8694
MOVD minout+16(FP), R2
8795
MOVD maxout+24(FP), R3
8896
97+
// The Go ABI saves the frame pointer register one word below the
98+
// caller's frame. Make room so we don't overwrite it. Needs to stay
99+
// 16-byte aligned
100+
SUB $16, RSP
89101
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
90102
WORD $0x7100043f // cmp w1, #1
91103
WORD $0x910003fd // mov x29, sp
@@ -105,6 +117,8 @@ LBB1_3:
105117
WORD $0xb900006a // str w10, [x3]
106118
WORD $0xb900004b // str w11, [x2]
107119
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
120+
// Put the stack pointer back where it was
121+
ADD $16, RSP
108122
RET
109123
LBB1_4:
110124
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -149,6 +163,8 @@ LBB1_9:
149163
WORD $0xb900006a // str w10, [x3]
150164
WORD $0xb900004b // str w11, [x2]
151165
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
166+
// Put the stack pointer back where it was
167+
ADD $16, RSP
152168
RET
153169

154170
// func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer)
@@ -159,6 +175,10 @@ TEXT ·_int64_max_min_neon(SB), $0-32
159175
MOVD minout+16(FP), R2
160176
MOVD maxout+24(FP), R3
161177

178+
// The Go ABI saves the frame pointer register one word below the
179+
// caller's frame. Make room so we don't overwrite it. Needs to stay
180+
// 16-byte aligned
181+
SUB $16, RSP
162182
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
163183
WORD $0x7100043f // cmp w1, #1
164184
WORD $0x910003fd // mov x29, sp
@@ -178,6 +198,8 @@ LBB2_3:
178198
WORD $0xf900006b // str x11, [x3]
179199
WORD $0xf900004a // str x10, [x2]
180200
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
201+
// Put the stack pointer back where it was
202+
ADD $16, RSP
181203
RET
182204
LBB2_4:
183205
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -234,6 +256,8 @@ LBB2_9:
234256
WORD $0xf900006b // str x11, [x3]
235257
WORD $0xf900004a // str x10, [x2]
236258
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
259+
// Put the stack pointer back where it was
260+
ADD $16, RSP
237261
RET
238262

239263

@@ -245,6 +269,10 @@ TEXT ·_uint64_max_min_neon(SB), $0-32
245269
MOVD minout+16(FP), R2
246270
MOVD maxout+24(FP), R3
247271

272+
// The Go ABI saves the frame pointer register one word below the
273+
// caller's frame. Make room so we don't overwrite it. Needs to stay
274+
// 16-byte aligned
275+
SUB $16, RSP
248276
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
249277
WORD $0x7100043f // cmp w1, #1
250278
WORD $0x910003fd // mov x29, sp
@@ -264,6 +292,8 @@ LBB3_3:
264292
WORD $0xf900006a // str x10, [x3]
265293
WORD $0xf900004b // str x11, [x2]
266294
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
295+
// Put the stack pointer back where it was
296+
ADD $16, RSP
267297
RET
268298
LBB3_4:
269299
WORD $0x927e7509 // and x9, x8, #0xfffffffc
@@ -320,5 +350,7 @@ LBB3_9:
320350
WORD $0xf900006a // str x10, [x3]
321351
WORD $0xf900004b // str x11, [x2]
322352
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
353+
// Put the stack pointer back where it was
354+
ADD $16, RSP
323355
RET
324356

parquet/internal/bmi/bitmap_neon_arm64.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ TEXT ·_levels_to_bitmap_neon(SB), $0-32
88
MOVD numLevels+8(FP), R1
99
MOVD rhs+16(FP), R2
1010

11+
// The Go ABI saves the frame pointer register one word below the
12+
// caller's frame. Make room so we don't overwrite it. Needs to stay
13+
// 16-byte aligned
14+
SUB $16, RSP
1115
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1216
WORD $0x7100043f // cmp w1, #1
1317
WORD $0x910003fd // mov x29, sp
@@ -79,6 +83,8 @@ LBB1_7:
7983
BNE LBB1_7
8084
LBB1_8:
8185
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
86+
// Put the stack pointer back where it was
87+
ADD $16, RSP
8288
MOVD R8, res+24(FP)
8389
RET
8490

parquet/internal/utils/bit_packing_neon_arm64.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ TEXT ·_unpack32_neon(SB), $0-40
281281
// LEAQ LCDATA1<>(SB), BP
282282

283283
// %bb.0:
284+
// The Go ABI saves the frame pointer register one word below the
285+
// caller's frame. Make room so we don't overwrite it. Needs to stay
286+
// 16-byte aligned
287+
SUB $16, RSP
284288
WORD $0xa9ba7bfd // stp x29, x30, [sp, #-96]!
285289
WORD $0xd10643e9 // sub x9, sp, #400
286290
WORD $0xa9016ffc // stp x28, x27, [sp, #16]
@@ -6922,5 +6926,7 @@ LBB0_156:
69226926
WORD $0xa94267fa // ldp x26, x25, [sp, #32]
69236927
WORD $0xa9416ffc // ldp x28, x27, [sp, #16]
69246928
WORD $0xa8c67bfd // ldp x29, x30, [sp], #96
6929+
// Put the stack pointer back where it was
6930+
ADD $16, RSP
69256931
MOVD R0, num+32(FP)
69266932
RET

parquet/internal/utils/unpack_bool_neon_arm64.s

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ TEXT ·_bytes_to_bools_neon(SB), $0-32
1212
MOVD out+16(FP), R2
1313
MOVD outlen+24(FP), R3
1414

15+
// The Go ABI saves the frame pointer register one word below the
16+
// caller's frame. Make room so we don't overwrite it. Needs to stay
17+
// 16-byte aligned
18+
SUB $16, RSP
1519
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
1620
WORD $0x7100043f // cmp w1, #1
1721
WORD $0x910003fd // mov x29, sp
@@ -78,4 +82,6 @@ LBB0_3:
7882
JMP LBB0_2
7983
LBB0_12:
8084
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
85+
// Put the stack pointer back where it was
86+
ADD $16, RSP
8187
RET

0 commit comments

Comments
 (0)