From 19bd31356781cd10f304d87ca08f37e127494bd8 Mon Sep 17 00:00:00 2001 From: Nick Ripley Date: Fri, 25 Oct 2024 16:43:28 -0400 Subject: [PATCH] fix: don't clobber saved frame pointer in arm64 assembly functions (#170) The arm64 neon assembly functions in this repository overwrite the frame pointer saved by their callers, leading to crashes from the Go runtime execution tracer and profilers which use frame pointer unwinding. For historical reasons, on arm64 Go functions save the caller's frame pointer register (x29) one word below their stack frame. See go.dev/s/regabi#arm64-architecture. The assembly functions here, translated from C compiler output, save values at the top of their frame, and overwrite the frame pointer saved by the caller. We can fix this by decrementing the stack pointer past where that frame pointer is saved before saving anything on the stack. Fixed with this sed script on my macos laptop + manual cleanup to match indentation: ``` /stp[\t ]*x29/i\ // The Go ABI saves the frame pointer register one word below the \ // caller's frame. Make room so we don't overwrite it. Needs to stay \ // 16-byte aligned \ SUB $16, RSP /ldp[\t ]*x29/a\ // Put the stack pointer back where it was \ ADD $16, RSP ``` Ran the script from the root of this repository with find . -name '*_arm64.s' -exec sed -f fix.sed -i '' {} + Then manually inspected the assembly for missing SUBs/ADDs at the beginning of functions and prior to returns. Fixes #150 --- .../kernels/cast_numeric_neon_arm64.s | 6 ++++ arrow/internal/testing/tools/fpunwind.go | 23 +++++++++++++ arrow/internal/testing/tools/fpunwind_arm64.s | 25 +++++++++++++++ .../testing/tools/fpunwind_default.go | 24 ++++++++++++++ arrow/math/int64_neon_arm64.s | 8 +++++ arrow/math/uint64_neon_arm64.s | 8 +++++ arrow/memory/memory_neon_arm64.s | 6 ++++ arrow/memory/memory_test.go | 10 ++++++ internal/utils/min_max_neon_arm64.s | 32 +++++++++++++++++++ parquet/internal/bmi/bitmap_neon_arm64.s | 6 ++++ .../internal/utils/bit_packing_neon_arm64.s | 6 ++++ .../internal/utils/unpack_bool_neon_arm64.s | 6 ++++ 12 files changed, 160 insertions(+) create mode 100644 arrow/internal/testing/tools/fpunwind.go create mode 100644 arrow/internal/testing/tools/fpunwind_arm64.s create mode 100644 arrow/internal/testing/tools/fpunwind_default.go diff --git a/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s b/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s index c54eac44..3d56efc5 100644 --- a/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s +++ b/arrow/compute/internal/kernels/cast_numeric_neon_arm64.s @@ -10,6 +10,10 @@ TEXT ·_cast_type_numeric_neon(SB), $0-40 MOVD len+32(FP), R4 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100181f // cmp w0, #6 WORD $0x910003fd // mov x29, sp @@ -4447,6 +4451,8 @@ LBB0_892: BNE LBB0_892 LBB0_893: WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB0_894: WORD $0x927b6909 // and x9, x8, #0xffffffe0 diff --git a/arrow/internal/testing/tools/fpunwind.go b/arrow/internal/testing/tools/fpunwind.go new file mode 100644 index 00000000..9b104931 --- /dev/null +++ b/arrow/internal/testing/tools/fpunwind.go @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build arm64 + +package tools + +// FPUnwind does frame pointer unwinding. It is implemented in assembly. +// If frame pointers are broken, it will crash. +func FPUnwind() diff --git a/arrow/internal/testing/tools/fpunwind_arm64.s b/arrow/internal/testing/tools/fpunwind_arm64.s new file mode 100644 index 00000000..113092c0 --- /dev/null +++ b/arrow/internal/testing/tools/fpunwind_arm64.s @@ -0,0 +1,25 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +TEXT ·FPUnwind(SB), $0-0 + MOVD R29, R19 +loop: + CMP $0, R19 + BEQ exit + MOVD (R19), R19 + B loop +exit: + RET diff --git a/arrow/internal/testing/tools/fpunwind_default.go b/arrow/internal/testing/tools/fpunwind_default.go new file mode 100644 index 00000000..2a07c630 --- /dev/null +++ b/arrow/internal/testing/tools/fpunwind_default.go @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//go:build !arm64 + +package tools + +// FPUnwind does frame pointer unwinding. It is implemented in assembly. +// If frame pointers are broken, it will crash. +// It is currently only implemented for arm64 +func FPUnwind() {} diff --git a/arrow/math/int64_neon_arm64.s b/arrow/math/int64_neon_arm64.s index 4f55163c..c0b639bd 100755 --- a/arrow/math/int64_neon_arm64.s +++ b/arrow/math/int64_neon_arm64.s @@ -11,6 +11,10 @@ TEXT ·_sum_int64_neon(SB), $0-24 MOVD len+8(FP), R1 MOVD res+16(FP), R2 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x910003fd // mov x29, sp CBZ R1, LBB0_3 @@ -23,6 +27,8 @@ LBB0_3: WORD $0xaa1f03e9 // mov x9, xzr WORD $0xf9000049 // str x9, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB0_4: WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc @@ -54,5 +60,7 @@ LBB0_8: LBB0_9: WORD $0xf9000049 // str x9, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET diff --git a/arrow/math/uint64_neon_arm64.s b/arrow/math/uint64_neon_arm64.s index edbc1a63..0f8d66a5 100755 --- a/arrow/math/uint64_neon_arm64.s +++ b/arrow/math/uint64_neon_arm64.s @@ -11,6 +11,10 @@ TEXT ·_sum_uint64_neon(SB), $0-24 MOVD len+8(FP), R1 MOVD res+16(FP), R2 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x910003fd // mov x29, sp CBZ R1, LBB0_3 @@ -23,6 +27,8 @@ LBB0_3: WORD $0xaa1f03e9 // mov x9, xzr WORD $0xf9000049 // str x9, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB0_4: WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc @@ -54,5 +60,7 @@ LBB0_8: LBB0_9: WORD $0xf9000049 // str x9, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET diff --git a/arrow/memory/memory_neon_arm64.s b/arrow/memory/memory_neon_arm64.s index 18655cc7..18b0af5c 100755 --- a/arrow/memory/memory_neon_arm64.s +++ b/arrow/memory/memory_neon_arm64.s @@ -11,6 +11,10 @@ TEXT ·_memset_neon(SB), $0-24 MOVD len+8(FP), R1 MOVD c+16(FP), R2 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x8b010008 // add x8, x0, x1 WORD $0xeb00011f // cmp x8, x0 @@ -40,4 +44,6 @@ LBB0_6: BNE LBB0_6 LBB0_7: WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET diff --git a/arrow/memory/memory_test.go b/arrow/memory/memory_test.go index 4a975319..ca1ad531 100644 --- a/arrow/memory/memory_test.go +++ b/arrow/memory/memory_test.go @@ -19,6 +19,7 @@ package memory_test import ( "testing" + "github.com/apache/arrow-go/v18/arrow/internal/testing/tools" "github.com/apache/arrow-go/v18/arrow/memory" "github.com/stretchr/testify/assert" ) @@ -123,3 +124,12 @@ func BenchmarkSet_8000(b *testing.B) { func BenchmarkSet_8192(b *testing.B) { benchmarkSet(b, 8192) } + +func TestSetNoClobberFramePointer(t *testing.T) { + var b [8]byte + memory.Set(b[:], 0xff) + // We should be able to safely frame pointer unwind after calling this + // function. The arm64 assembly implementation used to clobber the + // frame pointer. See GH-150 + tools.FPUnwind() +} diff --git a/internal/utils/min_max_neon_arm64.s b/internal/utils/min_max_neon_arm64.s index b679bb6e..a31c5d2e 100755 --- a/internal/utils/min_max_neon_arm64.s +++ b/internal/utils/min_max_neon_arm64.s @@ -13,6 +13,10 @@ TEXT ·_int32_max_min_neon(SB), $0-32 MOVD minout+16(FP), R2 MOVD maxout+24(FP), R3 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100043f // cmp w1, #1 WORD $0x910003fd // mov x29, sp @@ -32,6 +36,8 @@ LBB0_3: WORD $0xb900006b // str w11, [x3] WORD $0xb900004a // str w10, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB0_4: WORD $0x927e7509 // and x9, x8, #0xfffffffc @@ -76,6 +82,8 @@ LBB0_9: WORD $0xb900006b // str w11, [x3] WORD $0xb900004a // str w10, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET // func _uint32_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) @@ -86,6 +94,10 @@ TEXT ·_uint32_max_min_neon(SB), $0-32 MOVD minout+16(FP), R2 MOVD maxout+24(FP), R3 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100043f // cmp w1, #1 WORD $0x910003fd // mov x29, sp @@ -105,6 +117,8 @@ LBB1_3: WORD $0xb900006a // str w10, [x3] WORD $0xb900004b // str w11, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB1_4: WORD $0x927e7509 // and x9, x8, #0xfffffffc @@ -149,6 +163,8 @@ LBB1_9: WORD $0xb900006a // str w10, [x3] WORD $0xb900004b // str w11, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET // func _int64_max_min_neon(values unsafe.Pointer, length int, minout, maxout unsafe.Pointer) @@ -159,6 +175,10 @@ TEXT ·_int64_max_min_neon(SB), $0-32 MOVD minout+16(FP), R2 MOVD maxout+24(FP), R3 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100043f // cmp w1, #1 WORD $0x910003fd // mov x29, sp @@ -178,6 +198,8 @@ LBB2_3: WORD $0xf900006b // str x11, [x3] WORD $0xf900004a // str x10, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB2_4: WORD $0x927e7509 // and x9, x8, #0xfffffffc @@ -234,6 +256,8 @@ LBB2_9: WORD $0xf900006b // str x11, [x3] WORD $0xf900004a // str x10, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET @@ -245,6 +269,10 @@ TEXT ·_uint64_max_min_neon(SB), $0-32 MOVD minout+16(FP), R2 MOVD maxout+24(FP), R3 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100043f // cmp w1, #1 WORD $0x910003fd // mov x29, sp @@ -264,6 +292,8 @@ LBB3_3: WORD $0xf900006a // str x10, [x3] WORD $0xf900004b // str x11, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET LBB3_4: WORD $0x927e7509 // and x9, x8, #0xfffffffc @@ -320,5 +350,7 @@ LBB3_9: WORD $0xf900006a // str x10, [x3] WORD $0xf900004b // str x11, [x2] WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET diff --git a/parquet/internal/bmi/bitmap_neon_arm64.s b/parquet/internal/bmi/bitmap_neon_arm64.s index abde5843..f711e490 100755 --- a/parquet/internal/bmi/bitmap_neon_arm64.s +++ b/parquet/internal/bmi/bitmap_neon_arm64.s @@ -8,6 +8,10 @@ TEXT ·_levels_to_bitmap_neon(SB), $0-32 MOVD numLevels+8(FP), R1 MOVD rhs+16(FP), R2 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100043f // cmp w1, #1 WORD $0x910003fd // mov x29, sp @@ -79,6 +83,8 @@ LBB1_7: BNE LBB1_7 LBB1_8: WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP MOVD R8, res+24(FP) RET diff --git a/parquet/internal/utils/bit_packing_neon_arm64.s b/parquet/internal/utils/bit_packing_neon_arm64.s index 2d18dccd..b3a02f4c 100644 --- a/parquet/internal/utils/bit_packing_neon_arm64.s +++ b/parquet/internal/utils/bit_packing_neon_arm64.s @@ -281,6 +281,10 @@ TEXT ·_unpack32_neon(SB), $0-40 // LEAQ LCDATA1<>(SB), BP // %bb.0: + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9ba7bfd // stp x29, x30, [sp, #-96]! WORD $0xd10643e9 // sub x9, sp, #400 WORD $0xa9016ffc // stp x28, x27, [sp, #16] @@ -6922,5 +6926,7 @@ LBB0_156: WORD $0xa94267fa // ldp x26, x25, [sp, #32] WORD $0xa9416ffc // ldp x28, x27, [sp, #16] WORD $0xa8c67bfd // ldp x29, x30, [sp], #96 + // Put the stack pointer back where it was + ADD $16, RSP MOVD R0, num+32(FP) RET diff --git a/parquet/internal/utils/unpack_bool_neon_arm64.s b/parquet/internal/utils/unpack_bool_neon_arm64.s index 24278959..3d1edaca 100755 --- a/parquet/internal/utils/unpack_bool_neon_arm64.s +++ b/parquet/internal/utils/unpack_bool_neon_arm64.s @@ -12,6 +12,10 @@ TEXT ·_bytes_to_bools_neon(SB), $0-32 MOVD out+16(FP), R2 MOVD outlen+24(FP), R3 + // The Go ABI saves the frame pointer register one word below the + // caller's frame. Make room so we don't overwrite it. Needs to stay + // 16-byte aligned + SUB $16, RSP WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]! WORD $0x7100043f // cmp w1, #1 WORD $0x910003fd // mov x29, sp @@ -78,4 +82,6 @@ LBB0_3: JMP LBB0_2 LBB0_12: WORD $0xa8c17bfd // ldp x29, x30, [sp], #16 + // Put the stack pointer back where it was + ADD $16, RSP RET