diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index 5fb199b1b2b032..56110df79e8077 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -49,6 +49,7 @@ BUILTIN(__builtin_arm_wfe, "v", "") BUILTIN(__builtin_arm_wfi, "v", "") BUILTIN(__builtin_arm_sev, "v", "") BUILTIN(__builtin_arm_sevl, "v", "") +BUILTIN(__builtin_arm_chkfeat, "WUiWUi", "") // Like __builtin_trap but provide an 16-bit immediate reason code (which goes into `brk #N`). BUILTIN(__builtin_arm_trap, "vUIs", "nr") @@ -136,6 +137,10 @@ TARGET_BUILTIN(__builtin_arm_st64b, "vv*WUiC*", "n", "ls64") TARGET_BUILTIN(__builtin_arm_st64bv, "WUiv*WUiC*", "n", "ls64") TARGET_BUILTIN(__builtin_arm_st64bv0, "WUiv*WUiC*", "n", "ls64") +// Armv9.3-A Guarded Control Stack +TARGET_BUILTIN(__builtin_arm_gcspopm, "WUiWUi", "n", "gcs") +TARGET_BUILTIN(__builtin_arm_gcsss, "vC*vC*", "n", "gcs") + TARGET_HEADER_BUILTIN(_BitScanForward, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanReverse, "UcUNi*UNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_BitScanForward64, "UcUNi*ULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h index 5785954c9171ab..1518b0c4c8428f 100644 --- a/clang/lib/Headers/arm_acle.h +++ b/clang/lib/Headers/arm_acle.h @@ -75,6 +75,14 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v #define __dbg(t) __builtin_arm_dbg(t) #endif +#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE +#define _CHKFEAT_GCS 1 +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__chkfeat(uint64_t __features) { + return __builtin_arm_chkfeat(__features) ^ __features; +} +#endif + /* 7.5 Swap */ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__)) __swp(uint32_t __x, volatile uint32_t *__p) { @@ -855,6 +863,24 @@ 
__rndrrs(uint64_t *__p) { } #endif +/* 11.2 Guarded Control Stack intrinsics */ +#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE +static __inline__ void * __attribute__((__always_inline__, __nodebug__)) +__gcspr() { + return (void *)__builtin_arm_rsr64("gcspr_el0"); +} + +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs"))) +__gcspopm() { + return __builtin_arm_gcspopm(0); +} + +static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs"))) +__gcsss(const void *__stack) { + return __builtin_arm_gcsss(__stack); +} +#endif + #if defined(__cplusplus) } #endif diff --git a/clang/test/CodeGen/aarch64-gcs.c b/clang/test/CodeGen/aarch64-gcs.c new file mode 100644 index 00000000000000..767b1b8bfaf2dc --- /dev/null +++ b/clang/test/CodeGen/aarch64-gcs.c @@ -0,0 +1,56 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple aarch64-eabi -target-feature +gcs -emit-llvm %s -o - | FileCheck %s + +#include <arm_acle.h> + +// CHECK-LABEL: define dso_local i64 @test_chkfeat +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__FEATURES_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 1, ptr [[__FEATURES_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.aarch64.chkfeat(i64 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[__FEATURES_ADDR_I]], align 8 +// CHECK-NEXT: [[XOR_I:%.*]] = xor i64 [[TMP1]], [[TMP2]] +// CHECK-NEXT: ret i64 [[XOR_I]] +// +uint64_t test_chkfeat() { + return __chkfeat(_CHKFEAT_GCS); +} + +// CHECK-LABEL: define dso_local ptr @test_gcspr +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.read_volatile_register.i64(metadata [[META2:![0-9]+]]) +// CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr +// CHECK-NEXT: ret ptr [[TMP1]] +// +void 
*test_gcspr() { + return __gcspr(); +} + +// CHECK-LABEL: define dso_local i64 @test_gcspopm +// CHECK-SAME: () #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.aarch64.gcspopm(i64 0) +// CHECK-NEXT: ret i64 [[TMP0]] +// +uint64_t test_gcspopm() { + return __gcspopm(); +} + +// CHECK-LABEL: define dso_local ptr @test_gcsss +// CHECK-SAME: (ptr noundef [[P:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[__STACK_ADDR_I:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: store ptr [[TMP0]], ptr [[__STACK_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__STACK_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = call ptr @llvm.aarch64.gcsss(ptr [[TMP1]]) +// CHECK-NEXT: ret ptr [[TMP2]] +// +const void *test_gcsss(const void *p) { + return __gcsss(p); +} diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 38d71b17b476d5..246f717aaef96a 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -89,6 +89,23 @@ def int_aarch64_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, // ordering during ISel. def int_aarch64_space : DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>; +//===----------------------------------------------------------------------===// +// Guarded Control Stack + +def int_aarch64_chkfeat : ClangBuiltin<"__builtin_arm_chkfeat">, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], + [IntrNoMem]>; + +// FIXME: This should be marked as [IntrReadMem, IntrHasSideEffects], as it has +// the side-effect of updating gcspr, but this combination doesn't work +// correctly. 
+def int_aarch64_gcspopm : ClangBuiltin<"__builtin_arm_gcspopm">, + DefaultAttrsIntrinsic<[llvm_i64_ty], [llvm_i64_ty], + []>; + +def int_aarch64_gcsss : ClangBuiltin<"__builtin_arm_gcsss">, + DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>; + } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 544eec3ab9cecf..97e03be0f6ef0a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -4577,6 +4577,18 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { switch (IntNo) { default: break; + case Intrinsic::aarch64_gcsss: { + SDLoc DL(Node); + SDValue Chain = Node->getOperand(0); + SDValue Val = Node->getOperand(2); + SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64); + SDNode *SS1 = + CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain); + SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64, + MVT::Other, Zero, SDValue(SS1, 0)); + ReplaceNode(Node, SS2); + return; + } case Intrinsic::aarch64_ldaxp: case Intrinsic::aarch64_ldxp: { unsigned Op = diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f3aac3b46d1730..49ea9d952521c4 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -1267,23 +1267,34 @@ class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic, let Inst{15-8} = 0b01110111; let Inst{7-5} = op2; let Predicates = [HasGCS]; + let hasSideEffects = 1; } +let mayStore = 1, mayLoad = 1 in def GCSSS1 : GCSRtIn<0b011, 0b010, "gcsss1">; +let mayStore = 1 in def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">; class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic, list<dag> pattern = []> - : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> { + : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, 
"\t$Rt", pattern> { let Inst{20-19} = 0b01; let Inst{18-16} = op1; let Inst{15-8} = 0b01110111; let Inst{7-5} = op2; let Predicates = [HasGCS]; + let hasSideEffects = 1; + // The input register is unchanged when GCS is disabled, so we need it as + // both an input and output operand. + let Constraints = "$src = $Rt"; } +let mayStore = 1, mayLoad = 1 in def GCSSS2 : GCSRtOut<0b011, 0b011, "gcsss2">; -def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">; +// FIXME: mayStore = 1 only needed to match the intrinsic definition +let mayStore = 1, mayLoad = 1 in +def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm", + [(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>; def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>; @@ -1292,7 +1303,8 @@ def GCSB_DSYNC : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGC def : TokenAlias<"DSYNC", "dsync">; let Uses = [X16], Defs = [X16], CRm = 0b0101 in { - def CHKFEAT : SystemNoOperands<0b000, "hint\t#40">; + def CHKFEAT : SystemNoOperands<0b000, "hint\t#40", + [(set X16, (int_aarch64_chkfeat X16))]>; } def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>; def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>; @@ -1311,7 +1323,6 @@ class GCSSt<string mnemonic, bits<3> op> def GCSSTR : GCSSt<"gcsstr", 0b000>; def GCSSTTR : GCSSt<"gcssttr", 0b001>; - // ARMv8.2-A Dot Product let Predicates = [HasDotProd] in { defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>; diff --git a/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll new file mode 100644 index 00000000000000..b2f9b4d34ac4c0 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/gcs-intrinsics.ll @@ -0,0 +1,54 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+gcs -verify-machineinstrs -o - %s | FileCheck %s + +; We call each intrinsic twice, once with the result being unused and 
once with +; it being used, to check that dead code elimination is being done correctly. +; chkfeat does not have side effects so can be eliminated, but the others do and +; can't be eliminated. + +define i64 @test_chkfeat(i64 %arg) { +; CHECK-LABEL: test_chkfeat: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x16, x0 +; CHECK-NEXT: chkfeat x16 +; CHECK-NEXT: mov x0, x16 +; CHECK-NEXT: ret +entry: + %0 = call i64 @llvm.aarch64.chkfeat(i64 %arg) + %1 = call i64 @llvm.aarch64.chkfeat(i64 %arg) + ret i64 %1 +} + +define i64 @test_gcspopm(i64 %arg) { +; CHECK-LABEL: test_gcspopm: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: gcspopm x8 +; CHECK-NEXT: gcspopm x0 +; CHECK-NEXT: ret +entry: + %0 = call i64 @llvm.aarch64.gcspopm(i64 %arg) + %1 = call i64 @llvm.aarch64.gcspopm(i64 %arg) + ret i64 %1 +} + +define ptr @test_gcsss(ptr %p) { +; CHECK-LABEL: test_gcsss: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov x9, xzr +; CHECK-NEXT: gcsss1 x0 +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: gcsss2 x9 +; CHECK-NEXT: gcsss1 x0 +; CHECK-NEXT: gcsss2 x8 +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: ret +entry: + %0 = call ptr @llvm.aarch64.gcsss(ptr %p) + %1 = call ptr @llvm.aarch64.gcsss(ptr %p) + ret ptr %1 +} + +declare i64 @llvm.aarch64.chkfeat(i64) +declare i64 @llvm.aarch64.gcspopm(i64) +declare ptr @llvm.aarch64.gcsss(ptr)