-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[AArch64] Add support for range prefetch intrinsic #170490
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
This patch adds support in Clang for the RPRFM instruction, which is
available when FEAT_RPRFM is defined:
void __rpld(int64_t access_kind, uint64_t retention_policy,
uint64_t reuse_distance, int64_t stride,
uint64_t count, int64_t length, void const *addr);
If FEAT_RPRFM is not available, this instruction is a NOP.
This implements the following ACLE proposal:
ARM-software/acle#423
|
@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-backend-x86 Author: Kerry McLaughlin (kmclaughlin-arm) Changes: This patch adds support in Clang for the RPRFM instruction, which is available when FEAT_RPRFM is defined. If FEAT_RPRFM is not available, this instruction is a NOP. This implements the following ACLE proposal: ARM-software/acle#423. Patch is 21.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/170490.diff 19 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index adb6c941e852a..7bbf747d705c7 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -96,6 +96,9 @@ TARGET_BUILTIN(__builtin_arm_jcvt, "Zid", "nc", "v8.3a")
// Prefetch
BUILTIN(__builtin_arm_prefetch, "vvC*UiUiUiUi", "nc")
+// Range Prefetch
+BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiUiiUii", "nc")
+
// System Registers
BUILTIN(__builtin_arm_rsr, "UicC*", "nc")
BUILTIN(__builtin_arm_rsr64, "WUicC*", "nc")
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index d7f36c0f9b79a..38018953a269e 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -612,6 +612,9 @@ void AArch64TargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasLSE)
Builder.defineMacro("__ARM_FEATURE_ATOMICS", "1");
+ if (HasRPRFM)
+ Builder.defineMacro("__ARM_FEATURE_RPRFM", "1");
+
if (HasBFloat16) {
Builder.defineMacro("__ARM_FEATURE_BF16", "1");
Builder.defineMacro("__ARM_FEATURE_BF16_VECTOR_ARITHMETIC", "1");
@@ -870,6 +873,7 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
.Case("ssve-fp8fma", HasSSVE_FP8FMA)
.Case("sme-f8f32", HasSME_F8F32)
.Case("sme-f8f16", HasSME_F8F16)
+ .Case("rprfm", HasRPRFM)
.Default(false);
}
@@ -1100,6 +1104,9 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
if (Feature == "+strict-align")
HasUnalignedAccess = false;
+ if (Feature == "+rprfm")
+ HasRPRFM = true;
+
// All predecessor archs are added but select the latest one for ArchKind.
if (Feature == "+v8a" && ArchInfo->Version < llvm::AArch64::ARMV8A.Version)
ArchInfo = &llvm::AArch64::ARMV8A;
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 1a7aa658e9d87..866a9cb2c2711 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -131,6 +131,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
bool HasRCPC3 = false;
bool HasSMEFA64 = false;
bool HasPAuthLR = false;
+ bool HasRPRFM = false;
const llvm::AArch64::ArchInfo *ArchInfo = &llvm::AArch64::ARMV8A;
diff --git a/clang/lib/Headers/arm_acle.h b/clang/lib/Headers/arm_acle.h
index 97f63e8ecf71f..4b6cd97be602a 100644
--- a/clang/lib/Headers/arm_acle.h
+++ b/clang/lib/Headers/arm_acle.h
@@ -98,6 +98,10 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
#else
#define __pldx(access_kind, cache_level, retention_policy, addr) \
__builtin_arm_prefetch(addr, access_kind, cache_level, retention_policy, 1)
+#define __rpld(access_kind, retention_policy, reuse_distance, stride, count, \
+ length, addr) \
+ __builtin_arm_range_prefetch(addr, access_kind, retention_policy, \
+ reuse_distance, stride, count, length)
#endif
/* 7.6.2 Instruction prefetch */
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index a5164a94b57fa..da3438fb77118 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1122,6 +1122,15 @@ bool SemaARM::CheckAArch64BuiltinFunctionCall(const TargetInfo &TI,
SemaRef.BuiltinConstantArgRange(TheCall, 4, 0, 1);
}
+ if (BuiltinID == AArch64::BI__builtin_arm_range_prefetch) {
+ // Every argument after the address must be an integer constant inside its
+ // permitted range. The arguments are checked in order and the first
+ // failing check ends the validation.
+ static const struct {
+ unsigned ArgNo;
+ int Low;
+ int High;
+ } ArgRanges[] = {
+ {1, 0, 1}, {2, 0, 1}, {3, 0, 15},
+ {4, -2048, 2040}, {5, 0, 65535}, {6, -2048, 2040},
+ };
+ for (const auto &Range : ArgRanges)
+ if (SemaRef.BuiltinConstantArgRange(TheCall, Range.ArgNo, Range.Low,
+ Range.High))
+ return true;
+ return false;
+ }
+
if (BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
BuiltinID == AArch64::BI__builtin_arm_rsr128 ||
diff --git a/clang/test/CodeGen/arm_acle.c b/clang/test/CodeGen/arm_acle.c
index 0f539cba5c758..1f1c8b82c0ae1 100644
--- a/clang/test/CodeGen/arm_acle.c
+++ b/clang/test/CodeGen/arm_acle.c
@@ -164,6 +164,19 @@ void test_pld() {
__pld(0);
}
+#if defined(__ARM_64BIT_STATE)
+
+// AArch64-LABEL: @test_rpld(
+// AArch64-NEXT: entry:
+// AArch64-NEXT: call void @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 -2048, i32 65535, i32 2040)
+// AArch64-NEXT: ret void
+//
+void test_rpld() {
+ __rpld(1, 1, 15, -2048, 65535, 2040, 0);
+}
+
+#endif
+
// AArch32-LABEL: @test_pldx(
// AArch32-NEXT: entry:
// AArch32-NEXT: call void @llvm.prefetch.p0(ptr null, i32 1, i32 3, i32 1)
diff --git a/clang/test/CodeGen/builtins-arm64.c b/clang/test/CodeGen/builtins-arm64.c
index 86c2812434643..1262823bf6ed3 100644
--- a/clang/test/CodeGen/builtins-arm64.c
+++ b/clang/test/CodeGen/builtins-arm64.c
@@ -62,6 +62,20 @@ void prefetch(void) {
// CHECK: call {{.*}} @llvm.aarch64.prefetch(ptr null, i32 0, i32 3, i32 0, i32 1)
}
+void range_prefetch(void) {
+ __builtin_arm_range_prefetch(0, 0, 0, 15, 1024, 24, 2); // pldkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 0, i32 15, i32 1024, i32 24, i32 2)
+
+ __builtin_arm_range_prefetch(0, 0, 1, 15, 1024, 24, 2); // pldstrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 0, i32 1, i32 15, i32 1024, i32 24, i32 2)
+
+ __builtin_arm_range_prefetch(0, 1, 0, 15, 1024, 24, 2); // pstkeep
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 0, i32 15, i32 1024, i32 24, i32 2)
+
+ __builtin_arm_range_prefetch(0, 1, 1, 15, 1024, 24, 2); // pststrm
+ // CHECK: call {{.*}} @llvm.aarch64.range.prefetch(ptr null, i32 1, i32 1, i32 15, i32 1024, i32 24, i32 2)
+}
+
__attribute__((target("v8.5a")))
int32_t jcvt(double v) {
//CHECK-LABEL: @jcvt(
diff --git a/clang/test/Driver/print-supported-extensions-aarch64.c b/clang/test/Driver/print-supported-extensions-aarch64.c
index 1f8929e705e4c..1a34478f11c6b 100644
--- a/clang/test/Driver/print-supported-extensions-aarch64.c
+++ b/clang/test/Driver/print-supported-extensions-aarch64.c
@@ -69,6 +69,7 @@
// CHECK-NEXT: rcpc FEAT_LRCPC Enable support for RCPC extension
// CHECK-NEXT: rcpc3 FEAT_LRCPC3 Enable Armv8.9-A RCPC instructions for A64 and Advanced SIMD and floating-point instruction set
// CHECK-NEXT: rdm FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions
+// CHECK-NEXT: rprfm FEAT_RPRFM Enable Armv8.0-A Range Prefetch Memory instruction
// CHECK-NEXT: sb FEAT_SB Enable Armv8.5-A Speculation Barrier
// CHECK-NEXT: sha2 FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support
// CHECK-NEXT: sha3 FEAT_SHA3, FEAT_SHA512 Enable SHA512 and SHA3 support
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 4dd243e57a63e..d5d78f1118a4f 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -789,3 +789,6 @@
// CHECK-SMEF8F16: __ARM_FEATURE_FP8 1
// CHECK-SMEF8F16: __ARM_FEATURE_SME2 1
// CHECK-SMEF8F16: __ARM_FEATURE_SME_F8F16 1
+
+// RUN: %clang --target=aarch64 -march=armv8-a+rprfm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-RPRFM %s
+// CHECK-RPRFM: __ARM_FEATURE_RPRFM 1
diff --git a/clang/test/Sema/builtins-arm64.c b/clang/test/Sema/builtins-arm64.c
index f094162b3aadc..3d26b16d461d0 100644
--- a/clang/test/Sema/builtins-arm64.c
+++ b/clang/test/Sema/builtins-arm64.c
@@ -30,6 +30,17 @@ void test_prefetch(void) {
__builtin_arm_prefetch(0, 0, 0, 0, 2); // expected-error-re {{argument value {{.*}} is outside the valid range}}
}
+void test_range_prefetch(void) {
+ __builtin_arm_range_prefetch(0, 2, 0, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 2, 0, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 16, 0, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, -2049, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 2041, 0, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 65536, 0); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, -2049); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_arm_range_prefetch(0, 0, 0, 0, 0, 0, 2041); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+}
+
void test_trap(short s, unsigned short us) {
__builtin_arm_trap(42);
__builtin_arm_trap(65535);
@@ -37,4 +48,4 @@ void test_trap(short s, unsigned short us) {
__builtin_arm_trap(65536); // expected-warning {{implicit conversion from 'int' to 'unsigned short' changes value from 65536 to 0}}
__builtin_arm_trap(s); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
__builtin_arm_trap(us); // expected-error {{argument to '__builtin_arm_trap' must be a constant integer}}
-}
\ No newline at end of file
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 1c86c6815f049..43a7f10ce2618 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -76,6 +76,14 @@ def int_aarch64_prefetch : Intrinsic<[],
]>,
ClangBuiltin<"__builtin_arm_prefetch">;
+def int_aarch64_range_prefetch : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty, llvm_i32_ty],
+ [IntrInaccessibleMemOrArgMemOnly, IntrWillReturn, ReadOnly<ArgIndex<0>>,
+ ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>,
+ ImmArg<ArgIndex<5>>, ImmArg<ArgIndex<6>>]>,
+ ClangBuiltin<"__builtin_arm_range_prefetch">;
+
//===----------------------------------------------------------------------===//
// Data Barrier Instructions
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index a1e14d8f25bf7..17c4bfd67b4c0 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -6701,6 +6701,27 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"isdata argument to llvm.aarch64.prefetch must be 0 or 1", Call);
break;
}
+ case Intrinsic::aarch64_range_prefetch: {
+ // Range-check the immediate operands of llvm.aarch64.range.prefetch. Each
+ // failed check reports the offending call.
+ Check(cast<ConstantInt>(Call.getArgOperand(1))->getZExtValue() < 2,
+ "write argument to llvm.aarch64.range.prefetch must be 0 or 1", Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(2))->getZExtValue() < 2,
+ "stream argument to llvm.aarch64.range.prefetch must be 0 or 1",
+ Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue() < 16,
+ "reuse distance argument to llvm.aarch64.range.prefetch must be < 16",
+ Call);
+ // Stride and length may be negative, so read them sign-extended instead of
+ // relying on a narrowing conversion of the zero-extended value.
+ int64_t Stride = cast<ConstantInt>(Call.getArgOperand(4))->getSExtValue();
+ Check(Stride >= -2048 && Stride <= 2040,
+ "stride argument to llvm.aarch64.range.prefetch must be -2048 - 2040",
+ Call);
+ Check(cast<ConstantInt>(Call.getArgOperand(5))->getZExtValue() < 65536,
+ "count argument to llvm.aarch64.range.prefetch must be < 65536",
+ Call);
+ int64_t Length = cast<ConstantInt>(Call.getArgOperand(6))->getSExtValue();
+ Check(Length >= -2048 && Length <= 2040,
+ "length argument to llvm.aarch64.range.prefetch must be -2048 - 2040",
+ Call);
+ break;
+ }
case Intrinsic::callbr_landingpad: {
const auto *CBR = dyn_cast<CallBrInst>(Call.getOperand(0));
Check(CBR, "intrinstic requires callbr operand", &Call);
diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td
index 066724bea92c9..e643bdf6fea74 100644
--- a/llvm/lib/Target/AArch64/AArch64Features.td
+++ b/llvm/lib/Target/AArch64/AArch64Features.td
@@ -101,6 +101,9 @@ def FeaturePerfMon : ExtensionWithMArch<"perfmon", "PerfMon", "FEAT_PMUv3",
def FeatureSpecRestrict : Extension<"specrestrict", "SpecRestrict", "FEAT_CSV2_2",
"Enable architectural speculation restriction">;
+def FeatureRPRFM : ExtensionWithMArch<"rprfm", "RPRFM", "FEAT_RPRFM",
+ "Enable Armv8.0-A Range Prefetch Memory instruction">;
+
//===----------------------------------------------------------------------===//
// Armv8.1 Architecture Extensions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2ce8f6d924a78..dd6248afe9358 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6161,6 +6161,29 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Chain,
DAG.getTargetConstant(PrfOp, DL, MVT::i32), Addr);
}
+ case Intrinsic::aarch64_range_prefetch: {
+ // Operand 0 is the chain and operand 2 the address; operands 3-8 are the
+ // constant arguments of the intrinsic.
+ const uint64_t Field22 = (1ULL << 22) - 1;
+ const uint64_t Field16 = (1ULL << 16) - 1;
+
+ // Prefetch operation: the write flag goes in bit 0, the stream flag in bit 2.
+ unsigned WriteBit = Op.getConstantOperandVal(3);
+ unsigned StreamBit = Op.getConstantOperandVal(4);
+ SDValue PrfOpVal =
+ DAG.getTargetConstant((StreamBit << 2) | WriteBit, DL, MVT::i32);
+
+ // Metadata value: distance in [63:60], stride in [59:38], count in [37:22]
+ // and length in [21:0]. The masks truncate stride and length to 22 bits
+ // and count to 16 bits.
+ uint64_t Packed = Op.getConstantOperandVal(5) << 60;
+ Packed |= (Op.getConstantOperandVal(6) & Field22) << 38;
+ Packed |= (Op.getConstantOperandVal(7) & Field16) << 22;
+ Packed |= Op.getConstantOperandVal(8) & Field22;
+
+ return DAG.getNode(AArch64ISD::RANGE_PREFETCH, DL, MVT::Other,
+ Op.getOperand(0), PrfOpVal, Op.getOperand(2),
+ DAG.getConstant(Packed, DL, MVT::i64));
+ }
case Intrinsic::aarch64_sme_str:
case Intrinsic::aarch64_sme_ldr: {
return LowerSMELdrStr(Op, DAG, IntNo == Intrinsic::aarch64_sme_ldr);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index 7d99786830e3d..c40a9e34b37a2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -192,6 +192,12 @@ def G_AARCH64_PREFETCH : AArch64GenericInstruction {
let hasSideEffects = 1;
}
+def G_AARCH64_RANGE_PREFETCH : AArch64GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$imm, ptype0:$src1, type1:$src2);
+ let hasSideEffects = 1;
+}
+
def G_UMULL : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2);
@@ -303,6 +309,7 @@ def : GINodeEquiv<G_USDOT, AArch64usdot>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
def : GINodeEquiv<G_AARCH64_PREFETCH, AArch64Prefetch>;
+def : GINodeEquiv<G_AARCH64_RANGE_PREFETCH, AArch64RangePrefetch>;
def : GINodeEquiv<G_FPTRUNC_ODD, AArch64fcvtxn_n>;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index da93a2b13fc11..0007ddba3d941 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -411,6 +411,8 @@ def HasS1POE2 : Predicate<"Subtarget->hasS1POE2()">,
AssemblerPredicateWithAll<(all_of FeatureS1POE2), "poe2">;
def HasTEV : Predicate<"Subtarget->hasTEV()">,
AssemblerPredicateWithAll<(all_of FeatureTEV), "tev">;
+def HasRPRFM : Predicate<"Subtarget->hasRPRFM()">,
+ AssemblerPredicateWithAll<(all_of FeatureRPRFM), "rprfm">;
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
@@ -536,6 +538,7 @@ def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;
+// Operands: prefetch operation (i32), address (pointer), metadata value (i64).
+def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisVT<2, i64>]>;
def SDT_AArch64ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;
@@ -1038,6 +1041,10 @@ def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
[SDNPHasChain, SDNPSideEffect]>;
+def AArch64RangePrefetch: SDNode<"AArch64ISD::RANGE_PREFETCH",
+ SDT_AArch64RANGE_PREFETCH,
+ [SDNPHasChain, SDNPSideEffect]>;
+
// {s|u}int to FP within a FP register.
def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
@@ -10980,6 +10987,9 @@ def RPRFM:
let DecoderNamespace = "Fallback";
}
+def : Pat<(AArch64RangePrefetch rprfop:$Rt, GPR64sp:$Rn, GPR64:$Rm),
+ (RPRFM rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn)>;
+
//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 433cb0387c470..09075d7fac90a 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -3954,6 +3954,7 @@ static const struct Extension {
{"poe2", {AArch64::FeatureS1POE2}},
{"tev", {AArch64::FeatureTEV}},
{"btie", {AArch64::FeatureBTIE}},
+ {"rprfm", {AArch64::FeatureRPRFM}},
};
static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 1025b2502211a..dad362785ba3f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1749,6 +1749,33 @@ bool AArch64LegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::aarch64_range_prefetch: {
+ auto &AddrVal = MI.getOperand(1);
+
+ // Prefetch operation: the write flag goes in bit 0, the stream flag in bit 2.
+ unsigned IsWrite = MI.getOperand(2).getImm();
+ unsigned IsStream = MI.getOperand(3).getImm();
+ unsigned PrfOp = (IsStream << 2) | IsWrite;
+
+ // Pack the metadata in unsigned arithmetic, as the SelectionDAG lowering
+ // does, so the reuse distance is not shifted into the sign bit of a signed
+ // value.
+ uint64_t Distance = MI.getOperand(4).getImm();
+ int64_t Stride = MI.getOperand(5).getImm();
+ uint64_t Count = MI.getOperand(6).getImm();
+ int64_t Length = MI.getOperand(7).getImm();
+ uint64_t Mask22 = (1ULL << 22) - 1;
+ uint64_t Mask16 = (1ULL << 16) - 1;
+ // Layout: distance in [63:60], stride in [59:38], count in [37:22] and
+ // length in [21:0].
+ uint64_t Metadata = (Distance << 60) |
+ ((Stride & Mask22) << 38) |
+ ((Count & Mask16) << 22) |
+ (Length & Mask22);
+
+ // Materialise the metadata as a 64-bit constant and emit the generic
+ // range-prefetch instruction in place of the intrinsic.
+ auto MetadataReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
+ MIB.buildConstant(MetadataReg, Metadata);
+ MIB.buildInstr(AArch64::G_AARCH64_RANGE_PREFETCH)
+ .addImm(PrfOp)
+ .add(AddrVal)
+ .addUse(MetadataReg);
+ MI.eraseFromParent();
+ return true;
+ }
case Intrinsic::aarch64_neon_uaddv:
case Intrinsic::aarch64_neon_saddv:
case Intrinsic::aarch64_neon_umaxv:
@@ -2506,4...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
…owering - Add Verifier tests - Run clang-format
CarolineConcatto
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe more changes will come, due to changes in the ACLE proposal, but just in case I left some comments.
| ; CHECK-NEXT: mov x9, #4192256 | ||
| ; CHECK-NEXT: movk x8, #2040 | ||
| ; CHECK-NEXT: orr x9, x9, #0x1fe0000000000 | ||
| ; CHECK-NEXT: movk x8, #65472, lsl #16 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do these lines never change, even if you change the values of stride, count and length?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As I understand it, these instructions are moving values representing all metadata into x8 & x9, not the individual values for stride, count, length & distance. There are only two values being calculated because two of the intrinsics in this test are using identical values to the others.
I've rewritten this test a bit to try and make it easier to check that the correct values are being created, especially for the reuse distance.
- Add second intrinsic (__pld_range) which takes a single metadata value - Reorder the arguments of __pldx_range - Removed RPRFM feature macro & added __ARM_PREFETCH_RANGE - Map ReuseDistance to decreasing powers of two from 512MiB-32KiB
- Move handling of immediate values to Clang - Remove @llvm.aarch64.range.prefetch.imm intrinsic
| TARGET_HEADER_BUILTIN(__builtin_arm_range_prefetch_x, "vvC*UiUiiUiiz", "n", ARMACLE_H, ALL_LANGUAGES, "") | ||
| TARGET_HEADER_BUILTIN(__builtin_arm_range_prefetch, "vvC*UiUiULLi", "n", ARMACLE_H, ALL_LANGUAGES, "") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this use BUILTIN, given that the ACLE intrinsics __pld_range and __pldx_range are in the header but __builtin_arm_range_prefetch and __builtin_arm_range_prefetch_x are always available (e.g. like __builtin_arm_prefetch)?
| SDTCisSameAs<0,3>]>; | ||
| def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>; | ||
| def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; | ||
| def SDT_AArch64RANGE_PREFETCH: SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisPtrTy<1>, SDTCisPtrTy<2>]>; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should the metadata operand be SDTCisVT<2, i64> rather than PtrTy? I assume the current form works because the two are synonymous, but best not to rely on that.
| define void @range_prefetch_metadata_const(ptr %a) { | ||
| ; CHECK-LABEL: range_prefetch_metadata_const: | ||
| ; CHECK: // %bb.0: | ||
| ; CHECK-NEXT: mov x8, #2097152 // =0x200000 | ||
| ; CHECK-NEXT: movk x8, #2048, lsl #48 | ||
| ; CHECK-NEXT: rprfm pldkeep, x8, [x0] | ||
| ; CHECK-NEXT: ret | ||
| call void @llvm.aarch64.range.prefetch(ptr %a, i32 0, i32 0, i64 576460752305520640) | ||
| ret void | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this test anything that's not already covered by range_prefetch_metadata_accesses?
This patch adds support in Clang for the RPRFM instruction, which is available when FEAT_RPRFM is defined:
If FEAT_RPRFM is not available, this instruction is a NOP.
This implements the following ACLE proposal:
ARM-software/acle#423