From d07f22ba27467f8a4a400254c846f63f04661f25 Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Thu, 12 Dec 2024 16:27:33 +0530 Subject: [PATCH 1/2] Combine sext(trunc x) to x --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 + .../include/llvm/Target/GlobalISel/Combine.td | 12 +- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 14 ++ .../prelegalizercombiner-extending-loads.mir | 4 +- .../AMDGPU/GlobalISel/combine-sext-trunc.mir | 170 ++++++++++++++++++ .../CodeGen/RISCV/GlobalISel/jumptable.ll | 3 - 6 files changed, 199 insertions(+), 7 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index b2132562ac3f..4f661b3d1be7 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -387,6 +387,9 @@ class CombinerHelper { /// Transform zext(trunc(x)) to x. bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg); + /// Transform sext(trunc(x)) to x. + bool matchCombineSextTrunc(MachineInstr &MI, Register &Reg); + /// Transform [asz]ext([asz]ext(x)) to [asz]ext x. bool matchCombineExtOfExt(MachineInstr &MI, std::tuple &MatchInfo); diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 3c8d968c2764..9cc2bbab086f 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -731,6 +731,16 @@ def zext_trunc_fold: GICombineRule < (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }]) >; +// Fold (sext (trunc x)) -> x if the source type is same as the destination type +// and truncated bits are known to be sign extended. 
+def sext_trunc_fold_matchinfo : GIDefMatchData<"Register">;
+def sext_trunc_fold: GICombineRule <
+  (defs root:$root, sext_trunc_fold_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SEXT):$root,
+         [{ return Helper.matchCombineSextTrunc(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
+
 // Fold ([asz]ext ([asz]ext x)) -> ([asz]ext x).
 def ext_ext_fold_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">;
 def ext_ext_fold: GICombineRule <
@@ -1597,7 +1607,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
-  zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+  zext_trunc_fold, sext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
   sext_inreg_to_zext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ec7ca5dc8e2b..6256095ff371 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2465,6 +2465,20 @@ bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
   return false;
 }
 
+bool CombinerHelper::matchCombineSextTrunc(MachineInstr &MI, Register &Reg) {
+  assert(MI.getOpcode() == TargetOpcode::G_SEXT && "Expected a G_SEXT");
+  const Register DstReg = MI.getOperand(0).getReg();
+  const Register SrcReg = MI.getOperand(1).getReg();
+  const LLT DstTy = MRI.getType(DstReg);
+  if (mi_match(SrcReg, MRI,
+               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
+    const unsigned DstSize = DstTy.getScalarSizeInBits();
+    const unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
+    return KB->computeNumSignBits(Reg) >= (DstSize - SrcSize + 1);
+  }
+  return false;
+}
+
 bool CombinerHelper::matchCombineExtOfExt(
MachineInstr &MI, std::tuple &MatchInfo) { assert((MI.getOpcode() == TargetOpcode::G_ANYEXT || diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir index 47c85f767859..28c7485bbf60 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-extending-loads.mir @@ -469,10 +469,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8)) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXTLOAD]](s64) - ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s32) ; CHECK-NEXT: $x0 = COPY [[ZEXTLOAD]](s64) - ; CHECK-NEXT: $x1 = COPY [[SEXT]](s64) + ; CHECK-NEXT: $x1 = COPY [[ZEXTLOAD]](s64) %0:_(p0) = COPY $x0 %1:_(s32) = G_ZEXTLOAD %0 :: (load (s8)) %2:_(s64) = G_ZEXT %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc.mir new file mode 100644 index 000000000000..cdeb39a7ee66 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc.mir @@ -0,0 +1,170 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +--- +name: sext_trunc_s32_s16_s32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_trunc_s32_s16_s32 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 + ; GCN-NEXT: %assert:_(s32) = G_ASSERT_SEXT %var, 16 + ; GCN-NEXT: $vgpr0 = COPY %assert(s32) + %var:_(s32) = COPY $vgpr0 + %assert:_(s32) = G_ASSERT_SEXT %var, 16 + %trunc:_(s16) = G_TRUNC %assert(s32) + %sext:_(s32) = G_SEXT %trunc(s16) + $vgpr0 = 
COPY %sext(s32) +... + +--- +name: sext_trunc_s32_s16_s32_unknown_high_bits +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_trunc_s32_s16_s32_unknown_high_bits + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 + ; GCN-NEXT: %trunc:_(s16) = G_TRUNC %var(s32) + ; GCN-NEXT: %sext:_(s32) = G_SEXT %trunc(s16) + ; GCN-NEXT: $vgpr0 = COPY %sext(s32) + %var:_(s32) = COPY $vgpr0 + %trunc:_(s16) = G_TRUNC %var(s32) + %sext:_(s32) = G_SEXT %trunc(s16) + $vgpr0 = COPY %sext(s32) +... + +--- +name: sext_trunc_s64_s16_s32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: sext_trunc_s64_s16_s32 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(s64) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %assert:_(s64) = G_ASSERT_SEXT %var, 32 + ; GCN-NEXT: %trunc:_(s16) = G_TRUNC %assert(s64) + ; GCN-NEXT: %sext:_(s32) = G_SEXT %trunc(s16) + ; GCN-NEXT: $vgpr0 = COPY %sext(s32) + %var:_(s64) = COPY $vgpr0_vgpr1 + %assert:_(s64) = G_ASSERT_SEXT %var, 32 + %trunc:_(s16) = G_TRUNC %assert(s64) + %sext:_(s32) = G_SEXT %trunc(s16) + $vgpr0 = COPY %sext(s32) +... + +--- +name: sext_trunc_s32_s16_s64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + ; GCN-LABEL: name: sext_trunc_s32_s16_s64 + ; GCN: liveins: $vgpr0 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(s32) = COPY $vgpr0 + ; GCN-NEXT: %assert:_(s32) = G_ASSERT_SEXT %var, 16 + ; GCN-NEXT: %trunc:_(s16) = G_TRUNC %assert(s32) + ; GCN-NEXT: %sext:_(s64) = G_SEXT %trunc(s16) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %sext(s64) + %var:_(s32) = COPY $vgpr0 + %assert:_(s32) = G_ASSERT_SEXT %var, 16 + %trunc:_(s16) = G_TRUNC %assert(s32) + %sext:_(s64) = G_SEXT %trunc(s16) + $vgpr0_vgpr1 = COPY %sext(s64) +... 
+ +--- +name: sext_trunc_v2s32_v2s16_v2s32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: sext_trunc_v2s32_v2s16_v2s32 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %assert:_(<2 x s32>) = G_ASSERT_SEXT %var, 16 + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %assert(<2 x s32>) + %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %assert:_(<2 x s32>) = G_ASSERT_SEXT %var, 16 + %trunc:_(<2 x s16>) = G_TRUNC %assert(<2 x s32>) + %sext:_(<2 x s32>) = G_SEXT %trunc(<2 x s16>) + $vgpr0_vgpr1 = COPY %sext(<2 x s32>) +... + +--- +name: sext_trunc_v2s32_v2s16_v2s32_unknown_high_bits +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: sext_trunc_v2s32_v2s16_v2s32_unknown_high_bits + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %var(<2 x s32>) + ; GCN-NEXT: %sext:_(<2 x s32>) = G_SEXT %trunc(<2 x s16>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %sext(<2 x s32>) + %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %trunc:_(<2 x s16>) = G_TRUNC %var(<2 x s32>) + %sext:_(<2 x s32>) = G_SEXT %trunc(<2 x s16>) + $vgpr0_vgpr1 = COPY %sext(<2 x s32>) +... 
+ +--- +name: sext_trunc_v2s64_v2s16_v2s32 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + + ; GCN-LABEL: name: sext_trunc_v2s64_v2s16_v2s32 + ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GCN-NEXT: %assert:_(<2 x s64>) = G_ASSERT_SEXT %var, 32 + ; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %assert(<2 x s64>) + ; GCN-NEXT: %sext:_(<2 x s32>) = G_SEXT %trunc(<2 x s16>) + ; GCN-NEXT: $vgpr0_vgpr1 = COPY %sext(<2 x s32>) + %var:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %assert:_(<2 x s64>) = G_ASSERT_SEXT %var, 32 + %trunc:_(<2 x s16>) = G_TRUNC %assert(<2 x s64>) + %sext:_(<2 x s32>) = G_SEXT %trunc(<2 x s16>) + $vgpr0_vgpr1 = COPY %sext(<2 x s32>) +... + +--- +name: sext_trunc_v2s32_v2s16_v2s64 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; GCN-LABEL: name: sext_trunc_v2s32_v2s16_v2s64 + ; GCN: liveins: $vgpr0_vgpr1 + ; GCN-NEXT: {{ $}} + ; GCN-NEXT: %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GCN-NEXT: %assert:_(<2 x s32>) = G_ASSERT_SEXT %var, 16 + ; GCN-NEXT: %trunc:_(<2 x s16>) = G_TRUNC %assert(<2 x s32>) + ; GCN-NEXT: %sext:_(<2 x s64>) = G_SEXT %trunc(<2 x s16>) + ; GCN-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sext(<2 x s64>) + %var:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %assert:_(<2 x s32>) = G_ASSERT_SEXT %var, 16 + %trunc:_(<2 x s16>) = G_TRUNC %assert(<2 x s32>) + %sext:_(<2 x s64>) = G_SEXT %trunc(<2 x s16>) + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %sext(<2 x s64>) +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll b/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll index 601290812bb2..9dda1a241e04 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/jumptable.ll @@ -121,7 +121,6 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV64I-SMALL-LABEL: above_threshold: ; RV64I-SMALL: # %bb.0: # %entry ; RV64I-SMALL-NEXT: li a2, 5 -; RV64I-SMALL-NEXT: sext.w a0, a0 ; RV64I-SMALL-NEXT: addi a0, a0, -1 ; RV64I-SMALL-NEXT: bltu a2, a0, .LBB0_9 ; RV64I-SMALL-NEXT: # %bb.1: # %entry @@ -156,7 +155,6 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV64I-MEDIUM-LABEL: above_threshold: ; RV64I-MEDIUM: # %bb.0: # %entry ; RV64I-MEDIUM-NEXT: li a2, 5 -; RV64I-MEDIUM-NEXT: sext.w a0, a0 ; RV64I-MEDIUM-NEXT: addi a0, a0, -1 ; RV64I-MEDIUM-NEXT: bltu a2, a0, .LBB0_9 ; RV64I-MEDIUM-NEXT: # %bb.1: # %entry @@ -192,7 +190,6 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV64I-PIC-LABEL: above_threshold: ; RV64I-PIC: # %bb.0: # %entry ; RV64I-PIC-NEXT: li a2, 5 -; RV64I-PIC-NEXT: sext.w a0, a0 ; RV64I-PIC-NEXT: addi a0, a0, -1 ; RV64I-PIC-NEXT: bltu a2, a0, .LBB0_9 ; RV64I-PIC-NEXT: # %bb.1: # %entry From de8468edf032983d7c3d8de2940bd9d5c817cdf5 Mon Sep 17 00:00:00 2001 From: Sai Abhinay Anubola Date: Thu, 14 Nov 2024 12:21:47 +0530 Subject: [PATCH 2/2] Combine VExtract intrinsics into generic opcode in PreLegalizerCombiner --- llvm/lib/Target/AIE/AIE2InstrInfo.cpp | 5 +- .../Target/AIE/AIE2PreLegalizerCombiner.cpp | 88 ++++++++ .../combine-vextract-prelegalizer.mir | 196 ++++++++++++++++++ .../verifier/verify-szext-extract-vec-elt.mir | 4 +- 4 files changed, 290 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index cdd39e6732fa..4e6df730b9a1 100644 --- 
a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI, switch (MI.getOpcode()) { case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT: case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT: - ErrInfo = "Expected 32bit scalar destination"; - return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32); + ErrInfo = "Expected 32bit or 20bit scalar destination"; + return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) || + MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20)); case AIE2::G_AIE_PAD_VECTOR_UNDEF: return verifySameLaneTypes(MI, ErrInfo) && isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()), diff --git a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp index 500b06adf019..f0ac325b4001 100644 --- a/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp @@ -59,6 +59,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner { std::map getVectorInsertIndices(MachineInstr *CurMI, unsigned SclSrcBits, MachineRegisterInfo &MRI) const; + bool isTruncExtToS20Sequence(Register DstReg, bool SignVal, + unsigned SrcEltSize) const; public: AIE2PreLegalizerCombinerImpl( @@ -80,6 +82,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner { bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const; + bool tryToCombineVExtractElt(MachineInstr &MI) const; + bool tryToCombineIntrinsic(MachineInstr &MI) const; private: @@ -288,6 +292,86 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts( return true; } +/// Determines if it is safe to combine vextract by checking the uses of DstReg, +/// specifically for a pattern involving TRUNC followed by EXT. 
+bool AIE2PreLegalizerCombinerImpl::isTruncExtToS20Sequence(
+    Register DstReg, bool SignVal, unsigned SrcEltSize) const {
+  // Returns the single non-debug use of a register with a specific opcode
+  // and destination size.
+  auto GetOneUseWithOpcode =
+      [&](const Register Reg, const unsigned OpcodeToCheck,
+          const unsigned DstSize) -> std::optional<MachineInstr *> {
+    if (MRI.hasOneNonDBGUser(Reg)) {
+      MachineInstr &Use = *MRI.use_nodbg_instructions(Reg).begin();
+      if (Use.getOpcode() == OpcodeToCheck) {
+        const LLT DstRegTy = MRI.getType(Use.getOperand(0).getReg());
+        if (DstRegTy.getSizeInBits() == DstSize)
+          return &Use;
+      }
+    }
+    return std::nullopt;
+  };
+  auto Trunc = GetOneUseWithOpcode(DstReg, TargetOpcode::G_TRUNC, SrcEltSize);
+  if (!Trunc)
+    return false;
+
+  const MachineInstr *TruncMI = *Trunc;
+  const unsigned ExtOpcode =
+      SignVal ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+  const Register UseDstReg = TruncMI->getOperand(0).getReg();
+  return GetOneUseWithOpcode(UseDstReg, ExtOpcode, 20).has_value();
+}
+
+/// Rewrites a vextract.elem[8/16] intrinsic with a constant sign operand:
+/// From : %3:_(s32) = G_INTRINSIC
+///        intrinsic(@llvm.aie2.vextract.elem[8/16].I512), %2(<32 x s16>),
+///        %0(s32), %1(s32)
+///        %4:_(s16) = G_TRUNC %3(s32)
+///        %5:_(s20) = G_[S/Z]EXT %4(s16)
+/// To   : %9:_(s20) = G_AIE_[S/Z]EXT_EXTRACT_VECTOR_ELT %2(<32 x s16>), %0(s32)
+///        %10:_(s20) = G_ASSERT_[S/Z]EXT %9, [8/16]
+///        %3:_(s32) = G_[S/Z]EXT %10(s20)
+///        (the G_TRUNC and G_[S/Z]EXT users of %3 are left untouched)
+/// \returns true if MI was replaced; this enables S20 narrowing for vextract.
+bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt(
+    MachineInstr &MI) const {
+  const Register DstReg = MI.getOperand(0).getReg();
+  // In this case of G_INTRINSIC operand 1 is target intrinsic
+  const Register SrcReg = MI.getOperand(2).getReg();
+  const Register IdxReg = MI.getOperand(3).getReg();
+  const Register SignReg = MI.getOperand(4).getReg();
+
+  const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI);
+  if (!SignVal)
+    return false;
+
+  const LLT SrcVecTy = MRI.getType(SrcReg);
+  const unsigned SrcEltSize = SrcVecTy.getScalarSizeInBits();
+  // Checks for the required pattern in uses of DstReg
+  if (!isTruncExtToS20Sequence(DstReg, SignVal.value(), SrcEltSize))
+    return false;
+
+  auto *TII = static_cast<const AIE2InstrInfo *>(STI.getInstrInfo());
+  const unsigned Opcode =
+      TII->getGenericExtractVectorEltOpcode(SignVal.value());
+  const unsigned AssertExtOpcode = SignVal.value()
+                                       ? TargetOpcode::G_ASSERT_SEXT
+                                       : TargetOpcode::G_ASSERT_ZEXT;
+  const unsigned ExtOpcode =
+      SignVal.value() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+  const LLT S20 = LLT::scalar(20);
+  Register DstReg20Bit = MRI.createGenericVirtualRegister(S20);
+  Register ExtReg20Bit = MRI.createGenericVirtualRegister(S20);
+  MachineIRBuilder MIRBuilder(MI);
+
+  MIRBuilder.buildInstr(Opcode, {DstReg20Bit}, {SrcReg, IdxReg});
+  MIRBuilder.buildAssertInstr(AssertExtOpcode, ExtReg20Bit, DstReg20Bit,
+                              SrcEltSize);
+  MIRBuilder.buildInstr(ExtOpcode, {DstReg}, {ExtReg20Bit});
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
     MachineInstr &MI) const {
   const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
@@ -306,6 +390,10 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
   case Intrinsic::aie2_vinsert32_I512: {
     return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
   }
+  case Intrinsic::aie2_vextract_elem8_I512:
+  case Intrinsic::aie2_vextract_elem16_I512: {
+    return tryToCombineVExtractElt(MI);
+  }
   default:
     break;
   }
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir
new file mode 100644
index 000000000000..8bcbf3973a6e
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/combine-vextract-prelegalizer.mir
@@ -0,0 +1,196 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# This file is licensed under the Apache License v2.0 with LLVM
Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: vextract.8.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 8 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_ZEXT %4(s8) + PseudoRET implicit $lr, implicit %5 +... 
+ +--- +name: vextract.8.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s8) + PseudoRET implicit $lr, implicit %5 +... + +# Negative Test Case: Combining is not possible because the vextract8 is used directly without being truncated and extended +--- +name: vextract.8.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY]](<64 x s8>), [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... 
+ +# Negative Test Case: Combining is not possible because the vextract8 has a non-constant sign register +--- +name: vextract.8.non.constant.sign +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.8.non.constant.sign + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s8) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r1 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s8) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s8) + PseudoRET implicit $lr, implicit %5 +... + +--- +name: vextract.16.zext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.zext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 16 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_ZEXT %4(s16) + PseudoRET implicit $lr, implicit %5 +... 
+ +--- +name: vextract.16.sext +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.sext + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16 + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s16) + PseudoRET implicit $lr, implicit %5 +... + +# Negative Test Case: Combining is not possible because the vextract16 is used directly without being truncated and extended +--- +name: vextract.16.neg +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.neg + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[C]](s32), [[C1]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + PseudoRET implicit $lr, implicit %3 +... 
+ +# Negative Test Case: Combining is not possible because the vextract16 has a non-constant sign register +--- +name: vextract.16.non.constant.sign +legalized: false +body: | + bb.1.entry: + liveins: $x0 + ; CHECK-LABEL: name: vextract.16.non.constant.sign + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s16) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r1 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s16) = G_TRUNC %3(s32) + %5:_(s20) = G_SEXT %4(s16) + PseudoRET implicit $lr, implicit %5 +... diff --git a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir index b9f489914d56..b56fa14667f8 100644 --- a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir @@ -18,6 +18,8 @@ body: | %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) %3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) ... 
--- @@ -25,7 +27,7 @@ name: nok alignment: 16 body: | bb.0 (align 16): - ; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination + ; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination ; CHECK-NOT: Bad machine code %0:_(<16 x s16>) = COPY $wl0 %1:_(s32) = G_CONSTANT i32 1