Skip to content

Commit

Permalink
Combine VExtract intrinsics into generic opcode in PreLegalizerCombiner
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinay-anubola committed Jan 8, 2025
1 parent d07f22b commit de8468e
Show file tree
Hide file tree
Showing 4 changed files with 290 additions and 3 deletions.
5 changes: 3 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
switch (MI.getOpcode()) {
case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
ErrInfo = "Expected 32bit scalar destination";
return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
ErrInfo = "Expected 32bit or 20bit scalar destination";
return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
case AIE2::G_AIE_PAD_VECTOR_UNDEF:
return verifySameLaneTypes(MI, ErrInfo) &&
isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),
Expand Down
88 changes: 88 additions & 0 deletions llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {
std::map<unsigned, Register>
getVectorInsertIndices(MachineInstr *CurMI, unsigned SclSrcBits,
MachineRegisterInfo &MRI) const;
bool isTruncExtToS20Sequence(Register DstReg, bool SignVal,
unsigned SrcEltSize) const;

public:
AIE2PreLegalizerCombinerImpl(
Expand All @@ -80,6 +82,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {

bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const;

bool tryToCombineVExtractElt(MachineInstr &MI) const;

bool tryToCombineIntrinsic(MachineInstr &MI) const;

private:
Expand Down Expand Up @@ -288,6 +292,86 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts(
return true;
}

/// Check whether the only (non-debug) uses of \p DstReg form the exact
/// pattern G_TRUNC (down to \p SrcEltSize bits) followed by a single
/// sign/zero extension to s20, which makes it safe to combine the
/// originating vextract intrinsic.
bool AIE2PreLegalizerCombinerImpl::isTruncExtToS20Sequence(
    Register DstReg, bool SignVal, unsigned SrcEltSize) const {
  // Return the single non-debug user of \p Reg when it matches \p Opcode
  // and defines a value of exactly \p Bits bits; nullptr otherwise.
  const auto SingleUserOfKind = [&](Register Reg, unsigned Opcode,
                                    unsigned Bits) -> MachineInstr * {
    if (!MRI.hasOneNonDBGUser(Reg))
      return nullptr;
    MachineInstr &User = *MRI.use_nodbg_instructions(Reg).begin();
    if (User.getOpcode() != Opcode)
      return nullptr;
    const LLT UserDstTy = MRI.getType(User.getOperand(0).getReg());
    return UserDstTy.getSizeInBits() == Bits ? &User : nullptr;
  };

  // First link of the chain: a trunc back to the vector's element size.
  MachineInstr *TruncMI =
      SingleUserOfKind(DstReg, TargetOpcode::G_TRUNC, SrcEltSize);
  if (!TruncMI)
    return false;

  // Second link: an extension to s20 whose kind matches the sign operand.
  const unsigned ExtOpc = SignVal ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  return SingleUserOfKind(TruncMI->getOperand(0).getReg(), ExtOpc, 20) !=
         nullptr;
}

/// Rewrite a vextract.elem[8/16] intrinsic whose result is only consumed by
/// a trunc-then-extend-to-s20 chain into the generic extract opcode:
///   From: %3:_(s32) = G_INTRINSIC
///             intrinsic(@llvm.aie2.vextract.elem[8/16].I512),
///             %2(<32 x s16>), %0(s32), %1(s32)
///         %4:_(s16) = G_TRUNC %3(s32)
///         %5:_(s20) = G_SEXT %4(s16)
///   To:   %9:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %2(<32 x s16>), %0(s32)
///         %10:_(s20) = G_ASSERT_[S/Z]EXT %9, 16
///         %4:_(s16) = G_TRUNC %10(s20)
///         %5:_(s20) = G_[S/Z]EXT %4(s16)
/// This combine enables S20Narrowing for vextract.
/// \returns true if the rewrite was performed.
bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt(
    MachineInstr &MI) const {
  // For G_INTRINSIC, operand 1 carries the intrinsic ID, so the actual
  // intrinsic arguments start at operand 2.
  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(2).getReg();
  const Register IdxReg = MI.getOperand(3).getReg();

  // The sign operand must be a compile-time constant: it selects between
  // the signed and unsigned flavors of the generic opcode.
  const auto SignVal = getIConstantVRegSExtVal(MI.getOperand(4).getReg(), MRI);
  if (!SignVal)
    return false;
  const bool IsSigned = SignVal.value();

  const unsigned SrcEltSize = MRI.getType(SrcReg).getScalarSizeInBits();
  // Only combine when the result feeds the expected trunc + ext-to-s20 chain.
  if (!isTruncExtToS20Sequence(DstReg, IsSigned, SrcEltSize))
    return false;

  auto *TII = static_cast<const AIE2InstrInfo *>(STI.getInstrInfo());
  const unsigned ExtractOpc = TII->getGenericExtractVectorEltOpcode(IsSigned);
  const unsigned AssertOpc =
      IsSigned ? TargetOpcode::G_ASSERT_SEXT : TargetOpcode::G_ASSERT_ZEXT;
  const unsigned ExtOpc =
      IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;

  const LLT S20 = LLT::scalar(20);
  Register Extracted20 = MRI.createGenericVirtualRegister(S20);
  Register Asserted20 = MRI.createGenericVirtualRegister(S20);
  MachineIRBuilder MIRBuilder(MI);

  // Extract straight into s20, record how many bits are actually defined,
  // then re-extend to the original s32 result for the existing users.
  MIRBuilder.buildInstr(ExtractOpc, {Extracted20}, {SrcReg, IdxReg});
  MIRBuilder.buildAssertInstr(AssertOpc, Asserted20, Extracted20, SrcEltSize);
  MIRBuilder.buildInstr(ExtOpc, {DstReg}, {Asserted20});
  MI.eraseFromParent();
  return true;
}

bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
MachineInstr &MI) const {
const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
Expand All @@ -306,6 +390,10 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
case Intrinsic::aie2_vinsert32_I512: {
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
}
case Intrinsic::aie2_vextract_elem8_I512:
case Intrinsic::aie2_vextract_elem16_I512: {
return tryToCombineVExtractElt(MI);
}
default:
break;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s

# Positive Test Case: the vextract8 result is only used via G_TRUNC to s8
# followed by G_ZEXT to s20, so it is combined into
# G_AIE_ZEXT_EXTRACT_VECTOR_ELT with a G_ASSERT_ZEXT of 8 bits.
---
name: vextract.8.zext
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.8.zext
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 8
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s8) = G_TRUNC %3(s32)
%5:_(s20) = G_ZEXT %4(s8)
PseudoRET implicit $lr, implicit %5
...

# Positive Test Case: the vextract8 result is only used via G_TRUNC to s8
# followed by G_SEXT to s20, so it is combined into
# G_AIE_SEXT_EXTRACT_VECTOR_ELT with a G_ASSERT_SEXT of 8 bits.
---
name: vextract.8.sext
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.8.sext
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 1
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s8) = G_TRUNC %3(s32)
%5:_(s20) = G_SEXT %4(s8)
PseudoRET implicit $lr, implicit %5
...

# Negative Test Case: Combining is not possible because the vextract8 is used directly without being truncated and extended
# (no trunc + ext-to-s20 chain exists, so the intrinsic is left untouched).
---
name: vextract.8.neg
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.8.neg
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY]](<64 x s8>), [[C]](s32), [[C1]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 1
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
PseudoRET implicit $lr, implicit %3
...

# Negative Test Case: Combining is not possible because the vextract8 has a non-constant sign register
# (the combiner cannot choose between the SEXT and ZEXT generic opcodes).
---
name: vextract.8.non.constant.sign
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.8.non.constant.sign
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s8)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r1
%2:_(<64 x s8>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
%4:_(s8) = G_TRUNC %3(s32)
%5:_(s20) = G_SEXT %4(s8)
PseudoRET implicit $lr, implicit %5
...

# Positive Test Case: the vextract16 result is only used via G_TRUNC to s16
# followed by G_ZEXT to s20, so it is combined into
# G_AIE_ZEXT_EXTRACT_VECTOR_ELT with a G_ASSERT_ZEXT of 16 bits.
---
name: vextract.16.zext
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.16.zext
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 16
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 0
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s16) = G_TRUNC %3(s32)
%5:_(s20) = G_ZEXT %4(s16)
PseudoRET implicit $lr, implicit %5
...

# Positive Test Case: the vextract16 result is only used via G_TRUNC to s16
# followed by G_SEXT to s20, so it is combined into
# G_AIE_SEXT_EXTRACT_VECTOR_ELT with a G_ASSERT_SEXT of 16 bits.
---
name: vextract.16.sext
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.16.sext
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 1
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s16) = G_TRUNC %3(s32)
%5:_(s20) = G_SEXT %4(s16)
PseudoRET implicit $lr, implicit %5
...

# Negative Test Case: Combining is not possible because the vextract16 is used directly without being truncated and extended
# (no trunc + ext-to-s20 chain exists, so the intrinsic is left untouched).
---
name: vextract.16.neg
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.16.neg
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[C]](s32), [[C1]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = G_CONSTANT i32 1
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
PseudoRET implicit $lr, implicit %3
...

# Negative Test Case: Combining is not possible because the vextract16 has a non-constant sign register
# (the combiner cannot choose between the SEXT and ZEXT generic opcodes).
---
name: vextract.16.non.constant.sign
legalized: false
body: |
bb.1.entry:
liveins: $x0
; CHECK-LABEL: name: vextract.16.non.constant.sign
; CHECK: liveins: $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[INT]](s32)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s16)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20)
%0:_(s32) = G_CONSTANT i32 7
%1:_(s32) = COPY $r1
%2:_(<32 x s16>) = COPY $x0
%3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
%4:_(s16) = G_TRUNC %3(s32)
%5:_(s20) = G_SEXT %4(s16)
PseudoRET implicit $lr, implicit %5
...
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,16 @@ body: |
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
%3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
%4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
%5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
...

---
name: nok
alignment: 16
body: |
bb.0 (align 16):
; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination
; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination
; CHECK-NOT: Bad machine code
%0:_(<16 x s16>) = COPY $wl0
%1:_(s32) = G_CONSTANT i32 1
Expand Down

0 comments on commit de8468e

Please sign in to comment.