Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for allowing direct VEXTRACT to 20-bit registers #233

Open
wants to merge 2 commits into
base: aie-public
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ class CombinerHelper {
/// Transform zext(trunc(x)) to x.
bool matchCombineZextTrunc(MachineInstr &MI, Register &Reg);

/// Transform sext(trunc(x)) to x.
bool matchCombineSextTrunc(MachineInstr &MI, Register &Reg);

/// Transform [asz]ext([asz]ext(x)) to [asz]ext x.
bool matchCombineExtOfExt(MachineInstr &MI,
std::tuple<Register, unsigned> &MatchInfo);
Expand Down
12 changes: 11 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,16 @@ def zext_trunc_fold: GICombineRule <
(apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;

// Fold (sext (trunc x)) -> x if the source type is same as the destination type
// and truncated bits are known to be sign extended.
// Sign-extending counterpart of zext_trunc_fold above; the predicate is
// implemented by CombinerHelper::matchCombineSextTrunc, which queries known
// sign bits of the truncated value.
def sext_trunc_fold_matchinfo : GIDefMatchData<"Register">;
def sext_trunc_fold: GICombineRule <
  (defs root:$root, sext_trunc_fold_matchinfo:$matchinfo),
  (match (wip_match_opcode G_SEXT):$root,
         [{ return Helper.matchCombineSextTrunc(*${root}, ${matchinfo}); }]),
  (apply [{ Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;

// Fold ([asz]ext ([asz]ext x)) -> ([asz]ext x).
def ext_ext_fold_matchinfo : GIDefMatchData<"std::tuple<Register, unsigned>">;
def ext_ext_fold: GICombineRule <
Expand Down Expand Up @@ -1597,7 +1607,7 @@ def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,

def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
zext_trunc_fold, sext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
sext_inreg_to_zext_inreg]>;

def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
Expand Down
14 changes: 14 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2465,6 +2465,20 @@ bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
return false;
}

/// Match sext(trunc(x)) -> x, where x already has the destination type and
/// the bits removed by the trunc are known to be copies of x's sign bit.
/// On success, \p Reg is set to x so the apply step can replace the G_SEXT.
bool CombinerHelper::matchCombineSextTrunc(MachineInstr &MI, Register &Reg) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT && "Expected a G_SEXT");
  const Register SextSrc = MI.getOperand(1).getReg();
  const LLT SextDstTy = MRI.getType(MI.getOperand(0).getReg());
  // The trunc's input must have exactly the sext's destination type, else
  // replacing the sext result with it would change types.
  if (!mi_match(SextSrc, MRI,
                m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(SextDstTy)))))
    return false;
  // Folding is sound iff every bit the trunc discards is a sign bit, i.e.
  // x carries at least (wide - narrow + 1) known sign bits.
  const unsigned NarrowSize = MRI.getType(SextSrc).getScalarSizeInBits();
  const unsigned WideSize = SextDstTy.getScalarSizeInBits();
  return KB->computeNumSignBits(Reg) >= WideSize - NarrowSize + 1;
}

bool CombinerHelper::matchCombineExtOfExt(
MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/Target/AIE/AIE2InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
switch (MI.getOpcode()) {
case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
ErrInfo = "Expected 32bit scalar destination";
return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
ErrInfo = "Expected 32bit or 20bit scalar destination";
return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
case AIE2::G_AIE_PAD_VECTOR_UNDEF:
return verifySameLaneTypes(MI, ErrInfo) &&
isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),
Expand Down
88 changes: 88 additions & 0 deletions llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {
std::map<unsigned, Register>
getVectorInsertIndices(MachineInstr *CurMI, unsigned SclSrcBits,
MachineRegisterInfo &MRI) const;
bool isTruncExtToS20Sequence(Register DstReg, bool SignVal,
unsigned SrcEltSize) const;

public:
AIE2PreLegalizerCombinerImpl(
Expand All @@ -80,6 +82,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {

bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const;

bool tryToCombineVExtractElt(MachineInstr &MI) const;

bool tryToCombineIntrinsic(MachineInstr &MI) const;

private:
Expand Down Expand Up @@ -288,6 +292,86 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts(
return true;
}

/// Determines whether \p DstReg is consumed solely by a G_TRUNC back to the
/// vector element size (\p SrcEltSize) whose single use is in turn a
/// G_SEXT/G_ZEXT (per \p SignVal) to s20 — the exact shape the vextract
/// combine rewrites for S20Narrowing.
bool AIE2PreLegalizerCombinerImpl::isTruncExtToS20Sequence(
    Register DstReg, bool SignVal, unsigned SrcEltSize) const {
  // Return the single non-debug user of \p Reg when it has opcode
  // \p WantedOpcode and a destination of \p WantedDstSize bits; otherwise
  // return nullptr.
  const auto SoleUserOfSize = [&](Register Reg, unsigned WantedOpcode,
                                  unsigned WantedDstSize) -> MachineInstr * {
    if (!MRI.hasOneNonDBGUser(Reg))
      return nullptr;
    MachineInstr &User = *MRI.use_nodbg_instructions(Reg).begin();
    if (User.getOpcode() != WantedOpcode)
      return nullptr;
    const LLT UserDstTy = MRI.getType(User.getOperand(0).getReg());
    if (UserDstTy.getSizeInBits() != WantedDstSize)
      return nullptr;
    return &User;
  };

  MachineInstr *TruncMI =
      SoleUserOfSize(DstReg, TargetOpcode::G_TRUNC, SrcEltSize);
  if (!TruncMI)
    return false;

  const unsigned ExtOpcode =
      SignVal ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
  return SoleUserOfSize(TruncMI->getOperand(0).getReg(), ExtOpcode, 20) !=
         nullptr;
}

/// \returns true if it is possible to combine the below sequence of MIRs
/// From : %3:_(s32) = G_INTRINSIC
/// intrinsic(@llvm.aie2.vextract.elem[8/16].I512), %2(<32 x s16>),
/// %0(s32), %1(s32)
/// %4:_(s16) = G_TRUNC %3(s32)
/// %5:_(s20) = G_SEXT %4(s16)
/// To : %9:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %2(<32 x s16>), %0(s32)
/// %10:_(s20) = G_ASSERT_[S/Z]EXT %9, 16
/// %4:_(s16) = G_TRUNC %10(s20)
/// %5:_(s20) = G_[S/Z]EXT %4(s16)
Copy link
Collaborator

@gbossu gbossu Dec 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to change the return types? I would expect that we only need to add a %10:_(s32) = G_ASSERT_[S/Z]EXT %9, 16 and keep the rest intact thanks to the new sext(trunc x) combiner you added previously.

Copy link
Collaborator Author

@abhinay-anubola abhinay-anubola Jan 2, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we need to change the return types, because the pattern that is written in new sext(trunc x) combiner will not match in this case as m_SpecificType is trying to match s20 but return type here is s32.
mi_match(SrcReg, MRI, m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))

/// This combine enables S20Narrowing for vextract
bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt(
abhinay-anubola marked this conversation as resolved.
Show resolved Hide resolved
MachineInstr &MI) const {
abhinay-anubola marked this conversation as resolved.
Show resolved Hide resolved
const Register DstReg = MI.getOperand(0).getReg();
// In this case of G_INTRINSIC operand 1 is target intrinsic
const Register SrcReg = MI.getOperand(2).getReg();
const Register IdxReg = MI.getOperand(3).getReg();
const Register SignReg = MI.getOperand(4).getReg();

const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI);
if (!SignVal)
abhinay-anubola marked this conversation as resolved.
Show resolved Hide resolved
return false;

const LLT SrcVecTy = MRI.getType(SrcReg);
const unsigned SrcEltSize = SrcVecTy.getScalarSizeInBits();
// Checks for the required pattern in uses of DstReg
if (!isTruncExtToS20Sequence(DstReg, SignVal.value(), SrcEltSize))
return false;

auto *TII = static_cast<const AIE2InstrInfo *>(STI.getInstrInfo());
const unsigned Opcode =
TII->getGenericExtractVectorEltOpcode(SignVal.value());
const unsigned AssertExtOpcode = SignVal.value()
? TargetOpcode::G_ASSERT_SEXT
: TargetOpcode::G_ASSERT_ZEXT;
const unsigned ExtOpcode =
SignVal.value() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
const LLT S20 = LLT::scalar(20);
Register DstReg20Bit = MRI.createGenericVirtualRegister(S20);
Register ExtReg20Bit = MRI.createGenericVirtualRegister(S20);
MachineIRBuilder MIRBuilder(MI);

MIRBuilder.buildInstr(Opcode, {DstReg20Bit}, {SrcReg, IdxReg});
MIRBuilder.buildAssertInstr(AssertExtOpcode, ExtReg20Bit, DstReg20Bit,
SrcEltSize);
MIRBuilder.buildInstr(ExtOpcode, {DstReg}, {ExtReg20Bit});
MI.eraseFromParent();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now we are safe ;-)

return true;
}

bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
MachineInstr &MI) const {
const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
Expand All @@ -306,6 +390,10 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
case Intrinsic::aie2_vinsert32_I512: {
return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
}
case Intrinsic::aie2_vextract_elem8_I512:
case Intrinsic::aie2_vextract_elem16_I512: {
return tryToCombineVExtractElt(MI);
}
default:
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,10 +469,8 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s64) = G_ZEXTLOAD [[COPY]](p0) :: (load (s8))
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[ZEXTLOAD]](s64)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[TRUNC]](s32)
; CHECK-NEXT: $x0 = COPY [[ZEXTLOAD]](s64)
; CHECK-NEXT: $x1 = COPY [[SEXT]](s64)
; CHECK-NEXT: $x1 = COPY [[ZEXTLOAD]](s64)
%0:_(p0) = COPY $x0
%1:_(s32) = G_ZEXTLOAD %0 :: (load (s8))
%2:_(s64) = G_ZEXT %1
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s

# Positive test: the s32 result of vextract.elem8 (sign operand 0 = unsigned)
# is only used through G_TRUNC to s8 followed by G_ZEXT to s20, so it is
# combined into a direct s20 G_AIE_ZEXT_EXTRACT_VECTOR_ELT with G_ASSERT_ZEXT.
---
name: vextract.8.zext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.zext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 8
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    %4:_(s8) = G_TRUNC %3(s32)
    %5:_(s20) = G_ZEXT %4(s8)
    PseudoRET implicit $lr, implicit %5
...

# Positive test: the s32 result of vextract.elem8 (sign operand 1 = signed)
# is only used through G_TRUNC to s8 followed by G_SEXT to s20, so it is
# combined into a direct s20 G_AIE_SEXT_EXTRACT_VECTOR_ELT with G_ASSERT_SEXT.
---
name: vextract.8.sext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.sext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    %4:_(s8) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s8)
    PseudoRET implicit $lr, implicit %5
...

# Negative Test Case: Combining is not possible because the vextract8 result
# is used directly without being truncated and extended, so the intrinsic must
# keep producing a full s32.
---
name: vextract.8.neg
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.neg
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY]](<64 x s8>), [[C]](s32), [[C1]](s32)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    PseudoRET implicit $lr, implicit %3
...

# Negative Test Case: Combining is not possible because the vextract8 has a
# non-constant sign register, so the combiner cannot choose between the
# signed and unsigned extract opcode.
---
name: vextract.8.non.constant.sign
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.non.constant.sign
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32)
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[INT]](s32)
    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s8)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = COPY $r1
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    %4:_(s8) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s8)
    PseudoRET implicit $lr, implicit %5
...

# Positive test: the s32 result of vextract.elem16 (sign operand 0 = unsigned)
# is only used through G_TRUNC to s16 followed by G_ZEXT to s20, so it is
# combined into a direct s20 G_AIE_ZEXT_EXTRACT_VECTOR_ELT with G_ASSERT_ZEXT.
---
name: vextract.16.zext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.zext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 16
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_ZEXT %4(s16)
    PseudoRET implicit $lr, implicit %5
...

# Positive test: the s32 result of vextract.elem16 (sign operand 1 = signed)
# is only used through G_TRUNC to s16 followed by G_SEXT to s20, so it is
# combined into a direct s20 G_AIE_SEXT_EXTRACT_VECTOR_ELT with G_ASSERT_SEXT.
---
name: vextract.16.sext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.sext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s16)
    PseudoRET implicit $lr, implicit %5
...

# Negative Test Case: Combining is not possible because the vextract16 result
# is used directly without being truncated and extended, so the intrinsic must
# keep producing a full s32.
---
name: vextract.16.neg
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.neg
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[C]](s32), [[C1]](s32)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    PseudoRET implicit $lr, implicit %3
...

# Negative Test Case: Combining is not possible because the vextract16 has a
# non-constant sign register, so the combiner cannot choose between the
# signed and unsigned extract opcode.
---
name: vextract.16.non.constant.sign
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.non.constant.sign
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32)
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[INT]](s32)
    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s16)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = COPY $r1
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s16)
    PseudoRET implicit $lr, implicit %5
...
Loading
Loading