Commit de8468e

Combine VExtract intrinsics into generic opcode in PreLegalizerCombiner
1 parent d07f22b commit de8468e
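
When the result of a vextract intrinsic is only used through a G_TRUNC whose single use is a sign or zero extend to s20, the combiner now rewrites the intrinsic to the generic G_AIE_[S/Z]EXT_EXTRACT_VECTOR_ELT opcode, which enables S20Narrowing for vextract. A minimal before/after MIR sketch, adapted from the doc comment added in AIE2PreLegalizerCombiner.cpp below (register numbers are illustrative):

    ; Before
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s16)

    ; After
    %9:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %2(<32 x s16>), %0(s32)
    %10:_(s20) = G_ASSERT_SEXT %9, 16
    %4:_(s16) = G_TRUNC %10(s20)
    %5:_(s20) = G_SEXT %4(s16)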

4 files changed: +290 −3 lines changed

llvm/lib/Target/AIE/AIE2InstrInfo.cpp

Lines changed: 3 additions & 2 deletions
@@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
   case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
-    ErrInfo = "Expected 32bit scalar destination";
-    return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
+    ErrInfo = "Expected 32bit or 20bit scalar destination";
+    return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
+            MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
   case AIE2::G_AIE_PAD_VECTOR_UNDEF:
     return verifySameLaneTypes(MI, ErrInfo) &&
            isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),

llvm/lib/Target/AIE/AIE2PreLegalizerCombiner.cpp

Lines changed: 88 additions & 0 deletions
@@ -59,6 +59,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {
   std::map<unsigned, Register>
   getVectorInsertIndices(MachineInstr *CurMI, unsigned SclSrcBits,
                          MachineRegisterInfo &MRI) const;
+  bool isTruncExtToS20Sequence(Register DstReg, bool SignVal,
+                               unsigned SrcEltSize) const;
 
 public:
   AIE2PreLegalizerCombinerImpl(
@@ -80,6 +82,8 @@ class AIE2PreLegalizerCombinerImpl : public Combiner {
 
   bool tryToCombineVectorInserts(MachineInstr &MI, unsigned SclSrcBits) const;
 
+  bool tryToCombineVExtractElt(MachineInstr &MI) const;
+
   bool tryToCombineIntrinsic(MachineInstr &MI) const;
 
 private:
@@ -288,6 +292,86 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineVectorInserts(
   return true;
 }
 
+/// Determines if it is safe to combine vextract by checking the uses of DstReg,
+/// specifically for a pattern involving TRUNC followed by EXT.
+bool AIE2PreLegalizerCombinerImpl::isTruncExtToS20Sequence(
+    Register DstReg, bool SignVal, unsigned SrcEltSize) const {
+  // Returns the single non-debug use of a register with a specific opcode
+  // and destination size.
+  auto GetOneUseWithOpcode =
+      [&](const Register Reg, const unsigned OpcodeToCheck,
+          const unsigned DstSize) -> std::optional<MachineInstr *> {
+    if (MRI.hasOneNonDBGUser(Reg)) {
+      MachineInstr &Use = *MRI.use_nodbg_instructions(Reg).begin();
+      if (Use.getOpcode() == OpcodeToCheck) {
+        const LLT DstRegTy = MRI.getType(Use.getOperand(0).getReg());
+        if (DstRegTy.getSizeInBits() == DstSize)
+          return &Use;
+      }
+    }
+    return std::nullopt;
+  };
+  auto Trunc = GetOneUseWithOpcode(DstReg, TargetOpcode::G_TRUNC, SrcEltSize);
+  if (!Trunc)
+    return false;
+
+  const MachineInstr *TruncMI = *Trunc;
+  const unsigned ExtOpcode =
+      SignVal ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+  const Register UseDstReg = TruncMI->getOperand(0).getReg();
+  return GetOneUseWithOpcode(UseDstReg, ExtOpcode, 20).has_value();
+}
+
+/// \returns true if it is possible to combine the below sequence of MIRs
+/// From : %3:_(s32) = G_INTRINSIC
+///            intrinsic(@llvm.aie2.vextract.elem[8/16].I512), %2(<32 x s16>),
+///            %0(s32), %1(s32)
+///        %4:_(s16) = G_TRUNC %3(s32)
+///        %5:_(s20) = G_SEXT %4(s16)
+/// To   : %9:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %2(<32 x s16>), %0(s32)
+///        %10:_(s20) = G_ASSERT_[S/Z]EXT %9, 16
+///        %4:_(s16) = G_TRUNC %10(s20)
+///        %5:_(s20) = G_[S/Z]EXT %4(s16)
+/// This combine enables S20Narrowing for vextract
+bool AIE2PreLegalizerCombinerImpl::tryToCombineVExtractElt(
+    MachineInstr &MI) const {
+  const Register DstReg = MI.getOperand(0).getReg();
+  // In this case of G_INTRINSIC, operand 1 is the target intrinsic ID.
+  const Register SrcReg = MI.getOperand(2).getReg();
+  const Register IdxReg = MI.getOperand(3).getReg();
+  const Register SignReg = MI.getOperand(4).getReg();
+
+  const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI);
+  if (!SignVal)
+    return false;
+
+  const LLT SrcVecTy = MRI.getType(SrcReg);
+  const unsigned SrcEltSize = SrcVecTy.getScalarSizeInBits();
+  // Checks for the required pattern in uses of DstReg.
+  if (!isTruncExtToS20Sequence(DstReg, SignVal.value(), SrcEltSize))
+    return false;
+
+  auto *TII = static_cast<const AIE2InstrInfo *>(STI.getInstrInfo());
+  const unsigned Opcode =
+      TII->getGenericExtractVectorEltOpcode(SignVal.value());
+  const unsigned AssertExtOpcode = SignVal.value()
+                                       ? TargetOpcode::G_ASSERT_SEXT
+                                       : TargetOpcode::G_ASSERT_ZEXT;
+  const unsigned ExtOpcode =
+      SignVal.value() ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+  const LLT S20 = LLT::scalar(20);
+  Register DstReg20Bit = MRI.createGenericVirtualRegister(S20);
+  Register ExtReg20Bit = MRI.createGenericVirtualRegister(S20);
+  MachineIRBuilder MIRBuilder(MI);
+
+  MIRBuilder.buildInstr(Opcode, {DstReg20Bit}, {SrcReg, IdxReg});
+  MIRBuilder.buildAssertInstr(AssertExtOpcode, ExtReg20Bit, DstReg20Bit,
+                              SrcEltSize);
+  MIRBuilder.buildInstr(ExtOpcode, {DstReg}, {ExtReg20Bit});
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
     MachineInstr &MI) const {
   const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
@@ -306,6 +390,10 @@ bool AIE2PreLegalizerCombinerImpl::tryToCombineIntrinsic(
   case Intrinsic::aie2_vinsert32_I512: {
     return tryToCombineVectorInserts(MI, getVInsertScalarSize(IntrinsicID));
   }
+  case Intrinsic::aie2_vextract_elem8_I512:
+  case Intrinsic::aie2_vextract_elem16_I512: {
+    return tryToCombineVExtractElt(MI);
+  }
   default:
     break;
   }
Lines changed: 196 additions & 0 deletions
@@ -0,0 +1,196 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2 -run-pass=aie2-prelegalizer-combiner %s -verify-machineinstrs -o - | FileCheck %s

---
name: vextract.8.zext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.zext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 8
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    %4:_(s8) = G_TRUNC %3(s32)
    %5:_(s20) = G_ZEXT %4(s8)
    PseudoRET implicit $lr, implicit %5
...

---
name: vextract.8.sext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.sext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 8
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    %4:_(s8) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s8)
    PseudoRET implicit $lr, implicit %5
...

# Negative Test Case: Combining is not possible because the vextract8 is used directly without being truncated and extended
---
name: vextract.8.neg
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.neg
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY]](<64 x s8>), [[C]](s32), [[C1]](s32)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    PseudoRET implicit $lr, implicit %3
...

# Negative Test Case: Combining is not possible because the vextract8 has a non-constant sign register
---
name: vextract.8.non.constant.sign
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.8.non.constant.sign
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32)
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[INT]](s32)
    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s8)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = COPY $r1
    %2:_(<64 x s8>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32)
    %4:_(s8) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s8)
    PseudoRET implicit $lr, implicit %5
...

---
name: vextract.16.zext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.zext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_ZEXT:%[0-9]+]]:_(s20) = G_ASSERT_ZEXT [[AIE_ZEXT_EXTRACT_VECTOR_ELT]], 16
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_ZEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 0
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_ZEXT %4(s16)
    PseudoRET implicit $lr, implicit %5
...

---
name: vextract.16.sext
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.sext
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32)
    ; CHECK-NEXT: [[ASSERT_SEXT:%[0-9]+]]:_(s20) = G_ASSERT_SEXT [[AIE_SEXT_EXTRACT_VECTOR_ELT]], 16
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[ASSERT_SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s16)
    PseudoRET implicit $lr, implicit %5
...

# Negative Test Case: Combining is not possible because the vextract16 is used directly without being truncated and extended
---
name: vextract.16.neg
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.neg
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY]](<32 x s16>), [[C]](s32), [[C1]](s32)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[INT]](s32)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = G_CONSTANT i32 1
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    PseudoRET implicit $lr, implicit %3
...

# Negative Test Case: Combining is not possible because the vextract16 has a non-constant sign register
---
name: vextract.16.non.constant.sign
legalized: false
body: |
  bb.1.entry:
    liveins: $x0
    ; CHECK-LABEL: name: vextract.16.non.constant.sign
    ; CHECK: liveins: $x0
    ; CHECK-NEXT: {{ $}}
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r1
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0
    ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32)
    ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[INT]](s32)
    ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s20) = G_SEXT [[TRUNC]](s16)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SEXT]](s20)
    %0:_(s32) = G_CONSTANT i32 7
    %1:_(s32) = COPY $r1
    %2:_(<32 x s16>) = COPY $x0
    %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32)
    %4:_(s16) = G_TRUNC %3(s32)
    %5:_(s20) = G_SEXT %4(s16)
    PseudoRET implicit $lr, implicit %5
...

llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir

Lines changed: 3 additions & 1 deletion
@@ -18,14 +18,16 @@ body: |
     %1:_(s32) = G_CONSTANT i32 1
     %2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
     %3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
+    %4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
+    %5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32)
 ...
 
 ---
 name: nok
 alignment: 16
 body: |
   bb.0 (align 16):
-    ; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination
+    ; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination
     ; CHECK-NOT: Bad machine code
     %0:_(<16 x s16>) = COPY $wl0
     %1:_(s32) = G_CONSTANT i32 1
