diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp
index cdd39e6732fa..7f4ae6f0fd31 100644
--- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp
+++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp
@@ -152,8 +152,9 @@ bool AIE2InstrInfo::verifyGenericInstruction(const MachineInstr &MI,
   switch (MI.getOpcode()) {
   case AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT:
   case AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT:
-    ErrInfo = "Expected 32bit scalar destination";
-    return MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32);
+    ErrInfo = "Expected 32bit or 20bit scalar destination";
+    return (MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(32) ||
+            MRI.getType(MI.getOperand(0).getReg()) == LLT::scalar(20));
   case AIE2::G_AIE_PAD_VECTOR_UNDEF:
     return verifySameLaneTypes(MI, ErrInfo) &&
            isLegalTypeToUnpad(MRI.getType(MI.getOperand(0).getReg()),
@@ -1417,6 +1418,24 @@ AIE2InstrInfo::getVExtractOpInfo(const MachineInstr &MI) const {
   }
 }
 
+bool AIE2InstrInfo::isS20Intrinsic(const MachineInstr &MI,
+                                   const MachineRegisterInfo &MRI) const {
+  const unsigned IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
+  switch (IntrinsicID) {
+  case Intrinsic::aie2_vextract_elem8_I512:
+  case Intrinsic::aie2_vextract_elem16_I512:
+  case Intrinsic::aie2_vextract_elem32_I512: {
+    // Operands: 0 = dst, 1 = intrinsic ID, 2 and 3 = sources, 4 = sign.
+    // The sign must be a known constant: modifyToS20 uses its value to pick
+    // the generic extract-vector-element opcode that replaces the intrinsic.
+    const Register SignValueReg = MI.getOperand(4).getReg();
+    return getIConstantVRegSExtVal(SignValueReg, MRI).has_value();
+  }
+  default:
+    return false;
+  }
+}
+
 unsigned AIE2InstrInfo::getMaxLoadStoreSize() const { return 256; }
 
 bool AIE2InstrInfo::canCombineWithLoadStore(const MachineInstr &MI) const {
diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.h b/llvm/lib/Target/AIE/AIE2InstrInfo.h
index 5a79a652feb9..39362ff5c198 100644
--- a/llvm/lib/Target/AIE/AIE2InstrInfo.h
+++ b/llvm/lib/Target/AIE/AIE2InstrInfo.h
@@ -180,6 +180,9 @@ class AIE2InstrInfo : public AIE2GenInstrInfo {
   std::optional
   getVExtractOpInfo(const MachineInstr &MI) const override;
 
+  bool isS20Intrinsic(const MachineInstr &MI,
+                      const MachineRegisterInfo &MRI) const override;
+
   unsigned getMaxLoadStoreSize() const override;
 
   bool canCombineWithLoadStore(const MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h
index 51f103d422e1..519b929cf826 100644
--- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h
+++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h
@@ -428,6 +428,12 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
     llvm_unreachable("Target didn't implement getVExtractOpInfo!");
   }
 
+  /// Return true if \p MI is an intrinsic whose result can be narrowed to s20.
+  virtual bool isS20Intrinsic(const MachineInstr &MI,
+                              const MachineRegisterInfo &MRI) const {
+    llvm_unreachable("Target didn't implement isS20Intrinsic!");
+  }
+
   /// Return the maximun size for memory operations on this target.
   virtual unsigned getMaxLoadStoreSize() const {
     llvm_unreachable("Target didn't implement getMaxLoadStoreSize!");
diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
index 1590f9e045a0..814801e53e27 100644
--- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp
+++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp
@@ -581,6 +581,11 @@ static bool canProduceS20(const MachineRegisterInfo &MRI,
   case TargetOpcode::G_CONSTANT:
   case TargetOpcode::G_IMPLICIT_DEF:
     return true;
+  case TargetOpcode::G_INTRINSIC: {
+    const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*MI.getMF());
+    const AIEBaseInstrInfo *TII = STI.getInstrInfo();
+    return TII->isS20Intrinsic(MI, MRI);
+  }
   default:
     return false;
   }
@@ -842,6 +847,8 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
                  GISelChangeObserver &Observer, CombinerHelper &Helper) {
   const LLT S20 = LLT::scalar(20);
   MachineInstr *StartNodeMI = Start.getBaseNode();
+  const AIEBaseSubtarget &STI = AIEBaseSubtarget::get(*StartNodeMI->getMF());
+  const AIEBaseInstrInfo *TII = STI.getInstrInfo();
 
   // If Start can be rematerialized, only modify one user to use the
   // rematerialized instruction and leave the others unchanged.
@@ -901,16 +908,44 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
     Helper.tryCombineCopy(*StartNodeMI);
     return true;
   }
+  case TargetOpcode::G_INTRINSIC: {
+    if (TII->isS20Intrinsic(*StartNodeMI, MRI)) {
+      assert(StartNodeMI->getNumOperands() == 5 &&
+             "Expected an S20 intrinsic with exactly 5 operands");
+      Register ExtractDstReg = StartNodeMI->getOperand(0).getReg();
+      // Note: Operand 1 is the ID of the intrinsic
+      const Register SrcReg0 = StartNodeMI->getOperand(2).getReg();
+      const Register SrcReg1 = StartNodeMI->getOperand(3).getReg();
+      const Register SignReg = StartNodeMI->getOperand(4).getReg();
+
+      const auto SignVal = getIConstantVRegSExtVal(SignReg, MRI);
+      assert(SignVal.has_value() && "Expected SignVal to be constant");
+
+      B.setInsertPt(*StartNodeMI->getParent(), StartNodeMI);
+      Observer.changingInstr(*StartNodeMI);
+      MRI.setType(ExtractDstReg, S20);
+      MachineInstr *NewMI =
+          B.buildInstr(TII->getGenericExtractVectorEltOpcode(SignVal.value()),
+                       {ExtractDstReg}, {SrcReg0, SrcReg1})
+              .getInstr();
+      Observer.changedInstr(*NewMI);
+
+      Helper.eraseInst(*StartNodeMI);
+      StartNodeMI = NewMI;
+    } else {
+      LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
+      llvm_unreachable("Unexpected G_INTRINSIC, while modifying IR");
+    }
+    break;
+  }
   default: {
     LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
     llvm_unreachable("Unexpected OpCode, while modifying IR");
   }
   }
 
-  switch (StartNodeMI->getOpcode()) {
-  case TargetOpcode::COPY:
-  case TargetOpcode::G_LOAD:
-  case TargetOpcode::G_PHI: {
+  // Function to handle the modification of instructions
+  auto modifyInstructionUses = [&](MachineInstr *StartNodeMI) {
     const auto UseInstIter =
         MRI.use_nodbg_instructions(StartNodeMI->getOperand(0).getReg());
     std::vector UseInstr;
@@ -925,11 +960,27 @@ bool modifyToS20(InstrNode Start, MachineRegisterInfo &MRI, MachineIRBuilder &B,
       if (!modifyToS20(NextNodeToModify, MRI, B, Observer, Helper))
         llvm_unreachable("All input nodes should have updated");
     }
+  };
+
+  switch (StartNodeMI->getOpcode()) {
+  case TargetOpcode::COPY:
+  case TargetOpcode::G_LOAD:
+  case TargetOpcode::G_PHI: {
+    modifyInstructionUses(StartNodeMI);
     break;
   }
   default: {
-    LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
-    llvm_unreachable("Unexpected OpCode, while modifying IR");
+    const unsigned Opcode = StartNodeMI->getOpcode();
+    // Check if the opcode corresponds to a generic extract vector element
+    // operation. Since these opcodes are determined at runtime, we use an if
+    // statement to handle them.
+    if (Opcode == TII->getGenericExtractVectorEltOpcode(false) ||
+        Opcode == TII->getGenericExtractVectorEltOpcode(true)) {
+      modifyInstructionUses(StartNodeMI);
+    } else {
+      LLVM_DEBUG(dbgs() << "Node :" << *StartNodeMI);
+      llvm_unreachable("Unexpected OpCode, while modifying IR");
+    }
   }
   }
   return true;
diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir
index 3bfdf5b1a88b..ddcf8e04acfd 100644
--- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir
+++ b/llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-s20-narrowing.mir
@@ -781,3 +781,294 @@ body: |
     $r0 = COPY %11
     G_BR %bb.2
 ...
+ +--- +name: valid_vextract8_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract8_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +--- +name: valid_vextract16_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract16_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +--- +name: valid_vextract32_add2d +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0 + + ; CHECK-LABEL: name: valid_vextract32_add2d + ; CHECK: liveins: $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_ZEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT [[COPY]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20), [[AIE_ZEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Only one Src Node (vextract8) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract8_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract8_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<64 x s8>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<64 x s8>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %3(<64 x s8>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Only one Src Node (vextract16) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract16_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract16_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<32 x s16>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<32 x s16>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %3(<32 x s16>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Only one Src Node (vextract32) for G_PTR_ADD that is narrowed to S20 type, intermediate G_TRUNC is removed +--- +name: valid_vextract32_PTR_ADD +legalized: false +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: valid_vextract32_PTR_ADD + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[AIE_SEXT_EXTRACT_VECTOR_ELT:%[0-9]+]]:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT [[COPY1]](<16 x s32>), [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[AIE_SEXT_EXTRACT_VECTOR_ELT]](s20) + ; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32)) + ; CHECK-NEXT: G_BR %bb.1 + bb.1: + successors: %bb.2(0x80000000); %bb.2(100.00%) + liveins: $p0, $x0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 0 + %2:_(s32) = G_CONSTANT i32 1 + %3:_(<16 x s32>) = COPY $x0 + %4:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %3(<16 x s32>), %1(s32), %2(s32) + + bb.2: + successors: %bb.2(0x80000000); %bb.2(100.00%) + + %5:_(s20) = G_TRUNC %4:_(s32) + %6:_(p0) = G_PTR_ADD %0:_, %5:_(s20) + G_STORE %1:_(s32), %6:_(p0) :: (store (s32)) + G_BR %bb.2 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract8 source node has a non-constant sign register +--- +name: valid_vextract8_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract8_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<64 x s8>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), [[COPY1]](<64 x s8>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<64 x s8>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem8.I512), %2(<64 x s8>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract16 source node has a non-constant sign register +--- +name: valid_vextract16_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract16_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<32 x s16>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), [[COPY1]](<32 x s16>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<32 x s16>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem16.I512), %2(<32 x s16>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
+ +# Negative Test Case: Narrowing to s20 is not possible because the vextract32 source node has a non-constant sign register +--- +name: valid_vextract32_add2d_neg +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x0, $r0 + + ; CHECK-LABEL: name: valid_vextract32_add2d_neg + ; CHECK: liveins: $x0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x0 + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), [[COPY1]](<16 x s32>), [[C]](s32), [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[INT]](s32) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[INT1:%[0-9]+]]:_(p0), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), [[C1]](p0), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20), [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[INT1]](p0) + %0:_(s32) = G_CONSTANT i32 7 + %1:_(s32) = COPY $r0 + %2:_(<16 x s32>) = COPY $x0 + %3:_(s32) = G_INTRINSIC intrinsic(@llvm.aie2.vextract.elem32.I512), %2(<16 x s32>), %0(s32), %1(s32) + %4:_(s20) = G_TRUNC %3(s32) + %5:_(p0) = G_CONSTANT i20 0 + %6:_(p0), %7:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2.add.2d), %5:_(p0), %4:_(s20), %4:_(s20), %4:_(s20), %4:_(s20) + $p0 = COPY %6 +... 
diff --git a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir index b9f489914d56..b56fa14667f8 100644 --- a/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AIE/aie2/verifier/verify-szext-extract-vec-elt.mir @@ -18,6 +18,8 @@ body: | %1:_(s32) = G_CONSTANT i32 1 %2:_(s32) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) %3:_(s32) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %4:_(s20) = G_AIE_SEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) + %5:_(s20) = G_AIE_ZEXT_EXTRACT_VECTOR_ELT %0(<16 x s16>), %1(s32) ... --- @@ -25,7 +27,7 @@ name: nok alignment: 16 body: | bb.0 (align 16): - ; CHECK-COUNT-4: Bad machine code: Expected 32bit scalar destination + ; CHECK-COUNT-4: Bad machine code: Expected 32bit or 20bit scalar destination ; CHECK-NOT: Bad machine code %0:_(<16 x s16>) = COPY $wl0 %1:_(s32) = G_CONSTANT i32 1