Skip to content

Commit 5626adc

Browse files
committed
[X86][SSE] combineVectorSignBitsTruncation - fold trunc(srl(x,c)) -> packss(sra(x,c))
If an srl doesn't introduce any sign bits into the truncated result, then replace it with an sra so that we can use a PACKSS truncation. This fixes a regression noticed in D56387 on pre-SSE41 targets, which don't have PACKUSDW.
1 parent 58bdfcf commit 5626adc

File tree

2 files changed

+18
-5
lines changed

2 files changed

+18
-5
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46071,9 +46071,23 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
4607146071
if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
4607246072
return SDValue();
4607346073

46074-
if (NumSignBits > (InSVT.getSizeInBits() - NumPackedSignBits))
46074+
unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
46075+
if (NumSignBits > MinSignBits)
4607546076
return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);
4607646077

46078+
// If we have a srl that only generates signbits that we will discard in
46079+
// the truncation then we can use PACKSS by converting the srl to a sra.
46080+
// SimplifyDemandedBits often relaxes sra to srl so we need to reverse it.
46081+
if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
46082+
if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
46083+
In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
46084+
if (*ShAmt == MinSignBits) {
46085+
SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
46086+
return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
46087+
Subtarget);
46088+
}
46089+
}
46090+
4607746091
return SDValue();
4607846092
}
4607946093

llvm/test/CodeGen/X86/vector-trunc.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -452,10 +452,9 @@ define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) {
452452
;
453453
; SSSE3-LABEL: trunc8i32_8i16_lshr:
454454
; SSSE3: # %bb.0: # %entry
455-
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,10,11,14,15,14,15,128,128]
456-
; SSSE3-NEXT: pshufb %xmm2, %xmm1
457-
; SSSE3-NEXT: pshufb %xmm2, %xmm0
458-
; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
455+
; SSSE3-NEXT: psrad $16, %xmm1
456+
; SSSE3-NEXT: psrad $16, %xmm0
457+
; SSSE3-NEXT: packssdw %xmm1, %xmm0
459458
; SSSE3-NEXT: retq
460459
;
461460
; SSE41-LABEL: trunc8i32_8i16_lshr:

0 commit comments

Comments
 (0)