Skip to content

Commit b0f20f2

Browse files
committed
[X86] combineVectorCompare - add constant folding support for PCMPEQ/PCMPGT instructions
1 parent aca71ef commit b0f20f2

File tree

2 files changed

+35
-8
lines changed

2 files changed

+35
-8
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55692,14 +55692,43 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
5569255692

5569355693
static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
5569455694
const X86Subtarget &Subtarget) {
55695+
unsigned Opcode = N->getOpcode();
55696+
assert((Opcode == X86ISD::PCMPEQ || Opcode == X86ISD::PCMPGT) &&
55697+
"Unknown PCMP opcode");
55698+
55699+
SDValue LHS = N->getOperand(0);
55700+
SDValue RHS = N->getOperand(1);
5569555701
MVT VT = N->getSimpleValueType(0);
55702+
unsigned EltBits = VT.getScalarSizeInBits();
55703+
unsigned NumElts = VT.getVectorNumElements();
5569655704
SDLoc DL(N);
5569755705

55698-
if (N->getOperand(0) == N->getOperand(1)) {
55699-
if (N->getOpcode() == X86ISD::PCMPEQ)
55700-
return DAG.getConstant(-1, DL, VT);
55701-
if (N->getOpcode() == X86ISD::PCMPGT)
55702-
return DAG.getConstant(0, DL, VT);
55706+
if (LHS == RHS)
55707+
return (Opcode == X86ISD::PCMPEQ) ? DAG.getAllOnesConstant(DL, VT)
55708+
: DAG.getConstant(0, DL, VT);
55709+
55710+
// Constant Folding.
55711+
// PCMPEQ(X,UNDEF) -> UNDEF
55712+
// PCMPGT(X,UNDEF) -> 0
55713+
// PCMPGT(UNDEF,X) -> 0
55714+
APInt LHSUndefs, RHSUndefs;
55715+
SmallVector<APInt> LHSBits, RHSBits;
55716+
if (getTargetConstantBitsFromNode(LHS, EltBits, LHSUndefs, LHSBits) &&
55717+
getTargetConstantBitsFromNode(RHS, EltBits, RHSUndefs, RHSBits)) {
55718+
APInt Ones = APInt::getAllOnes(EltBits);
55719+
APInt Zero = APInt::getZero(EltBits);
55720+
SmallVector<APInt> Results(NumElts);
55721+
for (unsigned I = 0; I != NumElts; ++I) {
55722+
if (Opcode == X86ISD::PCMPEQ) {
55723+
Results[I] = (LHSBits[I] == RHSBits[I]) ? Ones : Zero;
55724+
} else {
55725+
bool AnyUndef = LHSUndefs[I] || RHSUndefs[I];
55726+
Results[I] = (!AnyUndef && LHSBits[I].sgt(RHSBits[I])) ? Ones : Zero;
55727+
}
55728+
}
55729+
if (Opcode == X86ISD::PCMPEQ)
55730+
return getConstVector(Results, LHSUndefs | RHSUndefs, VT, DAG, DL);
55731+
return getConstVector(Results, VT, DAG, DL);
5570355732
}
5570455733

5570555734
return SDValue();

llvm/test/CodeGen/X86/pr81136.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,17 @@ define i64 @PR81136(i32 %a0, i32 %a1, ptr %a2) {
88
; CHECK-NEXT: vmovd %esi, %xmm1
99
; CHECK-NEXT: vmovdqa (%rdx), %ymm2
1010
; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
11-
; CHECK-NEXT: vpmovzxbq {{.*#+}} xmm4 = [128,1]
12-
; CHECK-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm4
1311
; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
1412
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
1513
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
1614
; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
1715
; CHECK-NEXT: vpalignr {{.*#+}} xmm0 = mem[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
1816
; CHECK-NEXT: vpcmpeqq %xmm3, %xmm0, %xmm0
1917
; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
18+
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
2019
; CHECK-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
2120
; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm2
2221
; CHECK-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
23-
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm4, %ymm0
2422
; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
2523
; CHECK-NEXT: vandnpd %ymm0, %ymm1, %ymm0
2624
; CHECK-NEXT: vmovmskpd %ymm0, %eax

0 commit comments

Comments
 (0)