
Commit 4f2e183

[X86] combineOr - don't demand operand elements if the other operand element is 'allones'
If either operand has an element with all bits set, then we don't need the equivalent element from the other operand, as all bits of that result element are guaranteed to be set anyway.
1 parent a9d8114 commit 4f2e183
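
To illustrate the rule outside the DAG combiner, here is a minimal, standalone C++ sketch (plain C++, not the LLVM APInt/TargetLowering API; demandedEltsForOr is a hypothetical helper invented for this example): for an OR, any lane where the constant operand is all-ones forces that lane of the result to all-ones, so the matching lane of the other operand is not demanded.

#include <cstdint>
#include <cstdio>
#include <vector>

// Returns a bitmask with bit I set iff lane I of the constant OR operand is
// NOT all-ones, i.e. iff the other operand's lane I still affects the result.
uint32_t demandedEltsForOr(const std::vector<uint64_t> &ConstLanes,
                           uint64_t AllOnes) {
  uint32_t Demanded = 0;
  for (size_t I = 0; I != ConstLanes.size(); ++I)
    if (ConstLanes[I] != AllOnes)
      Demanded |= 1u << I;
  return Demanded;
}

int main() {
  // v4i32 constant <-1, 0, -1, 0>: lanes 0 and 2 are all-ones, so only
  // lanes 1 and 3 of the other OR operand are demanded -> prints 0xa.
  std::printf("%#x\n",
              demandedEltsForOr({0xFFFFFFFFu, 0, 0xFFFFFFFFu, 0}, 0xFFFFFFFFu));
  return 0;
}

In the patch itself the equivalent mask is built with APInt::getZero/setBit and handed to TLI.SimplifyDemandedVectorElts, which can then simplify or drop the computation feeding the undemanded lanes.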

3 files changed: +38 −20 lines changed


llvm/lib/Target/X86/X86ISelLowering.cpp
Lines changed: 27 additions & 2 deletions

@@ -46647,6 +46647,7 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
   SDLoc dl(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
 
   // If this is SSE1 only convert to FOR to avoid scalarization.
   if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
@@ -46663,7 +46664,6 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
   SmallVector<APInt, 2> SrcPartials;
   if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
       SrcOps.size() == 1) {
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
     unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
     EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
     SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
@@ -46724,11 +46724,36 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
     }
   }
 
-  // Attempt to recursively combine an OR of shuffles.
   if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+    // Attempt to recursively combine an OR of shuffles.
     SDValue Op(N, 0);
     if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
       return Res;
+
+    // If either operand is a constant mask, then only the elements that aren't
+    // allones are actually demanded by the other operand.
+    auto SimplifyUndemandedElts = [&](SDValue Op, SDValue OtherOp) {
+      APInt UndefElts;
+      SmallVector<APInt> EltBits;
+      int NumElts = VT.getVectorNumElements();
+      int EltSizeInBits = VT.getScalarSizeInBits();
+      if (!getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, EltBits))
+        return false;
+
+      APInt DemandedElts = APInt::getZero(NumElts);
+      for (int I = 0; I != NumElts; ++I)
+        if (!EltBits[I].isAllOnes())
+          DemandedElts.setBit(I);
+
+      APInt KnownUndef, KnownZero;
+      return TLI.SimplifyDemandedVectorElts(OtherOp, DemandedElts, KnownUndef,
+                                            KnownZero, DCI);
+    };
+    if (SimplifyUndemandedElts(N0, N1) || SimplifyUndemandedElts(N1, N0)) {
+      if (N->getOpcode() != ISD::DELETED_NODE)
+        DCI.AddToWorklist(N);
+      return SDValue(N, 0);
+    }
   }
 
   // We should fold "masked merge" patterns when `andn` is not available.
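
The test updates below show the fold in action. In vshift-6.ll, for example, one operand of the final OR is the constant mask [255,255,255,255,255,255,255,0,255,...]: every lane except lane 7 is all-ones, so only lane 7 of the other operand is demanded, and the movdqa/pandn sequence that previously materialized that operand simplifies away, leaving a single por against a constant-pool entry.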

llvm/test/CodeGen/X86/setcc-lowering.ll
Lines changed: 5 additions & 6 deletions

@@ -9,12 +9,11 @@
 define <8 x i16> @pr25080(<8 x i32> %a) {
 ; AVX-LABEL: pr25080:
 ; AVX:       # %bb.0: # %entry
-; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpackssdw %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT:    vzeroupper
 ; AVX-NEXT:    retq

llvm/test/CodeGen/X86/vshift-6.ll
Lines changed: 6 additions & 12 deletions

@@ -30,12 +30,9 @@ define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movb %al, (%ecx)
-; X86-NEXT:    movd %eax, %xmm0
-; X86-NEXT:    psllq $56, %xmm0
-; X86-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X86-NEXT:    movdqa %xmm2, %xmm1
-; X86-NEXT:    pandn %xmm0, %xmm1
-; X86-NEXT:    por %xmm2, %xmm1
+; X86-NEXT:    movd %eax, %xmm1
+; X86-NEXT:    psllq $56, %xmm1
+; X86-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
 ; X86-NEXT:    pcmpeqd %xmm3, %xmm3
 ; X86-NEXT:    psllw $5, %xmm1
 ; X86-NEXT:    pxor %xmm2, %xmm2
@@ -65,12 +62,9 @@ define <16 x i8> @do_not_crash(i8*, i32*, i64*, i32, i64, i8) {
 ; X64-LABEL: do_not_crash:
 ; X64:       # %bb.0: # %entry
 ; X64-NEXT:    movb %r9b, (%rdi)
-; X64-NEXT:    movd %r9d, %xmm0
-; X64-NEXT:    psllq $56, %xmm0
-; X64-NEXT:    movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
-; X64-NEXT:    movdqa %xmm2, %xmm1
-; X64-NEXT:    pandn %xmm0, %xmm1
-; X64-NEXT:    por %xmm2, %xmm1
+; X64-NEXT:    movd %r9d, %xmm1
+; X64-NEXT:    psllq $56, %xmm1
+; X64-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; X64-NEXT:    pcmpeqd %xmm2, %xmm2
 ; X64-NEXT:    psllw $5, %xmm1
 ; X64-NEXT:    pxor %xmm3, %xmm3
