Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 20db096

Browse files
committed
[X86] Correct SHRUNKBLEND creation to work correctly when there are multiple uses of the condition.
SimplifyDemandedBits forces the demanded mask to all 1s if the node has multiple uses, unless the AssumeSingleUse flag is set. So previously we were only really likely to simplify something if the condition had a single use. And on the off chance we did simplify with multiple uses the demanded mask being used was all ones so there was no reason to create a shrunkblend. This patch now checks that the condition is only used by selects first, and then sets the AssumeSingleUse flag for the simplification. Then we convert the selects to shrunkblend, and finally replace the condition. Differential Revision: https://reviews.llvm.org/D43446 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@325604 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent bff6449 commit 20db096

File tree

2 files changed

+23
-35
lines changed

2 files changed

+23
-35
lines changed

lib/Target/X86/X86ISelLowering.cpp

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -31910,46 +31910,38 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
3191031910
if (VT.is512BitVector())
3191131911
return SDValue();
3191231912

31913-
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
31914-
APInt DemandedMask(APInt::getSignMask(BitWidth));
31915-
KnownBits Known;
31916-
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
31917-
!DCI.isBeforeLegalizeOps());
31918-
if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
31919-
// If we changed the computation somewhere in the DAG, this change will
31920-
// affect all users of Cond. Make sure it is fine and update all the nodes
31921-
// so that we do not use the generic VSELECT anymore. Otherwise, we may
31922-
// perform wrong optimizations as we messed with the actual expectation
31923-
// for the vector boolean values.
31924-
if (Cond != TLO.Old) {
31925-
// Check all uses of the condition operand to check whether it will be
31926-
// consumed by non-BLEND instructions. Those may require that all bits
31927-
// are set properly.
31928-
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
31929-
UI != UE; ++UI) {
31930-
// TODO: Add other opcodes eventually lowered into BLEND.
31931-
if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
31932-
return SDValue();
31933-
}
31913+
bool CanShrinkCond = true;
31914+
for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
31915+
UI != UE; ++UI) {
31916+
// TODO: Add other opcodes eventually lowered into BLEND.
31917+
if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0) {
31918+
CanShrinkCond = false;
31919+
break;
31920+
}
31921+
}
3193431922

31935-
// Update all users of the condition before committing the change, so
31936-
// that the VSELECT optimizations that expect the correct vector boolean
31937-
// value will not be triggered.
31923+
if (CanShrinkCond) {
31924+
assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
31925+
APInt DemandedMask(APInt::getSignMask(BitWidth));
31926+
KnownBits Known;
31927+
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
31928+
!DCI.isBeforeLegalizeOps());
31929+
if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0,
31930+
/*AssumeSingleUse*/true)) {
31931+
// If we changed the computation somewhere in the DAG, this change will
31932+
// affect all users of Cond. Update all the nodes so that we do not use
31933+
// the generic VSELECT anymore. Otherwise, we may perform wrong
31934+
// optimizations as we messed with the actual expectation for the vector
31935+
// boolean values.
3193831936
for (SDNode *U : Cond->uses()) {
3193931937
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
3194031938
U->getValueType(0), Cond, U->getOperand(1),
3194131939
U->getOperand(2));
3194231940
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
3194331941
}
3194431942
DCI.CommitTargetLoweringOpt(TLO);
31945-
return SDValue();
31943+
return SDValue(N, 0);
3194631944
}
31947-
// Only Cond (rather than other nodes in the computation chain) was
31948-
// changed. Change the condition just for N to keep the opportunity to
31949-
// optimize all other users their own way.
31950-
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
31951-
DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
31952-
return SDValue();
3195331945
}
3195431946
}
3195531947

test/CodeGen/X86/vselect.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -503,8 +503,6 @@ define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
503503
; SSE41-LABEL: shrunkblend_2uses:
504504
; SSE41: # %bb.0:
505505
; SSE41-NEXT: psllq $63, %xmm0
506-
; SSE41-NEXT: psrad $31, %xmm0
507-
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
508506
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
509507
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
510508
; SSE41-NEXT: paddq %xmm2, %xmm4
@@ -514,8 +512,6 @@ define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
514512
; AVX-LABEL: shrunkblend_2uses:
515513
; AVX: # %bb.0:
516514
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
517-
; AVX-NEXT: vpxor %xmm5, %xmm5, %xmm5
518-
; AVX-NEXT: vpcmpgtq %xmm0, %xmm5, %xmm0
519515
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
520516
; AVX-NEXT: vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
521517
; AVX-NEXT: vpaddq %xmm0, %xmm1, %xmm0

0 commit comments

Comments
 (0)