Skip to content

Commit 3e7ef03

Browse files
committed
[WIP][DAG] combineVSelectWithAllOnesOrZeros - fold "select Cond, 0, x -> and not(Cond), x"
Extend #145298 to remove the x86 combineVSelectWithLastZeros special case. WIP - still a couple of x86 regressions to address with respect to unnecessary duplicated comparisons. Fixes #144513.
1 parent fcdb91e commit 3e7ef03

File tree

7 files changed

+386
-323
lines changed

7 files changed

+386
-323
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13159,6 +13159,15 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
1315913159
return DAG.getBitcast(VT, And);
1316013160
}
1316113161

13162+
// select Cond, 0, x -> and not(Cond), x
13163+
if (IsTAllZero &&
13164+
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
13165+
SDValue X = DAG.getBitcast(CondVT, FVal);
13166+
SDValue And =
13167+
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
13168+
return DAG.getBitcast(VT, And);
13169+
}
13170+
1316213171
return SDValue();
1316313172
}
1316413173

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 0 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -47262,57 +47262,6 @@ static SDValue combineToExtendBoolVectorInReg(
4726247262
DAG.getConstant(EltSizeInBits - 1, DL, VT));
4726347263
}
4726447264

47265-
/// If a vector select has a left operand that is 0, try to simplify the
47266-
/// select to a bitwise logic operation.
47267-
/// TODO: Move to DAGCombiner.combineVSelectWithAllOnesOrZeros, possibly using
47268-
/// TargetLowering::hasAndNot()?
47269-
static SDValue combineVSelectWithLastZeros(SDNode *N, SelectionDAG &DAG,
47270-
const SDLoc &DL,
47271-
TargetLowering::DAGCombinerInfo &DCI,
47272-
const X86Subtarget &Subtarget) {
47273-
SDValue Cond = N->getOperand(0);
47274-
SDValue LHS = N->getOperand(1);
47275-
SDValue RHS = N->getOperand(2);
47276-
EVT VT = LHS.getValueType();
47277-
EVT CondVT = Cond.getValueType();
47278-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
47279-
47280-
if (N->getOpcode() != ISD::VSELECT)
47281-
return SDValue();
47282-
47283-
assert(CondVT.isVector() && "Vector select expects a vector selector!");
47284-
47285-
// To use the condition operand as a bitwise mask, it must have elements that
47286-
// are the same size as the select elements. I.e., the condition operand must
47287-
// have already been promoted from the IR select condition type <N x i1>.
47288-
// Don't check if the types themselves are equal because that excludes
47289-
// vector floating-point selects.
47290-
if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
47291-
return SDValue();
47292-
47293-
// Cond must be a 'sign splat' (every bit of each element is a sign bit) to be converted to a logical op.
47294-
if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
47295-
return SDValue();
47296-
47297-
if (!TLI.isTypeLegal(CondVT))
47298-
return SDValue();
47299-
47300-
// vselect Cond, 000..., X -> andn Cond, X
47301-
if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
47302-
SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
47303-
SDValue AndN;
47304-
// The canonical form differs for i1 vectors - X86ISD::ANDNP is not used; AND(NOT(Cond), X) is emitted instead.
47305-
if (CondVT.getScalarType() == MVT::i1)
47306-
AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
47307-
CastRHS);
47308-
else
47309-
AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
47310-
return DAG.getBitcast(VT, AndN);
47311-
}
47312-
47313-
return SDValue();
47314-
}
47315-
4731647265
/// If both arms of a vector select are concatenated vectors, split the select,
4731747266
/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
4731847267
/// vselect Cond, (concat T0, T1), (concat F0, F1) -->
@@ -48059,9 +48008,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
4805948008
if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget))
4806048009
return SDValue();
4806148010

48062-
if (SDValue V = combineVSelectWithLastZeros(N, DAG, DL, DCI, Subtarget))
48063-
return V;
48064-
4806548011
if (SDValue V = combineVSelectToBLENDV(N, DAG, DL, DCI, Subtarget))
4806648012
return V;
4806748013

llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,9 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
3030
; CHECK-NEXT: // %bb.1: // %vector.body
3131
; CHECK-NEXT: movi v0.2d, #0000000000000000
3232
; CHECK-NEXT: movi v1.2d, #0000000000000000
33-
; CHECK-NEXT: ptrue p0.s
33+
; CHECK-NEXT: ldr z4, [x0]
34+
; CHECK-NEXT: ldr z5, [x0, #2, mul vl]
35+
; CHECK-NEXT: ldr z6, [x0, #3, mul vl]
3436
; CHECK-NEXT: umov w8, v0.b[8]
3537
; CHECK-NEXT: mov v1.b[1], v0.b[1]
3638
; CHECK-NEXT: fmov s2, w8
@@ -60,31 +62,20 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
6062
; CHECK-NEXT: asr z1.s, z1.s, #31
6163
; CHECK-NEXT: uunpklo z3.s, z3.h
6264
; CHECK-NEXT: lsl z0.s, z0.s, #31
63-
; CHECK-NEXT: and z1.s, z1.s, #0x1
65+
; CHECK-NEXT: bic z1.d, z4.d, z1.d
6466
; CHECK-NEXT: lsl z2.s, z2.s, #31
67+
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
6568
; CHECK-NEXT: asr z0.s, z0.s, #31
66-
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
67-
; CHECK-NEXT: ldr z1, [x0]
69+
; CHECK-NEXT: str z1, [x0]
6870
; CHECK-NEXT: lsl z3.s, z3.s, #31
6971
; CHECK-NEXT: asr z2.s, z2.s, #31
70-
; CHECK-NEXT: and z0.s, z0.s, #0x1
72+
; CHECK-NEXT: bic z0.d, z5.d, z0.d
7173
; CHECK-NEXT: asr z3.s, z3.s, #31
72-
; CHECK-NEXT: and z2.s, z2.s, #0x1
73-
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
74-
; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
75-
; CHECK-NEXT: ldr z0, [x0, #2, mul vl]
76-
; CHECK-NEXT: and z3.s, z3.s, #0x1
77-
; CHECK-NEXT: str z1, [x0]
78-
; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
79-
; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
80-
; CHECK-NEXT: ldr z3, [x0, #3, mul vl]
81-
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
82-
; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
83-
; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
84-
; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
74+
; CHECK-NEXT: bic z1.d, z4.d, z2.d
8575
; CHECK-NEXT: str z0, [x0, #2, mul vl]
76+
; CHECK-NEXT: bic z3.d, z6.d, z3.d
77+
; CHECK-NEXT: str z1, [x0, #1, mul vl]
8678
; CHECK-NEXT: str z3, [x0, #3, mul vl]
87-
; CHECK-NEXT: str z2, [x0, #1, mul vl]
8879
; CHECK-NEXT: .LBB1_2: // %exit
8980
; CHECK-NEXT: ret
9081
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer

llvm/test/CodeGen/AArch64/vselect-constants.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
169169
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
170170
; CHECK-LABEL: sel_0_or_1_vec:
171171
; CHECK: // %bb.0:
172-
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
173172
; CHECK-NEXT: movi v1.4s, #1
174-
; CHECK-NEXT: shl v0.4s, v0.4s, #31
175-
; CHECK-NEXT: cmge v0.4s, v0.4s, #0
176-
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
173+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
174+
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
177175
; CHECK-NEXT: ret
178176
%add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
179177
ret <4 x i32> %add

0 commit comments

Comments
 (0)