Skip to content

[WIP][DAG] combineVSelectWithAllOnesOrZeros - fold "select Cond, 0, x -> and not(Cond), x" #146831

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13158,6 +13158,15 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
return DAG.getBitcast(VT, And);
}

// select Cond, 0, x -> and not(Cond), x
if (IsTAllZero &&
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
SDValue X = DAG.getBitcast(CondVT, FVal);
SDValue And =
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
return DAG.getBitcast(VT, And);
}

return SDValue();
}

Expand Down
54 changes: 0 additions & 54 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47262,57 +47262,6 @@ static SDValue combineToExtendBoolVectorInReg(
DAG.getConstant(EltSizeInBits - 1, DL, VT));
}

/// Attempt to rewrite a vector select whose first value operand (operand 1) is
/// an all-zeros build vector as a bitwise logic operation:
///   vselect Cond, 000..., X -> andn Cond, X
/// Returns an empty SDValue when \p N is not an ISD::VSELECT or when any of
/// the preconditions checked below does not hold.
/// TODO: Move to DAGCombiner.combineVSelectWithAllOnesOrZeros, possibly using
/// TargetLowering::hasAndNot()?
static SDValue combineVSelectWithLastZeros(SDNode *N, SelectionDAG &DAG,
                                           const SDLoc &DL,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const X86Subtarget &Subtarget) {
  SDValue Mask = N->getOperand(0);
  SDValue ZeroCandidate = N->getOperand(1);
  SDValue Other = N->getOperand(2);
  EVT SelVT = ZeroCandidate.getValueType();
  EVT MaskVT = Mask.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  assert(MaskVT.isVector() && "Vector select expects a vector selector!");

  // Preconditions for using the condition as a bitwise mask, checked in order:
  //  1. Its elements are as wide as the select's elements, i.e. it has already
  //     been promoted from the IR select condition type <N x i1>. Only element
  //     widths are compared (not the types themselves) so that vector
  //     floating-point selects are not excluded.
  //  2. Every element is a 'sign splat' (all bits equal the sign bit).
  //  3. The condition type is legal for the target.
  const auto MaskEltBits = MaskVT.getScalarSizeInBits();
  if (MaskEltBits != SelVT.getScalarSizeInBits() ||
      DAG.ComputeNumSignBits(Mask) != MaskEltBits ||
      !TLI.isTypeLegal(MaskVT))
    return SDValue();

  // Only the "true operand is all zeros" form is handled here.
  if (!ISD::isBuildVectorAllZeros(ZeroCandidate.getNode()))
    return SDValue();

  // vselect Cond, 000..., X -> andn Cond, X
  SDValue OtherAsMaskVT = DAG.getBitcast(MaskVT, Other);
  // The canonical form differs for i1 vectors - x86andnp is not used there;
  // emit a generic AND of the inverted mask instead.
  const bool IsBoolMask = MaskVT.getScalarType() == MVT::i1;
  SDValue AndN =
      IsBoolMask
          ? DAG.getNode(ISD::AND, DL, MaskVT, DAG.getNOT(DL, Mask, MaskVT),
                        OtherAsMaskVT)
          : DAG.getNode(X86ISD::ANDNP, DL, MaskVT, Mask, OtherAsMaskVT);
  return DAG.getBitcast(SelVT, AndN);
}

/// If both arms of a vector select are concatenated vectors, split the select,
/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
/// vselect Cond, (concat T0, T1), (concat F0, F1) -->
Expand Down Expand Up @@ -48059,9 +48008,6 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (!TLI.isTypeLegal(VT) || isSoftF16(VT, Subtarget))
return SDValue();

if (SDValue V = combineVSelectWithLastZeros(N, DAG, DL, DCI, Subtarget))
return V;

if (SDValue V = combineVSelectToBLENDV(N, DAG, DL, DCI, Subtarget))
return V;

Expand Down
29 changes: 10 additions & 19 deletions llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: // %bb.1: // %vector.body
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ldr z4, [x0]
; CHECK-NEXT: ldr z5, [x0, #2, mul vl]
; CHECK-NEXT: ldr z6, [x0, #3, mul vl]
; CHECK-NEXT: umov w8, v0.b[8]
; CHECK-NEXT: mov v1.b[1], v0.b[1]
; CHECK-NEXT: fmov s2, w8
Expand Down Expand Up @@ -60,31 +62,20 @@ define void @crash_when_lowering_extract_shuffle(ptr %dst, i1 %cond) vscale_rang
; CHECK-NEXT: asr z1.s, z1.s, #31
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: lsl z0.s, z0.s, #31
; CHECK-NEXT: and z1.s, z1.s, #0x1
; CHECK-NEXT: bic z1.d, z4.d, z1.d
; CHECK-NEXT: lsl z2.s, z2.s, #31
; CHECK-NEXT: ldr z4, [x0, #1, mul vl]
; CHECK-NEXT: asr z0.s, z0.s, #31
; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
; CHECK-NEXT: ldr z1, [x0]
; CHECK-NEXT: str z1, [x0]
; CHECK-NEXT: lsl z3.s, z3.s, #31
; CHECK-NEXT: asr z2.s, z2.s, #31
; CHECK-NEXT: and z0.s, z0.s, #0x1
; CHECK-NEXT: bic z0.d, z5.d, z0.d
; CHECK-NEXT: asr z3.s, z3.s, #31
; CHECK-NEXT: and z2.s, z2.s, #0x1
; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0
; CHECK-NEXT: ldr z0, [x0, #2, mul vl]
; CHECK-NEXT: and z3.s, z3.s, #0x1
; CHECK-NEXT: str z1, [x0]
; CHECK-NEXT: cmpne p3.s, p0/z, z3.s, #0
; CHECK-NEXT: cmpne p0.s, p0/z, z2.s, #0
; CHECK-NEXT: ldr z3, [x0, #3, mul vl]
; CHECK-NEXT: ldr z2, [x0, #1, mul vl]
; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0
; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0
; CHECK-NEXT: bic z1.d, z4.d, z2.d
; CHECK-NEXT: str z0, [x0, #2, mul vl]
; CHECK-NEXT: bic z3.d, z6.d, z3.d
; CHECK-NEXT: str z1, [x0, #1, mul vl]
; CHECK-NEXT: str z3, [x0, #3, mul vl]
; CHECK-NEXT: str z2, [x0, #1, mul vl]
; CHECK-NEXT: .LBB1_2: // %exit
; CHECK-NEXT: ret
%broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/vselect-constants.ll
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) {
define <4 x i32> @sel_0_or_1_vec(<4 x i1> %cond) {
; CHECK-LABEL: sel_0_or_1_vec:
; CHECK: // %bb.0:
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: shl v0.4s, v0.4s, #31
; CHECK-NEXT: cmge v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
%add = select <4 x i1> %cond, <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
ret <4 x i32> %add
Expand Down
Loading