Skip to content

Commit 33fc322

Browse files
authored
[SelectionDAG] Simplify vselect true, T, F -> T (llvm#100992)
This addresses a TODO where we can fold a vselect to its true operand if the boolean is known to be all trues, by factoring out the logic from extractBooleanFlip, which checks TLI.getBooleanContents.
1 parent 7a0d5bd commit 33fc322

File tree

7 files changed

+79
-52
lines changed

7 files changed

+79
-52
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2320,6 +2320,11 @@ class SelectionDAG {
23202320
isConstantFPBuildVectorOrConstantFP(N);
23212321
}
23222322

2323+
/// Check if a value \p N is a constant using the target's BooleanContent for
2324+
/// its type.
2325+
std::optional<bool> isBoolConstant(SDValue N,
2326+
bool AllowTruncation = false) const;
2327+
23232328
/// Set CallSiteInfo to be associated with Node.
23242329
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo) {
23252330
SDEI[Node].CSInfo = std::move(CallInfo);

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -3260,28 +3260,9 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
32603260
if (V.getOpcode() != ISD::XOR)
32613261
return SDValue();
32623262

3263-
ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
3264-
if (!Const)
3265-
return SDValue();
3266-
3267-
EVT VT = V.getValueType();
3268-
3269-
bool IsFlip = false;
3270-
switch(TLI.getBooleanContents(VT)) {
3271-
case TargetLowering::ZeroOrOneBooleanContent:
3272-
IsFlip = Const->isOne();
3273-
break;
3274-
case TargetLowering::ZeroOrNegativeOneBooleanContent:
3275-
IsFlip = Const->isAllOnes();
3276-
break;
3277-
case TargetLowering::UndefinedBooleanContent:
3278-
IsFlip = (Const->getAPIntValue() & 0x01) == 1;
3279-
break;
3280-
}
3281-
3282-
if (IsFlip)
3263+
if (DAG.isBoolConstant(V.getOperand(1)) == true)
32833264
return V.getOperand(0);
3284-
if (Force)
3265+
if (Force && isConstOrConstSplat(V.getOperand(1), false))
32853266
return DAG.getLogicalNOT(SDLoc(V), V, V.getValueType());
32863267
return SDValue();
32873268
}

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9924,15 +9924,8 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) {
99249924

99259925
// select true, T, F --> T
99269926
// select false, T, F --> F
9927-
if (auto *CondC = dyn_cast<ConstantSDNode>(Cond))
9928-
return CondC->isZero() ? F : T;
9929-
9930-
// TODO: This should simplify VSELECT with non-zero constant condition using
9931-
// something like this (but check boolean contents to be complete?):
9932-
if (ConstantSDNode *CondC = isConstOrConstSplat(Cond, /*AllowUndefs*/ false,
9933-
/*AllowTruncation*/ true))
9934-
if (CondC->isZero())
9935-
return F;
9927+
if (auto C = isBoolConstant(Cond, /*AllowTruncation=*/true))
9928+
return *C ? T : F;
99369929

99379930
// select ?, T, T --> T
99389931
if (T == F)
@@ -13141,6 +13134,31 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) const {
1314113134
return nullptr;
1314213135
}
1314313136

13137+
std::optional<bool> SelectionDAG::isBoolConstant(SDValue N,
13138+
bool AllowTruncation) const {
13139+
ConstantSDNode *Const = isConstOrConstSplat(N, false, AllowTruncation);
13140+
if (!Const)
13141+
return std::nullopt;
13142+
13143+
const APInt &CVal = Const->getAPIntValue();
13144+
switch (TLI->getBooleanContents(N.getValueType())) {
13145+
case TargetLowering::ZeroOrOneBooleanContent:
13146+
if (CVal.isOne())
13147+
return true;
13148+
if (CVal.isZero())
13149+
return false;
13150+
return std::nullopt;
13151+
case TargetLowering::ZeroOrNegativeOneBooleanContent:
13152+
if (CVal.isAllOnes())
13153+
return true;
13154+
if (CVal.isZero())
13155+
return false;
13156+
return std::nullopt;
13157+
case TargetLowering::UndefinedBooleanContent:
13158+
return CVal[0];
13159+
}
13160+
}
13161+
1314413162
void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
1314513163
assert(!Node->OperandList && "Node already has operands");
1314613164
assert(SDNode::getMaxNumOperands() >= Vals.size() &&

llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
206206
; CHECK-NEXT: movi v1.4s, #128, lsl #24
207207
; CHECK-NEXT: usra v3.4s, v2.4s, #1
208208
; CHECK-NEXT: and v1.16b, v3.16b, v1.16b
209-
; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
209+
; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
210210
; CHECK-NEXT: movi v1.4s, #1
211211
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
212212
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
3+
4+
define <vscale x 1 x i64> @all_ones(<vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl) {
5+
; CHECK-LABEL: all_ones:
6+
; CHECK: # %bb.0:
7+
; CHECK-NEXT: ret
8+
%v = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl)
9+
ret <vscale x 1 x i64> %v
10+
}
11+
12+
define <vscale x 1 x i64> @all_zeroes(<vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl) {
13+
; CHECK-LABEL: all_zeroes:
14+
; CHECK: # %bb.0:
15+
; CHECK-NEXT: vmv1r.v v8, v9
16+
; CHECK-NEXT: ret
17+
%v = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 false), <vscale x 1 x i64> %true, <vscale x 1 x i64> %false, i32 %evl)
18+
ret <vscale x 1 x i64> %v
19+
}

llvm/test/CodeGen/X86/combine-srem.ll

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
8383
; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
8484
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
8585
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
86-
; AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
86+
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
8787
; AVX1-NEXT: retq
8888
;
8989
; AVX2-LABEL: combine_vec_srem_by_minsigned:
@@ -93,7 +93,7 @@ define <4 x i32> @combine_vec_srem_by_minsigned(<4 x i32> %x) {
9393
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
9494
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
9595
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
96-
; AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
96+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
9797
; AVX2-NEXT: retq
9898
%1 = srem <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
9999
ret <4 x i32> %1
@@ -225,24 +225,28 @@ define <4 x i32> @combine_vec_srem_by_pow2a_neg(<4 x i32> %x) {
225225
; SSE-NEXT: psrad $31, %xmm1
226226
; SSE-NEXT: psrld $30, %xmm1
227227
; SSE-NEXT: paddd %xmm0, %xmm1
228-
; SSE-NEXT: psrld $2, %xmm1
229-
; SSE-NEXT: pxor %xmm2, %xmm2
230-
; SSE-NEXT: psubd %xmm1, %xmm2
231-
; SSE-NEXT: pslld $2, %xmm2
232-
; SSE-NEXT: paddd %xmm2, %xmm0
228+
; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
229+
; SSE-NEXT: psubd %xmm1, %xmm0
233230
; SSE-NEXT: retq
234231
;
235-
; AVX-LABEL: combine_vec_srem_by_pow2a_neg:
236-
; AVX: # %bb.0:
237-
; AVX-NEXT: vpsrad $31, %xmm0, %xmm1
238-
; AVX-NEXT: vpsrld $30, %xmm1, %xmm1
239-
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm1
240-
; AVX-NEXT: vpsrld $2, %xmm1, %xmm1
241-
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
242-
; AVX-NEXT: vpsubd %xmm1, %xmm2, %xmm1
243-
; AVX-NEXT: vpslld $2, %xmm1, %xmm1
244-
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
245-
; AVX-NEXT: retq
232+
; AVX1-LABEL: combine_vec_srem_by_pow2a_neg:
233+
; AVX1: # %bb.0:
234+
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
235+
; AVX1-NEXT: vpsrld $30, %xmm1, %xmm1
236+
; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
237+
; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
238+
; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
239+
; AVX1-NEXT: retq
240+
;
241+
; AVX2-LABEL: combine_vec_srem_by_pow2a_neg:
242+
; AVX2: # %bb.0:
243+
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
244+
; AVX2-NEXT: vpsrld $30, %xmm1, %xmm1
245+
; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
246+
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4294967292,4294967292,4294967292,4294967292]
247+
; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
248+
; AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
249+
; AVX2-NEXT: retq
246250
%1 = srem <4 x i32> %x, <i32 -4, i32 -4, i32 -4, i32 -4>
247251
ret <4 x i32> %1
248252
}

llvm/test/CodeGen/X86/srem-seteq-vec-splat.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
624624
; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
625625
; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
626626
; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
627-
; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
627+
; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0
628628
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
629629
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
630630
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -637,7 +637,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
637637
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
638638
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
639639
; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
640-
; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
640+
; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
641641
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
642642
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
643643
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -649,7 +649,7 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
649649
; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
650650
; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
651651
; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
652-
; CHECK-AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
652+
; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0
653653
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
654654
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
655655
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0

0 commit comments

Comments
 (0)