Skip to content

Commit daa8033

Browse files
committed
[CodeGen] Support folds of not(cmp(cc, ...)) -> cmp(!cc, ...) for scalable vectors
Update TargetLowering::isConstTrueVal to also recognise SPLAT_VECTOR nodes with constant integer operands, so that the not(cmp(cc, ...)) -> cmp(!cc, ...) fold also applies on targets that support scalable vectors. As part of this, isConstTrueVal and isConstFalseVal now take an SDValue instead of a const SDNode *. Differential Revision: https://reviews.llvm.org/D117210
1 parent a24cc48 commit daa8033

File tree

6 files changed

+130
-26
lines changed

6 files changed

+130
-26
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3682,11 +3682,11 @@ class TargetLowering : public TargetLoweringBase {
36823682

36833683
/// Return true if N is a constant or constant vector equal to the true value
36843684
/// from getBooleanContents().
3685-
bool isConstTrueVal(const SDNode *N) const;
3685+
bool isConstTrueVal(SDValue N) const;
36863686

36873687
/// Return true if N is a constant or constant vector equal to the false value
36883688
/// from getBooleanContents().
3689-
bool isConstFalseVal(const SDNode *N) const;
3689+
bool isConstFalseVal(SDValue N) const;
36903690

36913691
/// Return true if \p N is a true value when extended to \p VT.
36923692
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -906,9 +906,8 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
906906
return true;
907907
}
908908

909-
if (N.getOpcode() != ISD::SELECT_CC ||
910-
!TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
911-
!TLI.isConstFalseVal(N.getOperand(3).getNode()))
909+
if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2)) ||
910+
!TLI.isConstFalseVal(N.getOperand(3)))
912911
return false;
913912

914913
if (TLI.getBooleanContents(N.getValueType()) ==
@@ -8035,8 +8034,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
80358034
// fold !(x cc y) -> (x !cc y)
80368035
unsigned N0Opcode = N0.getOpcode();
80378036
SDValue LHS, RHS, CC;
8038-
if (TLI.isConstTrueVal(N1.getNode()) &&
8039-
isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) {
8037+
if (TLI.isConstTrueVal(N1) &&
8038+
isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/ true)) {
80408039
ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
80418040
LHS.getValueType());
80428041
if (!LegalOperations ||

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3194,29 +3194,25 @@ bool TargetLowering::isSplatValueForTargetNode(SDValue Op,
31943194
// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
31953195
// work with truncating build vectors and vectors with elements of less than
31963196
// 8 bits.
3197-
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
3197+
bool TargetLowering::isConstTrueVal(SDValue N) const {
31983198
if (!N)
31993199
return false;
32003200

3201+
unsigned EltWidth;
32013202
APInt CVal;
3202-
if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
3203+
if (ConstantSDNode *CN = isConstOrConstSplat(N, /*AllowUndefs=*/false,
3204+
/*AllowTruncation=*/true)) {
32033205
CVal = CN->getAPIntValue();
3204-
} else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
3205-
auto *CN = BV->getConstantSplatNode();
3206-
if (!CN)
3207-
return false;
3208-
3209-
// If this is a truncating build vector, truncate the splat value.
3210-
// Otherwise, we may fail to match the expected values below.
3211-
unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
3212-
CVal = CN->getAPIntValue();
3213-
if (BVEltWidth < CVal.getBitWidth())
3214-
CVal = CVal.trunc(BVEltWidth);
3215-
} else {
3206+
EltWidth = N.getValueType().getScalarSizeInBits();
3207+
} else
32163208
return false;
3217-
}
32183209

3219-
switch (getBooleanContents(N->getValueType(0))) {
3210+
// If this is a truncating splat, truncate the splat value.
3211+
// Otherwise, we may fail to match the expected values below.
3212+
if (EltWidth < CVal.getBitWidth())
3213+
CVal = CVal.trunc(EltWidth);
3214+
3215+
switch (getBooleanContents(N.getValueType())) {
32203216
case UndefinedBooleanContent:
32213217
return CVal[0];
32223218
case ZeroOrOneBooleanContent:
@@ -3228,7 +3224,7 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
32283224
llvm_unreachable("Invalid boolean contents");
32293225
}
32303226

3231-
bool TargetLowering::isConstFalseVal(const SDNode *N) const {
3227+
bool TargetLowering::isConstFalseVal(SDValue N) const {
32323228
if (!N)
32333229
return false;
32343230

@@ -3763,7 +3759,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
37633759
if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
37643760
TopSetCC.getOpcode() == ISD::SETCC &&
37653761
(N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
3766-
(isConstFalseVal(N1C) ||
3762+
(isConstFalseVal(N1) ||
37673763
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
37683764

37693765
bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14527,7 +14527,7 @@ static SDValue PerformXORCombine(SDNode *N,
1452714527
SDValue N0 = N->getOperand(0);
1452814528
SDValue N1 = N->getOperand(1);
1452914529
const TargetLowering *TLI = Subtarget->getTargetLowering();
14530-
if (TLI->isConstTrueVal(N1.getNode()) &&
14530+
if (TLI->isConstTrueVal(N1) &&
1453114531
(N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
1453214532
if (CanInvertMVEVCMP(N0)) {
1453314533
SDLoc DL(N0);
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve -o - < %s | FileCheck %s
3+
4+
define <vscale x 8 x i1> @not_icmp_sle_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
5+
; CHECK-LABEL: not_icmp_sle_nxv8i16:
6+
; CHECK: // %bb.0:
7+
; CHECK-NEXT: ptrue p0.h
8+
; CHECK-NEXT: cmpgt p0.h, p0/z, z0.h, z1.h
9+
; CHECK-NEXT: ret
10+
%icmp = icmp sle <vscale x 8 x i16> %a, %b
11+
%tmp = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
12+
%ones = shufflevector <vscale x 8 x i1> %tmp, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
13+
%not = xor <vscale x 8 x i1> %ones, %icmp
14+
ret <vscale x 8 x i1> %not
15+
}
16+
17+
define <vscale x 4 x i1> @not_icmp_sgt_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
18+
; CHECK-LABEL: not_icmp_sgt_nxv4i32:
19+
; CHECK: // %bb.0:
20+
; CHECK-NEXT: ptrue p0.s
21+
; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
22+
; CHECK-NEXT: ret
23+
%icmp = icmp sgt <vscale x 4 x i32> %a, %b
24+
%tmp = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
25+
%ones = shufflevector <vscale x 4 x i1> %tmp, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
26+
%not = xor <vscale x 4 x i1> %icmp, %ones
27+
ret <vscale x 4 x i1> %not
28+
}
29+
30+
define <vscale x 2 x i1> @not_fcmp_une_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
31+
; CHECK-LABEL: not_fcmp_une_nxv2f64:
32+
; CHECK: // %bb.0:
33+
; CHECK-NEXT: ptrue p0.d
34+
; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
35+
; CHECK-NEXT: ret
36+
%icmp = fcmp une <vscale x 2 x double> %a, %b
37+
%tmp = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
38+
%ones = shufflevector <vscale x 2 x i1> %tmp, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
39+
%not = xor <vscale x 2 x i1> %icmp, %ones
40+
ret <vscale x 2 x i1> %not
41+
}
42+
43+
define <vscale x 4 x i1> @not_fcmp_uge_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
44+
; CHECK-LABEL: not_fcmp_uge_nxv4f32:
45+
; CHECK: // %bb.0:
46+
; CHECK-NEXT: ptrue p0.s
47+
; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z0.s
48+
; CHECK-NEXT: ret
49+
%icmp = fcmp uge <vscale x 4 x float> %a, %b
50+
%tmp = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
51+
%ones = shufflevector <vscale x 4 x i1> %tmp, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
52+
%not = xor <vscale x 4 x i1> %icmp, %ones
53+
ret <vscale x 4 x i1> %not
54+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
4+
5+
define <vscale x 8 x i1> @not_icmp_sle_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
6+
; CHECK-LABEL: not_icmp_sle_nxv8i16:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
9+
; CHECK-NEXT: vmslt.vv v0, v10, v8
10+
; CHECK-NEXT: ret
11+
%icmp = icmp sle <vscale x 8 x i16> %a, %b
12+
%tmp = insertelement <vscale x 8 x i1> undef, i1 true, i32 0
13+
%ones = shufflevector <vscale x 8 x i1> %tmp, <vscale x 8 x i1> undef, <vscale x 8 x i32> zeroinitializer
14+
%not = xor <vscale x 8 x i1> %ones, %icmp
15+
ret <vscale x 8 x i1> %not
16+
}
17+
18+
define <vscale x 4 x i1> @not_icmp_sgt_nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
19+
; CHECK-LABEL: not_icmp_sgt_nxv4i32:
20+
; CHECK: # %bb.0:
21+
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
22+
; CHECK-NEXT: vmsle.vv v0, v8, v10
23+
; CHECK-NEXT: ret
24+
%icmp = icmp sgt <vscale x 4 x i32> %a, %b
25+
%tmp = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
26+
%ones = shufflevector <vscale x 4 x i1> %tmp, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
27+
%not = xor <vscale x 4 x i1> %icmp, %ones
28+
ret <vscale x 4 x i1> %not
29+
}
30+
31+
define <vscale x 2 x i1> @not_fcmp_une_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
32+
; CHECK-LABEL: not_fcmp_une_nxv2f64:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu
35+
; CHECK-NEXT: vmfeq.vv v0, v8, v10
36+
; CHECK-NEXT: ret
37+
%icmp = fcmp une <vscale x 2 x double> %a, %b
38+
%tmp = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
39+
%ones = shufflevector <vscale x 2 x i1> %tmp, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
40+
%not = xor <vscale x 2 x i1> %icmp, %ones
41+
ret <vscale x 2 x i1> %not
42+
}
43+
44+
define <vscale x 4 x i1> @not_fcmp_uge_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
45+
; CHECK-LABEL: not_fcmp_uge_nxv4f32:
46+
; CHECK: # %bb.0:
47+
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, mu
48+
; CHECK-NEXT: vmflt.vv v0, v8, v10
49+
; CHECK-NEXT: ret
50+
%icmp = fcmp uge <vscale x 4 x float> %a, %b
51+
%tmp = insertelement <vscale x 4 x i1> undef, i1 true, i32 0
52+
%ones = shufflevector <vscale x 4 x i1> %tmp, <vscale x 4 x i1> undef, <vscale x 4 x i32> zeroinitializer
53+
%not = xor <vscale x 4 x i1> %icmp, %ones
54+
ret <vscale x 4 x i1> %not
55+
}

0 commit comments

Comments
 (0)