Skip to content

Commit 1e69ba4

Browse files
committed
[DAGCombiner][RISCV] Handle truncating splats in isNeutralConstant
On RV64, we legalize zexts of i1s to (vselect m, (splat_vector i64 1), (splat_vector i64 0)), where the splat_vectors are implicitly truncating regardless of the vector element type. When the vselect is used by a binop we want to pull the vselect out via foldSelectWithIdentityConstant. But because vectors with an element size < i64 will truncate, isNeutralConstant will return false. This patch handles truncating splats by getting the APInt value and truncating it. We almost don't need to do this since most of the neutral elements are either one/zero/all ones, but it will make a difference for smax and smin. I wasn't able to figure out a way to write the tests in terms of select, since we need the i1 zext legalization to create a truncating splat_vector. This supersedes llvm#87236. Fixed vectors are unfortunately not handled by this patch (since they get legalized to _VL nodes), but they don't seem to appear in the wild.
1 parent ec4dadb commit 1e69ba4

File tree

4 files changed

+143
-184
lines changed

4 files changed

+143
-184
lines changed

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11549,30 +11549,31 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V,
1154911549
unsigned OperandNo) {
1155011550
// NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity().
1155111551
// TODO: Target-specific opcodes could be added.
11552-
if (auto *Const = isConstOrConstSplat(V)) {
11552+
if (auto *ConstV = isConstOrConstSplat(V, false, true)) {
11553+
APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits());
1155311554
switch (Opcode) {
1155411555
case ISD::ADD:
1155511556
case ISD::OR:
1155611557
case ISD::XOR:
1155711558
case ISD::UMAX:
11558-
return Const->isZero();
11559+
return Const.isZero();
1155911560
case ISD::MUL:
11560-
return Const->isOne();
11561+
return Const.isOne();
1156111562
case ISD::AND:
1156211563
case ISD::UMIN:
11563-
return Const->isAllOnes();
11564+
return Const.isAllOnes();
1156411565
case ISD::SMAX:
11565-
return Const->isMinSignedValue();
11566+
return Const.isMinSignedValue();
1156611567
case ISD::SMIN:
11567-
return Const->isMaxSignedValue();
11568+
return Const.isMaxSignedValue();
1156811569
case ISD::SUB:
1156911570
case ISD::SHL:
1157011571
case ISD::SRA:
1157111572
case ISD::SRL:
11572-
return OperandNo == 1 && Const->isZero();
11573+
return OperandNo == 1 && Const.isZero();
1157311574
case ISD::UDIV:
1157411575
case ISD::SDIV:
11575-
return OperandNo == 1 && Const->isOne();
11576+
return OperandNo == 1 && Const.isOne();
1157611577
}
1157711578
} else if (auto *ConstFP = isConstOrConstSplatFP(V)) {
1157811579
switch (Opcode) {

llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
1818
; RV32-NEXT: vmsne.vi v0, v8, 0
1919
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma
2020
; RV32-NEXT: vmv.v.i v8, 0
21-
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
22-
; RV32-NEXT: vand.vv v8, v11, v8
21+
; RV32-NEXT: vmerge.vvm v8, v8, v11, v0
2322
; RV32-NEXT: vredmaxu.vs v8, v8, v8
2423
; RV32-NEXT: vmv.x.s a1, v8
2524
; RV32-NEXT: sub a0, a0, a1
26-
; RV32-NEXT: lui a1, 16
27-
; RV32-NEXT: addi a1, a1, -1
28-
; RV32-NEXT: and a0, a0, a1
25+
; RV32-NEXT: slli a0, a0, 16
26+
; RV32-NEXT: srli a0, a0, 16
2927
; RV32-NEXT: ret
3028
;
3129
; RV64-LABEL: ctz_nxv4i32:
@@ -41,14 +39,12 @@ define i32 @ctz_nxv4i32(<vscale x 4 x i32> %a) #0 {
4139
; RV64-NEXT: vmsne.vi v0, v8, 0
4240
; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma
4341
; RV64-NEXT: vmv.v.i v8, 0
44-
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
45-
; RV64-NEXT: vand.vv v8, v11, v8
42+
; RV64-NEXT: vmerge.vvm v8, v8, v11, v0
4643
; RV64-NEXT: vredmaxu.vs v8, v8, v8
4744
; RV64-NEXT: vmv.x.s a1, v8
48-
; RV64-NEXT: sub a0, a0, a1
49-
; RV64-NEXT: lui a1, 16
50-
; RV64-NEXT: addiw a1, a1, -1
51-
; RV64-NEXT: and a0, a0, a1
45+
; RV64-NEXT: subw a0, a0, a1
46+
; RV64-NEXT: slli a0, a0, 48
47+
; RV64-NEXT: srli a0, a0, 48
5248
; RV64-NEXT: ret
5349
%res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32(<vscale x 4 x i32> %a, i1 0)
5450
ret i32 %res
@@ -158,8 +154,7 @@ define i32 @ctz_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i1> %a) {
158154
; RV64-NEXT: li a1, -1
159155
; RV64-NEXT: vmadd.vx v16, a1, v8
160156
; RV64-NEXT: vmv.v.i v8, 0
161-
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
162-
; RV64-NEXT: vand.vv v8, v16, v8
157+
; RV64-NEXT: vmerge.vvm v8, v8, v16, v0
163158
; RV64-NEXT: vredmaxu.vs v8, v8, v8
164159
; RV64-NEXT: vmv.x.s a1, v8
165160
; RV64-NEXT: subw a0, a0, a1
Lines changed: 23 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2-
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
3-
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
2+
; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
44

55
; The following binop x, (zext i1) tests will be vector-legalized into a vselect
66
; of two splat_vectors, but on RV64 the splat value will be implicitly
@@ -15,80 +15,46 @@
1515
; truncating splat, so we pull the vselect back and fold it into a mask.
1616

1717
define <vscale x 2 x i32> @i1_zext_add(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
18-
; RV32-LABEL: i1_zext_add:
19-
; RV32: # %bb.0:
20-
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
21-
; RV32-NEXT: vadd.vi v8, v8, 1, v0.t
22-
; RV32-NEXT: ret
23-
;
24-
; RV64-LABEL: i1_zext_add:
25-
; RV64: # %bb.0:
26-
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
27-
; RV64-NEXT: vmv.v.i v9, 0
28-
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
29-
; RV64-NEXT: vadd.vv v8, v8, v9
30-
; RV64-NEXT: ret
18+
; CHECK-LABEL: i1_zext_add:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
21+
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
22+
; CHECK-NEXT: ret
3123
%zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
3224
%add = add <vscale x 2 x i32> %b, %zext
3325
ret <vscale x 2 x i32> %add
3426
}
3527

3628
define <vscale x 2 x i32> @i1_zext_add_commuted(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
37-
; RV32-LABEL: i1_zext_add_commuted:
38-
; RV32: # %bb.0:
39-
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
40-
; RV32-NEXT: vadd.vi v8, v8, 1, v0.t
41-
; RV32-NEXT: ret
42-
;
43-
; RV64-LABEL: i1_zext_add_commuted:
44-
; RV64: # %bb.0:
45-
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
46-
; RV64-NEXT: vmv.v.i v9, 0
47-
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
48-
; RV64-NEXT: vadd.vv v8, v9, v8
49-
; RV64-NEXT: ret
29+
; CHECK-LABEL: i1_zext_add_commuted:
30+
; CHECK: # %bb.0:
31+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
32+
; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t
33+
; CHECK-NEXT: ret
5034
%zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
5135
%add = add <vscale x 2 x i32> %zext, %b
5236
ret <vscale x 2 x i32> %add
5337
}
5438

5539
define <vscale x 2 x i32> @i1_zext_sub(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
56-
; RV32-LABEL: i1_zext_sub:
57-
; RV32: # %bb.0:
58-
; RV32-NEXT: li a0, 1
59-
; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, mu
60-
; RV32-NEXT: vsub.vx v8, v8, a0, v0.t
61-
; RV32-NEXT: ret
62-
;
63-
; RV64-LABEL: i1_zext_sub:
64-
; RV64: # %bb.0:
65-
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
66-
; RV64-NEXT: vmv.v.i v9, 0
67-
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
68-
; RV64-NEXT: vsub.vv v8, v8, v9
69-
; RV64-NEXT: ret
40+
; CHECK-LABEL: i1_zext_sub:
41+
; CHECK: # %bb.0:
42+
; CHECK-NEXT: li a0, 1
43+
; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu
44+
; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
45+
; CHECK-NEXT: ret
7046
%zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
7147
%sub = sub <vscale x 2 x i32> %b, %zext
7248
ret <vscale x 2 x i32> %sub
7349
}
7450

7551
define <vscale x 2 x i32> @i1_zext_or(<vscale x 2 x i1> %a, <vscale x 2 x i32> %b) {
76-
; RV32-LABEL: i1_zext_or:
77-
; RV32: # %bb.0:
78-
; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, mu
79-
; RV32-NEXT: vor.vi v8, v8, 1, v0.t
80-
; RV32-NEXT: ret
81-
;
82-
; RV64-LABEL: i1_zext_or:
83-
; RV64: # %bb.0:
84-
; RV64-NEXT: vsetvli a0, zero, e32, m1, ta, ma
85-
; RV64-NEXT: vmv.v.i v9, 0
86-
; RV64-NEXT: vmerge.vim v9, v9, 1, v0
87-
; RV64-NEXT: vor.vv v8, v8, v9
88-
; RV64-NEXT: ret
52+
; CHECK-LABEL: i1_zext_or:
53+
; CHECK: # %bb.0:
54+
; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu
55+
; CHECK-NEXT: vor.vi v8, v8, 1, v0.t
56+
; CHECK-NEXT: ret
8957
%zext = zext <vscale x 2 x i1> %a to <vscale x 2 x i32>
9058
%or = or <vscale x 2 x i32> %b, %zext
9159
ret <vscale x 2 x i32> %or
9260
}
93-
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
94-
; CHECK: {{.*}}

0 commit comments

Comments
 (0)