Skip to content

Commit 369c617

Browse files
authored
[RISCV] Fix the cost of llvm.vector.reduce.and (#119160)
This patch also adds CodeGen test cases for the vector reduction intrinsics. For consistency, it includes cases for related intrinsics such as `vector.reduce.or` as well. For the case where a `v1i1` reduction is lowered to `vfirst.m`, see https://reviews.llvm.org/D139512.
1 parent 41e4018 commit 369c617

File tree

5 files changed

+279
-24
lines changed

5 files changed

+279
-24
lines changed

llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,15 +1536,31 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
15361536
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
15371537
Type *ElementTy = Ty->getElementType();
15381538
if (ElementTy->isIntegerTy(1)) {
1539+
// Example sequences:
1540+
// vfirst.m a0, v0
1541+
// seqz a0, a0
1542+
if (LT.second == MVT::v1i1)
1543+
return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
1544+
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
1545+
CmpInst::ICMP_EQ, CostKind);
1546+
15391547
if (ISD == ISD::AND) {
15401548
// Example sequences:
1541-
// vsetvli a0, zero, e8, mf8, ta, ma
15421549
// vmand.mm v8, v9, v8 ; needed every time type is split
1543-
// vmnot.m v8, v0
1550+
// vmnot.m v8, v0 ; alias for vmnand
15441551
// vcpop.m a0, v8
15451552
// seqz a0, a0
1546-
return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
1547-
CostKind) +
1553+
1554+
// See the discussion: https://github.com/llvm/llvm-project/pull/119160
1555+
// For LMUL <= 8, there is no splitting,
1556+
// the sequences are vmnot, vcpop and seqz.
1557+
// When LMUL > 8 and the type is split once (LT.first == 2),
1558+
// the sequences are vmnand, vcpop and seqz.
1559+
// When LMUL > 8 and the type is split more than once (LT.first > 2),
1560+
// the sequences are (LT.first-2) * vmand, vmnand, vcpop and seqz.
1561+
return ((LT.first > 2) ? (LT.first - 2) : 0) *
1562+
getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
1563+
getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
15481564
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
15491565
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
15501566
CmpInst::ICMP_EQ, CostKind);
Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl128b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
3+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL128B
4+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl128b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
5+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL128B
6+
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl256b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
7+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL256B
8+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl256b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
9+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL256B
10+
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl512b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
11+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL512B
12+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl512b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
13+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL512B
14+
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl1024b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
15+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL1024B
16+
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl1024b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
17+
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL1024B
18+
19+
define zeroext i1 @vreduce_and_v1i1(<1 x i1> %v) {
20+
; THROUGHPUT-LABEL: 'vreduce_and_v1i1'
21+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
22+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
23+
;
24+
%red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
25+
ret i1 %red
26+
}
27+
28+
define zeroext i1 @vreduce_and_v2i1(<2 x i1> %v) {
29+
; THROUGHPUT-LABEL: 'vreduce_and_v2i1'
30+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
31+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
32+
;
33+
%red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
34+
ret i1 %red
35+
}
36+
37+
define zeroext i1 @vreduce_and_v4i1(<4 x i1> %v) {
38+
; THROUGHPUT-LABEL: 'vreduce_and_v4i1'
39+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
40+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
41+
;
42+
%red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
43+
ret i1 %red
44+
}
45+
46+
define zeroext i1 @vreduce_and_v8i1(<8 x i1> %v) {
47+
; THROUGHPUT-LABEL: 'vreduce_and_v8i1'
48+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
49+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
50+
;
51+
%red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
52+
ret i1 %red
53+
}
54+
55+
define zeroext i1 @vreduce_and_v16i1(<16 x i1> %v) {
56+
; THROUGHPUT-LABEL: 'vreduce_and_v16i1'
57+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
58+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
59+
;
60+
%red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
61+
ret i1 %red
62+
}
63+
64+
define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) {
65+
; THROUGHPUT-LABEL: 'vreduce_and_v32i1'
66+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
67+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
68+
;
69+
%red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
70+
ret i1 %red
71+
}
72+
73+
define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) {
74+
; THROUGHPUT-LABEL: 'vreduce_and_v64i1'
75+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
76+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
77+
;
78+
%red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
79+
ret i1 %red
80+
}
81+
82+
define zeroext i1 @vreduce_and_v128i1(<128 x i1> %v) {
83+
; THROUGHPUT-LABEL: 'vreduce_and_v128i1'
84+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
85+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
86+
;
87+
%red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
88+
ret i1 %red
89+
}
90+
91+
define zeroext i1 @vreduce_and_v256i1(<256 x i1> %v) {
92+
; THROUGHPUT-LABEL: 'vreduce_and_v256i1'
93+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
94+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
95+
;
96+
%red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
97+
ret i1 %red
98+
}
99+
100+
define zeroext i1 @vreduce_and_v512i1(<512 x i1> %v) {
101+
; THROUGHPUT-VL128B-LABEL: 'vreduce_and_v512i1'
102+
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
103+
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
104+
;
105+
; THROUGHPUT-VL256B-LABEL: 'vreduce_and_v512i1'
106+
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
107+
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
108+
;
109+
; THROUGHPUT-VL512B-LABEL: 'vreduce_and_v512i1'
110+
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
111+
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
112+
;
113+
; THROUGHPUT-VL1024B-LABEL: 'vreduce_and_v512i1'
114+
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
115+
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
116+
;
117+
%red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
118+
ret i1 %red
119+
}
120+
121+
define zeroext i1 @vreduce_and_v1024i1(<1024 x i1> %v) {
122+
; THROUGHPUT-VL128B-LABEL: 'vreduce_and_v1024i1'
123+
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
124+
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
125+
;
126+
; THROUGHPUT-VL256B-LABEL: 'vreduce_and_v1024i1'
127+
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
128+
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
129+
;
130+
; THROUGHPUT-VL512B-LABEL: 'vreduce_and_v1024i1'
131+
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
132+
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
133+
;
134+
; THROUGHPUT-VL1024B-LABEL: 'vreduce_and_v1024i1'
135+
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
136+
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
137+
;
138+
%red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
139+
ret i1 %red
140+
}
141+
142+
define zeroext i1 @vreduce_and_nxv1i1(<vscale x 1 x i1> %v) {
143+
; THROUGHPUT-LABEL: 'vreduce_and_nxv1i1'
144+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
145+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
146+
;
147+
%red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
148+
ret i1 %red
149+
}
150+
151+
define zeroext i1 @vreduce_and_nxv2i1(<vscale x 2 x i1> %v) {
152+
; THROUGHPUT-LABEL: 'vreduce_and_nxv2i1'
153+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
154+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
155+
;
156+
%red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
157+
ret i1 %red
158+
}
159+
160+
define zeroext i1 @vreduce_and_nxv4i1(<vscale x 4 x i1> %v) {
161+
; THROUGHPUT-LABEL: 'vreduce_and_nxv4i1'
162+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
163+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
164+
;
165+
%red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
166+
ret i1 %red
167+
}
168+
169+
define zeroext i1 @vreduce_and_nxv8i1(<vscale x 8 x i1> %v) {
170+
; THROUGHPUT-LABEL: 'vreduce_and_nxv8i1'
171+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
172+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
173+
;
174+
%red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
175+
ret i1 %red
176+
}
177+
178+
define zeroext i1 @vreduce_and_nxv16i1(<vscale x 16 x i1> %v) {
179+
; THROUGHPUT-LABEL: 'vreduce_and_nxv16i1'
180+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
181+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
182+
;
183+
%red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
184+
ret i1 %red
185+
}
186+
187+
define zeroext i1 @vreduce_and_nxv32i1(<vscale x 32 x i1> %v) {
188+
; THROUGHPUT-LABEL: 'vreduce_and_nxv32i1'
189+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
190+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
191+
;
192+
%red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
193+
ret i1 %red
194+
}
195+
196+
define zeroext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
197+
; THROUGHPUT-LABEL: 'vreduce_and_nxv64i1'
198+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
199+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
200+
;
201+
%red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
202+
ret i1 %red
203+
}
204+
205+
define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
206+
; THROUGHPUT-LABEL: 'vreduce_and_nxv128i1'
207+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
208+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
209+
;
210+
%red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
211+
ret i1 %red
212+
}
213+
214+
define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
215+
; THROUGHPUT-LABEL: 'vreduce_and_nxv256i1'
216+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
217+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
218+
;
219+
%red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
220+
ret i1 %red
221+
}
222+
223+
define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
224+
; THROUGHPUT-LABEL: 'vreduce_and_nxv512i1'
225+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
226+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
227+
;
228+
%red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
229+
ret i1 %red
230+
}
231+
232+
define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
233+
; THROUGHPUT-LABEL: 'vreduce_and_nxv1024i1'
234+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
235+
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
236+
;
237+
%red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
238+
ret i1 %red
239+
}

0 commit comments

Comments
 (0)