
Commit 322f372

preames authored and Jaddyen committed
[RISCV][TTI] Add test coverage for getPartialReductionCost [nfc]
Adding testing in advance of a change to cost the zvqdotq instructions such that we emit them from LV.
1 parent 58d448e · commit 322f372

File tree

1 file changed: +250 −0 lines changed
@@ -0,0 +1,250 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 4
; RUN: opt -passes=loop-vectorize -mattr=+v -S < %s | FileCheck %s --check-prefixes=CHECK,V
; RUN: opt -passes=loop-vectorize -mattr=+v,+experimental-zvqdotq -S < %s | FileCheck %s --check-prefixes=CHECK,ZVQDOTQ

target triple = "riscv64-none-unknown-elf"

define i32 @vqdot(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @vqdot(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %load.a = load i8, ptr %gep.a, align 1
  %ext.a = sext i8 %load.a to i32
  %gep.b = getelementptr i8, ptr %b, i64 %iv
  %load.b = load i8, ptr %gep.b, align 1
  %ext.b = sext i8 %load.b to i32
  %mul = mul i32 %ext.b, %ext.a
  %add = add i32 %mul, %accum
  %iv.next = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.exit, label %for.body

for.exit: ; preds = %for.body
  ret i32 %add
}


define i32 @vqdotu(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @vqdotu(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %load.a = load i8, ptr %gep.a, align 1
  %ext.a = zext i8 %load.a to i32
  %gep.b = getelementptr i8, ptr %b, i64 %iv
  %load.b = load i8, ptr %gep.b, align 1
  %ext.b = zext i8 %load.b to i32
  %mul = mul i32 %ext.b, %ext.a
  %add = add i32 %mul, %accum
  %iv.next = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.exit, label %for.body

for.exit: ; preds = %for.body
  ret i32 %add
}


define i32 @vqdotsu(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @vqdotsu(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %load.a = load i8, ptr %gep.a, align 1
  %ext.a = zext i8 %load.a to i32
  %gep.b = getelementptr i8, ptr %b, i64 %iv
  %load.b = load i8, ptr %gep.b, align 1
  %ext.b = sext i8 %load.b to i32
  %mul = mul i32 %ext.b, %ext.a
  %add = add i32 %mul, %accum
  %iv.next = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.exit, label %for.body

for.exit: ; preds = %for.body
  ret i32 %add
}

define i32 @vqdotsu2(ptr %a, ptr %b) #0 {
; CHECK-LABEL: define i32 @vqdotsu2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP6]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP7]], align 1
; CHECK-NEXT: [[TMP8:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP9]], i32 0
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x i8>, ptr [[TMP10]], align 1
; CHECK-NEXT: [[TMP11:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD1]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 4 x i32> [[TMP11]], [[TMP8]]
; CHECK-NEXT: [[TMP13]] = add <vscale x 4 x i32> [[TMP12]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP13]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
;
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %gep.a = getelementptr i8, ptr %a, i64 %iv
  %load.a = load i8, ptr %gep.a, align 1
  %ext.a = sext i8 %load.a to i32
  %gep.b = getelementptr i8, ptr %b, i64 %iv
  %load.b = load i8, ptr %gep.b, align 1
  %ext.b = zext i8 %load.b to i32
  %mul = mul i32 %ext.b, %ext.a
  %add = add i32 %mul, %accum
  %iv.next = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.exit, label %for.body

for.exit: ; preds = %for.body
  ret i32 %add
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; V: {{.*}}
; ZVQDOTQ: {{.*}}
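Once the planned cost-model change reports these dot-product reductions as profitable, the expectation is that the ZVQDOTQ runs stop matching the plain widened mul/add pattern checked above and instead form a partial reduction, which is the shape the zvqdotq instructions accelerate. A rough sketch of what the vector body for @vqdot might then contain, using LLVM's llvm.experimental.vector.partial.reduce.add intrinsic; the VF, the <vscale x 1 x i32> accumulator type, and the operand names (reused from the scalar loop for readability) are illustrative assumptions, not autogenerated CHECK output:

; Hypothetical vector-body shape once the partial reduction is costed as
; profitable (types, VF, and names are assumptions for illustration only).
  %wide.a = load <vscale x 4 x i8>, ptr %gep.a, align 1
  %wide.b = load <vscale x 4 x i8>, ptr %gep.b, align 1
  %ext.a = sext <vscale x 4 x i8> %wide.a to <vscale x 4 x i32>
  %ext.b = sext <vscale x 4 x i8> %wide.b to <vscale x 4 x i32>
  %mul = mul <vscale x 4 x i32> %ext.b, %ext.a
  ; 4:1 partial reduction into a narrower accumulator phi, replacing the
  ; full-width add into [[VEC_PHI]]; the middle block then only has to reduce
  ; the <vscale x 1 x i32> accumulator.
  %partial.reduce = call <vscale x 1 x i32> @llvm.experimental.vector.partial.reduce.add.nxv1i32.nxv4i32(<vscale x 1 x i32> %vec.phi, <vscale x 4 x i32> %mul)

How the accumulator is actually shaped depends on how the cost model and the RISC-V backend choose to map this onto the zvqdotq vqdot/vqdotu/vqdotsu instructions; the tests above only pin down the pre-change output.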

0 commit comments
