Skip to content

Commit 2618095

Browse files
committed
Initial tests for histogram autovec
1 parent 7102eae commit 2618095

File tree

2 files changed

+316
-0
lines changed

2 files changed

+316
-0
lines changed
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -disable-output -passes='print<access-info>' %s 2>&1 | FileCheck %s
3+
4+
5+
; Histogram update loop used as input for the access-info printer.
; Each iteration loads an i32 from %indices[%iv], zero-extends it, and uses it
; to select an i32 element of %buckets that is then loaded, incremented by 1
; and stored back. %iv counts up from 0 and the loop exits once %iv.next
; equals %N.
; The autogenerated assertions below record the expected report: an "Unknown"
; dependence from the bucket load to the bucket store (both through
; %arrayidx2) and an empty list of grouped run-time memory checks.
define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) {
6+
; CHECK-LABEL: 'simple_histogram'
7+
; CHECK-NEXT: for.body:
8+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
9+
; CHECK-NEXT: Unknown data dependence.
10+
; CHECK-NEXT: Dependences:
11+
; CHECK-NEXT: Unknown:
12+
; CHECK-NEXT: %1 = load i32, ptr %arrayidx2, align 4 ->
13+
; CHECK-NEXT: store i32 %inc, ptr %arrayidx2, align 4
14+
; CHECK-EMPTY:
15+
; CHECK-NEXT: Run-time memory checks:
16+
; CHECK-NEXT: Grouped accesses:
17+
; CHECK-EMPTY:
18+
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
19+
; CHECK-NEXT: SCEV assumptions:
20+
; CHECK-EMPTY:
21+
; CHECK-NEXT: Expressions re-written:
22+
;
23+
entry:
24+
br label %for.body
25+
26+
for.body:
27+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
28+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
29+
; Bucket number for this iteration, read from %indices[%iv].
%0 = load i32, ptr %arrayidx, align 4
30+
; The index is treated as unsigned when widened to i64.
%idxprom1 = zext i32 %0 to i64
31+
%arrayidx2 = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
32+
; Read-modify-write of the selected bucket; the address depends on loaded
; data, and this load/store pair is the one named in the expected report.
%1 = load i32, ptr %arrayidx2, align 4
33+
%inc = add nsw i32 %1, 1
34+
store i32 %inc, ptr %arrayidx2, align 4
35+
%iv.next = add nuw nsw i64 %iv, 1
36+
; Exit test runs after the body, comparing the incremented counter with %N.
%exitcond = icmp eq i64 %iv.next, %N
37+
br i1 %exitcond, label %for.exit, label %for.body
38+
39+
for.exit:
40+
ret void
41+
}
Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -sve-gather-overhead=2 -sve-scatter-overhead=2 -S | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
;; Based on the following C code:
7+
;;
8+
;; void simple_histogram(int *buckets, unsigned *indices, int N) {
9+
;; for (int i = 0; i < N; ++i)
10+
;; buckets[indices[i]]++;
11+
;; }
12+
13+
; Baseline histogram loop on i32 buckets: the bucket selected by the
; zero-extended value of %indices[%iv] is loaded, incremented by 1 and stored
; back on every iteration.
; The autogenerated assertions below describe the same scalar loop as the
; input, i.e. they record that the loop is left unvectorized.
define void @simple_histogram(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
14+
; CHECK-LABEL: define void @simple_histogram(
15+
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
16+
; CHECK-NEXT: entry:
17+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
18+
; CHECK: for.body:
19+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
20+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
21+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
22+
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
23+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
24+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
25+
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
26+
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
27+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
28+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
29+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
30+
; CHECK: for.exit:
31+
; CHECK-NEXT: ret void
32+
;
33+
entry:
34+
br label %for.body
35+
36+
for.body:
37+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
38+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
39+
; Bucket number for this iteration, read from %indices[%iv].
%0 = load i32, ptr %arrayidx, align 4
40+
%idxprom1 = zext i32 %0 to i64
41+
%arrayidx2 = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
42+
; Read-modify-write of the selected bucket (increment by the constant 1).
%1 = load i32, ptr %arrayidx2, align 4
43+
%inc = add nsw i32 %1, 1
44+
store i32 %inc, ptr %arrayidx2, align 4
45+
%iv.next = add nuw nsw i64 %iv, 1
46+
%exitcond = icmp eq i64 %iv.next, %N
47+
br i1 %exitcond, label %for.exit, label %for.body
48+
49+
for.exit:
50+
ret void
51+
}
52+
53+
; Histogram loop whose update is a subtraction: the i32 bucket selected by the
; zero-extended value of %indices[%iv] is loaded, decremented by 1 and stored
; back on every iteration.
; The autogenerated assertions below describe the same scalar loop as the
; input, i.e. they record that the loop is left unvectorized.
define void @simple_histogram_sub(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
54+
; CHECK-LABEL: define void @simple_histogram_sub(
55+
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
56+
; CHECK-NEXT: entry:
57+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
58+
; CHECK: for.body:
59+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
60+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
61+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
62+
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
63+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
64+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
65+
; CHECK-NEXT: [[INC:%.*]] = sub nsw i32 [[TMP1]], 1
66+
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
67+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
68+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
69+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
70+
; CHECK: for.exit:
71+
; CHECK-NEXT: ret void
72+
;
73+
entry:
74+
br label %for.body
75+
76+
for.body:
77+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
78+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
79+
%0 = load i32, ptr %arrayidx, align 4
80+
%idxprom1 = zext i32 %0 to i64
81+
%arrayidx2 = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
82+
%1 = load i32, ptr %arrayidx2, align 4
83+
; Despite the value name %inc, this variant subtracts 1 from the bucket.
%inc = sub nsw i32 %1, 1
84+
store i32 %inc, ptr %arrayidx2, align 4
85+
%iv.next = add nuw nsw i64 %iv, 1
86+
%exitcond = icmp eq i64 %iv.next, %N
87+
br i1 %exitcond, label %for.exit, label %for.body
88+
89+
for.exit:
90+
ret void
91+
}
92+
93+
; Histogram loop with a guarded update: the bucket address is computed on
; every iteration, but the load/increment/store of the bucket only happens in
; block %iftrue, which is entered when the i32 value loaded from %conds[%iv]
; is greater than 5100 (signed comparison). Otherwise control goes straight to
; %next, which advances the counter and tests for loop exit.
; The autogenerated assertions below describe the same scalar control flow as
; the input, i.e. they record that the loop is left unvectorized.
define void @conditional_histogram(ptr noalias %buckets, ptr readonly %indices, ptr readonly %conds, i64 %N) #0 {
94+
; CHECK-LABEL: define void @conditional_histogram(
95+
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], ptr readonly [[CONDS:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
96+
; CHECK-NEXT: entry:
97+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
98+
; CHECK: for.body:
99+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[NEXT:%.*]] ]
100+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
101+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
102+
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
103+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
104+
; CHECK-NEXT: [[CONDIDX:%.*]] = getelementptr inbounds i32, ptr [[CONDS]], i64 [[IV]]
105+
; CHECK-NEXT: [[CONDDATA:%.*]] = load i32, ptr [[CONDIDX]], align 4
106+
; CHECK-NEXT: [[IFCOND:%.*]] = icmp sgt i32 [[CONDDATA]], 5100
107+
; CHECK-NEXT: br i1 [[IFCOND]], label [[IFTRUE:%.*]], label [[NEXT]]
108+
; CHECK: iftrue:
109+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
110+
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1
111+
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
112+
; CHECK-NEXT: br label [[NEXT]]
113+
; CHECK: next:
114+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
115+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
116+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
117+
; CHECK: for.exit:
118+
; CHECK-NEXT: ret void
119+
;
120+
entry:
121+
br label %for.body
122+
123+
for.body:
124+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %next ]
125+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
126+
%0 = load i32, ptr %arrayidx, align 4
127+
%idxprom1 = zext i32 %0 to i64
128+
; Only the address is formed here; the bucket itself is accessed in %iftrue.
%arrayidx2 = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
129+
%condidx = getelementptr inbounds i32, ptr %conds, i64 %iv
130+
%conddata = load i32, ptr %condidx, align 4
131+
; Guard for the update: signed %conds[%iv] > 5100.
%ifcond = icmp sgt i32 %conddata, 5100
132+
br i1 %ifcond, label %iftrue, label %next
133+
134+
iftrue:
135+
%1 = load i32, ptr %arrayidx2, align 4
136+
%inc = add nsw i32 %1, 1
137+
store i32 %inc, ptr %arrayidx2, align 4
138+
br label %next
139+
140+
next:
141+
%iv.next = add nuw nsw i64 %iv, 1
142+
%exitcond = icmp eq i64 %iv.next, %N
143+
br i1 %exitcond, label %for.exit, label %for.body
144+
145+
for.exit:
146+
ret void
147+
}
148+
149+
;; i8 buckets: vectorizing this loop requires legalization of smaller integer types, which is not supported yet.
150+
; Histogram loop on i8 buckets: the indices are still i32 values read from
; %indices[%iv], but the selected bucket is an i8 element of %buckets that is
; loaded, incremented by 1 and stored back.
; The autogenerated assertions below describe the same scalar loop as the
; input, i.e. they record that the loop is left unvectorized.
; NOTE(review): the i8 load and store are marked "align 4" although the bucket
; address comes from a byte-granular getelementptr with a data-dependent
; index - confirm this alignment is intended.
define void @histogram_8bit(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
151+
; CHECK-LABEL: define void @histogram_8bit(
152+
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
153+
; CHECK-NEXT: entry:
154+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
155+
; CHECK: for.body:
156+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
157+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
158+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
159+
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
160+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[BUCKETS]], i64 [[IDXPROM1]]
161+
; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX2]], align 4
162+
; CHECK-NEXT: [[INC:%.*]] = add nsw i8 [[TMP1]], 1
163+
; CHECK-NEXT: store i8 [[INC]], ptr [[ARRAYIDX2]], align 4
164+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
165+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
166+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
167+
; CHECK: for.exit:
168+
; CHECK-NEXT: ret void
169+
;
170+
entry:
171+
br label %for.body
172+
173+
for.body:
174+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
175+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
176+
%0 = load i32, ptr %arrayidx, align 4
177+
%idxprom1 = zext i32 %0 to i64
178+
; Buckets are addressed as i8 elements, unlike the i32 index array.
%arrayidx2 = getelementptr inbounds i8, ptr %buckets, i64 %idxprom1
179+
%1 = load i8, ptr %arrayidx2, align 4
180+
%inc = add nsw i8 %1, 1
181+
store i8 %inc, ptr %arrayidx2, align 4
182+
%iv.next = add nuw nsw i64 %iv, 1
183+
%exitcond = icmp eq i64 %iv.next, %N
184+
br i1 %exitcond, label %for.exit, label %for.body
185+
186+
for.exit:
187+
ret void
188+
}
189+
190+
;; Floating-point histograms are not currently supported, so this loop is expected to remain scalar.
191+
; Histogram loop on float buckets: the float element of %buckets selected by
; the zero-extended value of %indices[%iv] is loaded, has 1.0 added with a
; "fast" fadd, and is stored back.
; The autogenerated assertions below describe the same scalar loop as the
; input, i.e. they record that the loop is left unvectorized.
define void @histogram_float(ptr noalias %buckets, ptr readonly %indices, i64 %N) #0 {
192+
; CHECK-LABEL: define void @histogram_float(
193+
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
194+
; CHECK-NEXT: entry:
195+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
196+
; CHECK: for.body:
197+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
198+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
199+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
200+
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
201+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[BUCKETS]], i64 [[IDXPROM1]]
202+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX2]], align 4
203+
; CHECK-NEXT: [[INC:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
204+
; CHECK-NEXT: store float [[INC]], ptr [[ARRAYIDX2]], align 4
205+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
206+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
207+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
208+
; CHECK: for.exit:
209+
; CHECK-NEXT: ret void
210+
;
211+
entry:
212+
br label %for.body
213+
214+
for.body:
215+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
216+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
217+
%0 = load i32, ptr %arrayidx, align 4
218+
%idxprom1 = zext i32 %0 to i64
219+
%arrayidx2 = getelementptr inbounds float, ptr %buckets, i64 %idxprom1
220+
%1 = load float, ptr %arrayidx2, align 4
221+
; Floating-point update of the bucket, carrying the "fast" math flags.
%inc = fadd fast float %1, 1.0
222+
store float %inc, ptr %arrayidx2, align 4
223+
%iv.next = add nuw nsw i64 %iv, 1
224+
%exitcond = icmp eq i64 %iv.next, %N
225+
br i1 %exitcond, label %for.exit, label %for.body
226+
227+
for.exit:
228+
ret void
229+
}
230+
231+
; Histogram loop whose increment is not a constant: on every iteration the i32
; bucket selected by the zero-extended value of %indices[%iv] is loaded, the
; i32 value read from %incvals[%iv] is added to it, and the sum is stored back.
; The autogenerated assertions below describe the same scalar loop as the
; input, i.e. they record that the loop is left unvectorized.
define void @histogram_varying_increment(ptr noalias %buckets, ptr readonly %indices, ptr readonly %incvals, i64 %N) #0 {
232+
; CHECK-LABEL: define void @histogram_varying_increment(
233+
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr readonly [[INDICES:%.*]], ptr readonly [[INCVALS:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
234+
; CHECK-NEXT: entry:
235+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
236+
; CHECK: for.body:
237+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
238+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[INDICES]], i64 [[IV]]
239+
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
240+
; CHECK-NEXT: [[IDXPROM1:%.*]] = zext i32 [[TMP0]] to i64
241+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[BUCKETS]], i64 [[IDXPROM1]]
242+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4
243+
; CHECK-NEXT: [[INCIDX:%.*]] = getelementptr inbounds i32, ptr [[INCVALS]], i64 [[IV]]
244+
; CHECK-NEXT: [[INCVAL:%.*]] = load i32, ptr [[INCIDX]], align 4
245+
; CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], [[INCVAL]]
246+
; CHECK-NEXT: store i32 [[INC]], ptr [[ARRAYIDX2]], align 4
247+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
248+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
249+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
250+
; CHECK: for.exit:
251+
; CHECK-NEXT: ret void
252+
;
253+
entry:
254+
br label %for.body
255+
256+
for.body:
257+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
258+
%arrayidx = getelementptr inbounds i32, ptr %indices, i64 %iv
259+
%0 = load i32, ptr %arrayidx, align 4
260+
%idxprom1 = zext i32 %0 to i64
261+
%arrayidx2 = getelementptr inbounds i32, ptr %buckets, i64 %idxprom1
262+
%1 = load i32, ptr %arrayidx2, align 4
263+
; Per-iteration increment, read from %incvals at the same position as the index.
%incidx = getelementptr inbounds i32, ptr %incvals, i64 %iv
264+
%incval = load i32, ptr %incidx, align 4
265+
%inc = add nsw i32 %1, %incval
266+
store i32 %inc, ptr %arrayidx2, align 4
267+
%iv.next = add nuw nsw i64 %iv, 1
268+
%exitcond = icmp eq i64 %iv.next, %N
269+
br i1 %exitcond, label %for.exit, label %for.body
270+
271+
for.exit:
272+
ret void
273+
}
274+
275+
; Attribute group #0, referenced by the functions above that carry "#0":
; enables the "+sve2" target feature and declares vscale to be in the range
; [1, 16].
attributes #0 = { "target-features"="+sve2" vscale_range(1,16) }

0 commit comments

Comments
 (0)