Commit e3a1330

[NFC] Add tests for scalable vectorization of loops with large stride accesses
This patch just adds tests showing that we can vectorize loops such as these:

  for (i = 0; i < n; i++)
    dst[i * 7] += 1;

and

  for (i = 0; i < n; i++)
    if (cond[i])
      dst[i * 7] += 1;

using scalable vectors, where we expect to use gathers and scatters in the vectorized loop. The vector of pointers used for the gather is identical to the one used for the scatter, so there should be no memory dependences.

Tests are added here:

  Transforms/LoopVectorize/AArch64/sve-large-strides.ll

Differential Revision: https://reviews.llvm.org/D99192
1 parent dc53715 commit e3a1330
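
For reference, here is a minimal C-level sketch of the three kernels that the new IR tests correspond to. The function names and signatures are chosen here to mirror the IR test functions below and are only an illustration, not taken from any source file in the patch (note that the i32 variant adds 3 rather than 1, matching the IR):

  #include <stdint.h>

  /* Unit-stride induction variable, strided (x7) memory access: the loop
     vectorizer is expected to emit a gather of dst[i*7], an add, and a
     scatter back to the same addresses. */
  void stride7_i32(int32_t *restrict dst, int64_t n) {
    for (int64_t i = 0; i < n; i++)
      dst[i * 7] += 3;
  }

  void stride7_f64(double *restrict dst, int64_t n) {
    for (int64_t i = 0; i < n; i++)
      dst[i * 7] += 1.0;
  }

  /* Conditional version: the gather and scatter are expected to be masked
     by the cond[i] != 0 predicate. */
  void cond_stride7_f64(double *restrict dst, const int64_t *restrict cond,
                        int64_t n) {
    for (int64_t i = 0; i < n; i++)
      if (cond[i])
        dst[i * 7] += 1.0;
  }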

File tree

1 file changed: Transforms/LoopVectorize/AArch64/sve-large-strides.ll
Lines changed: 101 additions & 0 deletions
@@ -0,0 +1,101 @@
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -dce -instcombine -S <%s | FileCheck %s

define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
; CHECK-LABEL: @stride7_i32(
; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 4 x i64> %[[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 7, i32 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds i32, i32* %dst, <vscale x 4 x i64> %[[PTR_INDICES]]
; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32(<vscale x 4 x i32*> %[[PTRS]]
; CHECK-NEXT: %[[VALS:.*]] = add nsw <vscale x 4 x i32> %[[GLOAD]],
; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32(<vscale x 4 x i32> %[[VALS]], <vscale x 4 x i32*> %[[PTRS]]
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i64 %i.05, 7
  %arrayidx = getelementptr inbounds i32, i32* %dst, i64 %mul
  %0 = load i32, i32* %arrayidx, align 4
  %add = add nsw i32 %0, 3
  store i32 %add, i32* %arrayidx, align 4
  %inc = add nuw nsw i64 %i.05, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end: ; preds = %for.end.loopexit, %entry
  ret void
}

define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
; CHECK-LABEL: @stride7_f64(
; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-NEXT: %[[PTR_INDICES:.*]] = mul nuw nsw <vscale x 2 x i64> %[[VEC_IND]], shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 7, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer)
; CHECK-NEXT: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %[[PTR_INDICES]]
; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]],
; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]],
entry:
  br label %for.body

for.body: ; preds = %entry, %for.body
  %i.05 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %mul = mul nuw nsw i64 %i.05, 7
  %arrayidx = getelementptr inbounds double, double* %dst, i64 %mul
  %0 = load double, double* %arrayidx, align 8
  %add = fadd double %0, 1.000000e+00
  store double %add, double* %arrayidx, align 8
  %inc = add nuw nsw i64 %i.05, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6

for.end: ; preds = %for.end.loopexit, %entry
  ret void
}


define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
; CHECK-LABEL: @cond_stride7_f64(
; CHECK: vector.body
; CHECK: %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
; CHECK: %[[PTRS:.*]] = getelementptr inbounds double, double* %dst, <vscale x 2 x i64> %{{.*}}
; CHECK-NEXT: %[[GLOAD:.*]] = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64.nxv2p0f64(<vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]]
; CHECK-NEXT: %[[VALS:.*]] = fadd <vscale x 2 x double> %[[GLOAD]],
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0f64(<vscale x 2 x double> %[[VALS]], <vscale x 2 x double*> %[[PTRS]], i32 8, <vscale x 2 x i1> %[[MASK]])
entry:
  br label %for.body

for.body: ; preds = %entry, %for.inc
  %i.07 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %i.07
  %0 = load i64, i64* %arrayidx, align 8
  %tobool.not = icmp eq i64 %0, 0
  br i1 %tobool.not, label %for.inc, label %if.then

if.then: ; preds = %for.body
  %mul = mul nsw i64 %i.07, 7
  %arrayidx1 = getelementptr inbounds double, double* %dst, i64 %mul
  %1 = load double, double* %arrayidx1, align 8
  %add = fadd double %1, 1.000000e+00
  store double %add, double* %arrayidx1, align 8
  br label %for.inc

for.inc: ; preds = %for.body, %if.then
  %inc = add nuw nsw i64 %i.07, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !6

for.end: ; preds = %for.end.loopexit, %entry
  ret void
}


!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 1}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}
!6 = distinct !{!6, !1, !7, !3, !4, !5}
!7 = !{!"llvm.loop.vectorize.width", i32 2}
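
The !0 and !6 loop metadata pin the vectorization factor to a scalable VF of 4 and 2 respectively, with interleaving disabled, so the CHECK lines can match the <vscale x 4 x ...> and <vscale x 2 x ...> types deterministically. As a rough illustration (an assumption about front-end behaviour, not part of this patch), metadata of this shape is what clang typically emits for a loop annotated with a scalable-width vectorization hint, for example:

  /* Hypothetical source-level hint; clang lowers it to llvm.loop.vectorize.width,
     llvm.loop.vectorize.scalable.enable and llvm.loop.interleave.count metadata
     similar to !0 above. */
  #pragma clang loop vectorize(enable) vectorize_width(4, scalable) interleave_count(1)
  for (int64_t i = 0; i < n; i++)
    dst[i * 7] += 3;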
