1
1
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2
- ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -max-dependences=2 -debug-only=loop-vectorize,loop-accesses -S 2>&1 | FileCheck %s
3
- ; RUN: opt < %s -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -debug-only=loop-vectorize,loop-accesses -S 2>&1 | FileCheck %s --check-prefix=NORMAL_DEP_LIMIT
2
+ ; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -max-dependences=2 -debug-only=loop-vectorize,loop-accesses -S 2>&1 | FileCheck %s
3
+ ; RUN: opt < %s -mattr=+sve2 -passes=loop-vectorize,instcombine -enable-histogram-loop-vectorization -sve-gather-overhead=2 -sve-scatter-overhead=2 -force-vector-interleave=1 -debug-only=loop-vectorize,loop-accesses -S 2>&1 | FileCheck %s --check-prefix=NORMAL_DEP_LIMIT
4
4
; REQUIRES: asserts
5
5
6
6
target triple = "aarch64-unknown-linux-gnu"
@@ -13,7 +13,7 @@ target triple = "aarch64-unknown-linux-gnu"
13
13
; CHECK: LV: Can't vectorize due to memory conflicts
14
14
; CHECK: LV: Not vectorizing: Cannot prove legality.
15
15
16
- define void @many_deps (ptr noalias %buckets , ptr %array , ptr %indices , ptr %other , i64 %N ) # 0 {
16
+ define void @many_deps (ptr noalias %buckets , ptr %array , ptr %indices , ptr %other , i64 %N ) {
17
17
; CHECK-LABEL: define void @many_deps(
18
18
; CHECK-SAME: ptr noalias [[BUCKETS:%.*]], ptr [[ARRAY:%.*]], ptr [[INDICES:%.*]], ptr [[OTHER:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
19
19
; CHECK-NEXT: entry:
@@ -36,15 +36,15 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
36
36
; CHECK-NEXT: store i32 [[ADD_OTHER]], ptr [[GEP_OTHER]], align 4
37
37
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
38
38
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
39
- ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
39
+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_EXIT:%.*]], label [[FOR_BODY]]
40
40
; CHECK: for.exit:
41
41
; CHECK-NEXT: ret void
42
42
;
43
43
; NORMAL_DEP_LIMIT-LABEL: define void @many_deps(
44
44
; NORMAL_DEP_LIMIT-SAME: ptr noalias [[BUCKETS:%.*]], ptr [[ARRAY:%.*]], ptr [[INDICES:%.*]], ptr [[OTHER:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
45
45
; NORMAL_DEP_LIMIT-NEXT: entry:
46
46
; NORMAL_DEP_LIMIT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
47
- ; NORMAL_DEP_LIMIT-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
47
+ ; NORMAL_DEP_LIMIT-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 2
48
48
; NORMAL_DEP_LIMIT-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
49
49
; NORMAL_DEP_LIMIT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
50
50
; NORMAL_DEP_LIMIT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
@@ -67,12 +67,13 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
67
67
; NORMAL_DEP_LIMIT-NEXT: br i1 [[CONFLICT_RDX9]], label [[SCALAR_PH]], label [[ENTRY:%.*]]
68
68
; NORMAL_DEP_LIMIT: vector.ph:
69
69
; NORMAL_DEP_LIMIT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
70
- ; NORMAL_DEP_LIMIT-NEXT: [[DOTNEG:%.*]] = mul nsw i64 [[TMP4]], -4
71
- ; NORMAL_DEP_LIMIT-NEXT: [[N_VEC:%.*]] = and i64 [[N]], [[DOTNEG]]
70
+ ; NORMAL_DEP_LIMIT-NEXT: [[TMP8:%.*]] = shl i64 [[TMP4]], 2
71
+ ; NORMAL_DEP_LIMIT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP8]]
72
+ ; NORMAL_DEP_LIMIT-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
72
73
; NORMAL_DEP_LIMIT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
73
- ; NORMAL_DEP_LIMIT-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
74
+ ; NORMAL_DEP_LIMIT-NEXT: [[TMP6:%.*]] = shl i64 [[TMP5]], 2
74
75
; NORMAL_DEP_LIMIT-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
75
- ; NORMAL_DEP_LIMIT-NEXT: [[TMP9:%.*]] = trunc nuw nsw i64 [[TMP6]] to i32
76
+ ; NORMAL_DEP_LIMIT-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP6]] to i32
76
77
; NORMAL_DEP_LIMIT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0
77
78
; NORMAL_DEP_LIMIT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
78
79
; NORMAL_DEP_LIMIT-NEXT: br label [[FOR_BODY:%.*]]
@@ -95,7 +96,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
95
96
; NORMAL_DEP_LIMIT-NEXT: [[TMP16:%.*]] = icmp eq i64 [[IV_NEXT]], [[N_VEC]]
96
97
; NORMAL_DEP_LIMIT-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
97
98
; NORMAL_DEP_LIMIT: middle.block:
98
- ; NORMAL_DEP_LIMIT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
99
+ ; NORMAL_DEP_LIMIT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
99
100
; NORMAL_DEP_LIMIT-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
100
101
; NORMAL_DEP_LIMIT: scalar.ph:
101
102
; NORMAL_DEP_LIMIT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
@@ -143,13 +144,8 @@ for.body:
143
144
store i32 %add.other , ptr %gep.other , align 4
144
145
%iv.next = add nuw nsw i64 %iv , 1
145
146
%exitcond = icmp eq i64 %iv.next , %N
146
- br i1 %exitcond , label %for.exit , label %for.body , !llvm.loop !0
147
+ br i1 %exitcond , label %for.exit , label %for.body
147
148
148
149
for.exit:
149
150
ret void
150
151
}
151
-
152
- attributes #0 = { "target-features" ="+sve2" vscale_range(1 ,16 ) }
153
-
154
- !0 = distinct !{!0 , !1 }
155
- !1 = !{!"llvm.loop.interleave.count" , i32 1 }
0 commit comments