Skip to content

Commit 99fba5a

Browse files
committed
[LoopVectorize] Add tests for dereferenceable loads in more loops
* Adds tests for strided accesses.
* Adds tests for reverse loops.

As part of this, I've moved one of the negative tests from load-deref-pred-align.ll into a new file (load-deref-pred-neg-off.ll), because the pointer type had a size of 16 bits and I realised that is probably not sensible for allocas that are >16 bits in size.

I've also tweaked scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll to use an alloca for the array access rather than a global, since this more reliably leads to the generation of dereferenceable loads.
1 parent 2de1333 commit 99fba5a

File tree

3 files changed

+552
-18
lines changed

3 files changed

+552
-18
lines changed

llvm/test/Transforms/LoopVectorize/SystemZ/scalar-steps-with-users-demanding-all-lanes-and-first-lane-only.ll

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
55

6-
@src = external global [8 x i32], align 4
6+
; External helper that the test's entry block calls with the alloca'd array.
; It is declared with a ptr parameter so the declaration matches the
; `call void @init(ptr %src)` call site instead of relying on a mismatched
; function type.
declare void @init(ptr)
77

88
; Test case where scalar steps are used by both a VPReplicateRecipe (demands
99
; all scalar lanes) and a VPInstruction that only demands the first lane.
@@ -12,34 +12,36 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
1212
; CHECK-LABEL: define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(
1313
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
1414
; CHECK-NEXT: [[ENTRY:.*]]:
15+
; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i32], align 4
16+
; CHECK-NEXT: call void @init(ptr [[SRC]])
1517
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1618
; CHECK: [[VECTOR_PH]]:
1719
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
1820
; CHECK: [[VECTOR_BODY]]:
1921
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
20-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
22+
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
2123
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
2224
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
2325
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
24-
; CHECK-NEXT: [[TMP4:%.*]] = mul nsw i64 [[TMP0]], 4
26+
; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4
2527
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[TMP1]], 4
2628
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 [[TMP2]], 4
2729
; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[TMP3]], 4
28-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4]]
30+
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]]
2931
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP5]]
3032
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP6]]
3133
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP7]]
32-
; CHECK-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP8]], align 1
34+
; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
3335
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
3436
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
3537
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1
36-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12]], i32 0
38+
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[L_1]], i32 0
3739
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 1
3840
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 2
3941
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP18]], i8 [[TMP15]], i32 3
4042
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
41-
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP0]], 4
42-
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src, i64 0, i64 [[TMP21]]
43+
; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[IV]], 4
44+
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr [[SRC]], i64 0, i64 [[TMP21]]
4345
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0
4446
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
4547
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
@@ -78,26 +80,28 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
7880
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
7981
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
8082
; CHECK: [[LOOP_HEADER]]:
81-
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
82-
; CHECK-NEXT: [[MUL_IV:%.*]] = mul nsw i64 [[IV]], 4
83-
; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV]]
84-
; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
85-
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_1]], 0
83+
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
84+
; CHECK-NEXT: [[MUL_IV1:%.*]] = mul nsw i64 [[IV1]], 4
85+
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV1]]
86+
; CHECK-NEXT: [[L_3:%.*]] = load i8, ptr [[GEP_SRC_2]], align 1
87+
; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_3]], 0
8688
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
8789
; CHECK: [[THEN]]:
88-
; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV]], 4
89-
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr @src, i64 0, i64 [[IV_OR]]
90+
; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV1]], 4
91+
; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr [[SRC]], i64 0, i64 [[IV_OR]]
9092
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4
9193
; CHECK-NEXT: store i32 [[L_2]], ptr [[DST]], align 4
9294
; CHECK-NEXT: br label %[[LOOP_LATCH]]
9395
; CHECK: [[LOOP_LATCH]]:
94-
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
96+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1]], 1
9597
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4
9698
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
9799
; CHECK: [[EXIT]]:
98100
; CHECK-NEXT: ret void
99101
;
100102
entry:
103+
%src = alloca [8 x i32], align 4
104+
call void @init(ptr %src)
101105
br label %loop.header
102106

103107
loop.header:
@@ -110,7 +114,7 @@ loop.header:
110114

111115
then:
112116
%iv.or = or disjoint i64 %iv, 4
113-
%gep.src = getelementptr inbounds [8 x i32], ptr @src, i64 0, i64 %iv.or
117+
%gep.src = getelementptr inbounds [8 x i32], ptr %src, i64 0, i64 %iv.or
114118
%l.2 = load i32, ptr %gep.src, align 4
115119
store i32 %l.2, ptr %dst, align 4
116120
br label %loop.latch

0 commit comments

Comments
 (0)