3
3
4
4
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
5
5
6
- @src = external global [ 8 x i32 ], align 4
6
+ declare void @init ()
7
7
8
8
; Test case where scalar steps are used by both a VPReplicateRecipe (demands
9
9
; all scalar lanes) and a VPInstruction that only demands the first lane.
@@ -12,34 +12,36 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
12
12
; CHECK-LABEL: define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(
13
13
; CHECK-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC_1:%.*]]) #[[ATTR0:[0-9]+]] {
14
14
; CHECK-NEXT: [[ENTRY:.*]]:
15
+ ; CHECK-NEXT: [[SRC:%.*]] = alloca [8 x i32], align 4
16
+ ; CHECK-NEXT: call void @init(ptr [[SRC]])
15
17
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
16
18
; CHECK: [[VECTOR_PH]]:
17
19
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
18
20
; CHECK: [[VECTOR_BODY]]:
19
21
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE6:.*]] ]
20
- ; CHECK-NEXT: [[TMP0 :%.*]] = add i64 [[INDEX]], 0
22
+ ; CHECK-NEXT: [[IV :%.*]] = add i64 [[INDEX]], 0
21
23
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
22
24
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
23
25
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
24
- ; CHECK-NEXT: [[TMP4 :%.*]] = mul nsw i64 [[TMP0 ]], 4
26
+ ; CHECK-NEXT: [[MUL_IV :%.*]] = mul nsw i64 [[IV ]], 4
25
27
; CHECK-NEXT: [[TMP5:%.*]] = mul nsw i64 [[TMP1]], 4
26
28
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw i64 [[TMP2]], 4
27
29
; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i64 [[TMP3]], 4
28
- ; CHECK-NEXT: [[TMP8 :%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP4 ]]
30
+ ; CHECK-NEXT: [[GEP_SRC_1 :%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV ]]
29
31
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP5]]
30
32
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP6]]
31
33
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[TMP7]]
32
- ; CHECK-NEXT: [[TMP12 :%.*]] = load i8, ptr [[TMP8 ]], align 1
34
+ ; CHECK-NEXT: [[L_1 :%.*]] = load i8, ptr [[GEP_SRC_1 ]], align 1
33
35
; CHECK-NEXT: [[TMP13:%.*]] = load i8, ptr [[TMP9]], align 1
34
36
; CHECK-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP10]], align 1
35
37
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP11]], align 1
36
- ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[TMP12 ]], i32 0
38
+ ; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> poison, i8 [[L_1 ]], i32 0
37
39
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 1
38
40
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 2
39
41
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i8> [[TMP18]], i8 [[TMP15]], i32 3
40
42
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq <4 x i8> [[TMP19]], zeroinitializer
41
- ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP0 ]], 4
42
- ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr @src , i64 0, i64 [[TMP21]]
43
+ ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[IV ]], 4
44
+ ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [8 x i32], ptr [[SRC]] , i64 0, i64 [[TMP21]]
43
45
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0
44
46
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP23]], align 4
45
47
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i1> [[TMP20]], i32 0
@@ -78,26 +80,28 @@ define void @test_scalar_iv_steps_used_by_replicate_and_first_lane_only_vpinst(p
78
80
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 4, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
79
81
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
80
82
; CHECK: [[LOOP_HEADER]]:
81
- ; CHECK-NEXT: [[IV :%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
82
- ; CHECK-NEXT: [[MUL_IV :%.*]] = mul nsw i64 [[IV ]], 4
83
- ; CHECK-NEXT: [[GEP_SRC_1 :%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV ]]
84
- ; CHECK-NEXT: [[L_1 :%.*]] = load i8, ptr [[GEP_SRC_1 ]], align 1
85
- ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_1 ]], 0
83
+ ; CHECK-NEXT: [[IV1 :%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
84
+ ; CHECK-NEXT: [[MUL_IV1 :%.*]] = mul nsw i64 [[IV1 ]], 4
85
+ ; CHECK-NEXT: [[GEP_SRC_2 :%.*]] = getelementptr inbounds i8, ptr [[SRC_1]], i64 [[MUL_IV1 ]]
86
+ ; CHECK-NEXT: [[L_3 :%.*]] = load i8, ptr [[GEP_SRC_2 ]], align 1
87
+ ; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[L_3 ]], 0
86
88
; CHECK-NEXT: br i1 [[C]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
87
89
; CHECK: [[THEN]]:
88
- ; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV ]], 4
89
- ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr @src , i64 0, i64 [[IV_OR]]
90
+ ; CHECK-NEXT: [[IV_OR:%.*]] = or disjoint i64 [[IV1 ]], 4
91
+ ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds [8 x i32], ptr [[SRC]] , i64 0, i64 [[IV_OR]]
90
92
; CHECK-NEXT: [[L_2:%.*]] = load i32, ptr [[GEP_SRC]], align 4
91
93
; CHECK-NEXT: store i32 [[L_2]], ptr [[DST]], align 4
92
94
; CHECK-NEXT: br label %[[LOOP_LATCH]]
93
95
; CHECK: [[LOOP_LATCH]]:
94
- ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV ]], 1
96
+ ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV1 ]], 1
95
97
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 4
96
98
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
97
99
; CHECK: [[EXIT]]:
98
100
; CHECK-NEXT: ret void
99
101
;
100
102
entry:
103
+ %src = alloca [8 x i32 ], align 4
104
+ call void @init (ptr %src )
101
105
br label %loop.header
102
106
103
107
loop.header:
@@ -110,7 +114,7 @@ loop.header:
110
114
111
115
then:
112
116
%iv.or = or disjoint i64 %iv , 4
113
- %gep.src = getelementptr inbounds [8 x i32 ], ptr @ src , i64 0 , i64 %iv.or
117
+ %gep.src = getelementptr inbounds [8 x i32 ], ptr % src , i64 0 , i64 %iv.or
114
118
%l.2 = load i32 , ptr %gep.src , align 4
115
119
store i32 %l.2 , ptr %dst , align 4
116
120
br label %loop.latch
0 commit comments