@@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
17
17
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
18
18
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
19
19
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
20
- ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope !0
20
+ ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
21
21
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
22
22
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
23
23
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
@@ -118,6 +118,41 @@ exit:
118
118
ret void
119
119
}
120
120
121
+ ; Check that a loop which reads from an invariant address is not vectorized,
122
+ ; even when runtime checks would otherwise allow it. The test below is a variant
123
+ ; of @reduc_store_load with a non-constant dependence distance, which results in
124
+ ; vectorization with runtime checks.
125
+ ;
126
+ ; FIXME: currently this gets vectorized incorrectly.
127
+ ; CHECK-LABEL: @reduc_store_load_with_non_constant_distance_dependence
128
+ ; CHECK: vector.body:
129
+ define void @reduc_store_load_with_non_constant_distance_dependence (ptr %dst , ptr noalias %dst.2 , i64 %off ) { ; sums dst.2[iv] into dst[42] while copying the previous dst[42] value to dst.2[off + iv]
130
+ entry:
131
+ %gep.dst = getelementptr inbounds i32 , ptr %dst , i64 42 ; loop-invariant address &dst[42]
132
+ %dst.2.off = getelementptr inbounds i32 , ptr %dst.2 , i64 %off ; &dst.2[off]; %off is an argument, so the distance to %gep.src is not a constant
133
+ store i32 0 , ptr %gep.dst , align 4 ; dst[42] = 0 before entering the loop
134
+ br label %for.body
135
+
136
+ for.body:
137
+ %sum = phi i32 [ 0 , %entry ], [ %add , %for.body ] ; running sum, starts at 0
138
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %for.body ] ; induction variable, starts at 0
139
+ %gep.src = getelementptr inbounds i32 , ptr %dst.2 , i64 %iv
140
+ %0 = load i32 , ptr %gep.src , align 4 ; read dst.2[iv]
141
+ %iv.off = mul i64 %iv , 2 ; NOTE(review): %iv.off has no uses in this function; confirm it is intentional
142
+ %add = add nsw i32 %sum , %0
143
+ %lv = load i32 , ptr %gep.dst ; read from the invariant address (no explicit alignment given)
144
+ store i32 %add , ptr %gep.dst , align 4 ; write the running sum to the invariant address
145
+ %gep.src.2 = getelementptr inbounds i32 , ptr %dst.2.off , i64 %iv
146
+ store i32 %lv , ptr %gep.src.2 , align 4 ; dst.2[off + iv] = value loaded from dst[42] above
147
+ %iv.next = add nuw nsw i64 %iv , 1
148
+ %exitcond = icmp eq i64 %iv.next , 1000 ; leave the loop once %iv.next reaches 1000
149
+ br i1 %exitcond , label %exit , label %for.body
150
+
151
+ exit:
152
+ ret void
153
+ }
154
+
155
+
121
156
; Final value is not guaranteed to be stored in an invariant address.
122
157
; We don't vectorize in that case.
123
158
;
@@ -186,10 +221,10 @@ for.end:
186
221
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
187
222
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
188
223
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
189
- ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12
190
- ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12
191
- ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !12
192
- ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !12
224
+ ; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
225
+ ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
226
+ ; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
227
+ ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
193
228
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
194
229
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1
195
230
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
@@ -204,10 +239,10 @@ for.end:
204
239
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP22]]
205
240
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
206
241
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24]]
207
- ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP19]], align 4, !alias.scope !12
208
- ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope !12
209
- ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope !12
210
- ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4, !alias.scope !12
242
+ ; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP19]], align 4
243
+ ; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP21]], align 4
244
+ ; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP23]], align 4
245
+ ; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4
211
246
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
212
247
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1
213
248
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2
0 commit comments