Skip to content

Commit 401ecb4

Browse files
committed
[LV] Add test showing miscompile with store reductions and RT checks.
Add a new test showing how a loop gets vectorized incorrectly with an invariant store reduction where the same location is also read, when vectorizing with runtime checks.
1 parent 76508dc commit 401ecb4

File tree

1 file changed

+44
-9
lines changed

1 file changed

+44
-9
lines changed

llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
1717
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
1818
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP0]]
1919
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
20-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !alias.scope !0
20+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
2121
; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
2222
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2323
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
@@ -118,6 +118,41 @@ exit:
118118
ret void
119119
}
120120

121+
; Check that if we have a read from an invariant address, we do not vectorize,
122+
; even if we vectorize with runtime checks. The test below is a variant of
123+
; @reduc_store_load with a non-constant dependence distance, resulting in
124+
; vectorization with runtime checks.
125+
;
126+
; FIXME: currently this gets vectorized incorrectly.
127+
; CHECK-LABEL: @reduc_store_load_with_non_constant_distance_dependence
128+
; CHECK: vector.body:
129+
define void @reduc_store_load_with_non_constant_distance_dependence(ptr %dst, ptr noalias %dst.2, i64 %off) {
130+
entry:
131+
%gep.dst = getelementptr inbounds i32, ptr %dst, i64 42 ; address of i32 element 42 of %dst; computed once, outside the loop
132+
%dst.2.off = getelementptr inbounds i32, ptr %dst.2, i64 %off ; %dst.2 advanced by %off i32 elements; %off is a function argument
133+
store i32 0, ptr %gep.dst, align 4 ; initialize the invariant location before entering the loop
134+
br label %for.body
135+
136+
for.body:
137+
%sum = phi i32 [ 0, %entry ], [ %add, %for.body ] ; running sum, starts at 0 and carries %add across iterations
138+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] ; induction variable, starts at 0
139+
%gep.src = getelementptr inbounds i32, ptr %dst.2, i64 %iv ; address of %dst.2[%iv]
140+
%0 = load i32, ptr %gep.src, align 4 ; element added to the running sum
141+
%iv.off = mul i64 %iv, 2 ; result is not used by any later instruction in this function
142+
%add = add nsw i32 %sum, %0 ; updated running sum
143+
%lv = load i32, ptr %gep.dst ; reads the same invariant address that the next store writes
144+
store i32 %add, ptr %gep.dst, align 4 ; writes the running sum to the invariant address on every iteration
145+
%gep.src.2 = getelementptr inbounds i32, ptr %dst.2.off, i64 %iv ; address of %dst.2[%off + %iv]
146+
store i32 %lv, ptr %gep.src.2, align 4 ; writes the value loaded from %gep.dst into the %off-shifted view of %dst.2
147+
%iv.next = add nuw nsw i64 %iv, 1
148+
%exitcond = icmp eq i64 %iv.next, 1000 ; leave the loop once %iv.next reaches 1000
149+
br i1 %exitcond, label %exit, label %for.body
150+
151+
exit:
152+
ret void
153+
}
154+
155+
121156
; Final value is not guaranteed to be stored in an invariant address.
122157
; We don't vectorize in that case.
123158
;
@@ -186,10 +221,10 @@ for.end:
186221
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
187222
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP2]]
188223
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP3]]
189-
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4, !alias.scope !12
190-
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4, !alias.scope !12
191-
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4, !alias.scope !12
192-
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4, !alias.scope !12
224+
; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP4]], align 4
225+
; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP5]], align 4
226+
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4
227+
; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4
193228
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
194229
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP9]], i32 1
195230
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP10]], i32 2
@@ -204,10 +239,10 @@ for.end:
204239
; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP22]]
205240
; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP17]], i32 3
206241
; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP24]]
207-
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP19]], align 4, !alias.scope !12
208-
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP21]], align 4, !alias.scope !12
209-
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP23]], align 4, !alias.scope !12
210-
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4, !alias.scope !12
242+
; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr [[TMP19]], align 4
243+
; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr [[TMP21]], align 4
244+
; CHECK-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP23]], align 4
245+
; CHECK-NEXT: [[TMP29:%.*]] = load i32, ptr [[TMP25]], align 4
211246
; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
212247
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP27]], i32 1
213248
; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 2

0 commit comments

Comments
 (0)