Skip to content

Commit 3ad6d1c

Browse files
authored
[LAA] Fix incorrect dependency classification. (#70819)
As shown in #70473, the following loop was not considered safe to vectorize. When determining the memory access dependencies in a loop which has negative iteration step, we invert the source and sink of the dependence. Perhaps we should just invert the operands to getMinusSCEV(). This way the dependency is not regarded to be true, since the users of the `IsWrite` variables, which correspond to each of the memory accesses, rely on program order and therefore should not be swapped. void vectorizable_Read_Write(int *A) { for (unsigned i = 1022; i >= 0; i--) A[i+1] = A[i] + 1; }
1 parent a28e4ea commit 3ad6d1c

File tree

3 files changed

+73
-14
lines changed

3 files changed

+73
-14
lines changed

llvm/lib/Analysis/LoopAccessAnalysis.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1916,15 +1916,12 @@ getDependenceDistanceStrideAndSize(
19161916
const SCEV *Src = PSE.getSCEV(APtr);
19171917
const SCEV *Sink = PSE.getSCEV(BPtr);
19181918

1919-
// If the induction step is negative we have to invert source and sink of
1920-
// the dependence.
1919+
// If the induction step is negative we have to invert source and sink of the
1920+
// dependence when measuring the distance between them. We should not swap
1921+
// AIsWrite with BIsWrite, as their uses expect them in program order.
19211922
if (StrideAPtr < 0) {
1922-
std::swap(APtr, BPtr);
1923-
std::swap(ATy, BTy);
19241923
std::swap(Src, Sink);
1925-
std::swap(AIsWrite, BIsWrite);
19261924
std::swap(AInst, BInst);
1927-
std::swap(StrideAPtr, StrideBPtr);
19281925
}
19291926

19301927
const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

llvm/test/Analysis/LoopAccessAnalysis/forward-negative-step.ll

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33

44
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
55

6-
; FIXME: This should be vectorizable
7-
86
; void vectorizable_Read_Write(int *A) {
97
; for (unsigned i = 1022; i >= 0; i--)
108
; A[i+1] = A[i] + 1;
@@ -13,10 +11,9 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
1311
define void @vectorizable_Read_Write(ptr nocapture %A) {
1412
; CHECK-LABEL: 'vectorizable_Read_Write'
1513
; CHECK-NEXT: loop:
16-
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
17-
; CHECK-NEXT: Forward loop carried data dependence that prevents store-to-load forwarding.
14+
; CHECK-NEXT: Memory dependences are safe
1815
; CHECK-NEXT: Dependences:
19-
; CHECK-NEXT: ForwardButPreventsForwarding:
16+
; CHECK-NEXT: Forward:
2017
; CHECK-NEXT: %l = load i32, ptr %gep.A, align 4 ->
2118
; CHECK-NEXT: store i32 %add, ptr %gep.A.plus.1, align 4
2219
; CHECK-EMPTY:
@@ -47,13 +44,13 @@ exit:
4744
ret void
4845
}
4946

50-
; FIXME: There's a forward dependency that prevents forwarding here.
5147
define void @neg_step_ForwardButPreventsForwarding(ptr nocapture %A, ptr noalias %B) {
5248
; CHECK-LABEL: 'neg_step_ForwardButPreventsForwarding'
5349
; CHECK-NEXT: loop:
54-
; CHECK-NEXT: Memory dependences are safe
50+
; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
51+
; CHECK-NEXT: Forward loop carried data dependence that prevents store-to-load forwarding.
5552
; CHECK-NEXT: Dependences:
56-
; CHECK-NEXT: Forward:
53+
; CHECK-NEXT: ForwardButPreventsForwarding:
5754
; CHECK-NEXT: store i32 0, ptr %gep.A, align 4 ->
5855
; CHECK-NEXT: %l = load i32, ptr %gep.A.plus.1, align 4
5956
; CHECK-EMPTY:
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; REQUIRES: asserts
2+
; RUN: opt -passes='print<access-info>' -debug-only=loop-accesses -disable-output < %s 2>&1 | FileCheck %s
3+
4+
; void negative_step(int *A) {
5+
; for (int i = 1022; i >= 0; i--)
6+
; A[i+1] = A[i] + 1;
7+
; }
8+
9+
; CHECK: LAA: Found a loop in negative_step: loop
10+
; CHECK: LAA: Checking memory dependencies
11+
; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
12+
; CHECK-NEXT: LAA: Distance for store i32 %add, ptr %gep.A.plus.1, align 4 to %l = load i32, ptr %gep.A, align 4: -4
13+
; CHECK-NEXT: LAA: Dependence is negative
14+
15+
define void @negative_step(ptr nocapture %A) {
16+
entry:
17+
%A.plus.1 = getelementptr i32, ptr %A, i64 1
18+
br label %loop
19+
20+
loop:
21+
%iv = phi i64 [ 1022, %entry ], [ %iv.next, %loop ]
22+
%gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
23+
%l = load i32, ptr %gep.A, align 4
24+
%add = add nsw i32 %l, 1
25+
%gep.A.plus.1 = getelementptr i32, ptr %A.plus.1, i64 %iv
26+
store i32 %add, ptr %gep.A.plus.1, align 4
27+
%iv.next = add nsw i64 %iv, -1
28+
%cmp.not = icmp eq i64 %iv, 0
29+
br i1 %cmp.not, label %exit, label %loop
30+
31+
exit:
32+
ret void
33+
}
34+
35+
; void positive_step(int *A) {
36+
; for (int i = 1; i < 1024; i++)
37+
; A[i-1] = A[i] + 1;
38+
; }
39+
40+
; CHECK: LAA: Found a loop in positive_step: loop
41+
; CHECK: LAA: Checking memory dependencies
42+
; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
43+
; CHECK-NEXT: LAA: Distance for %l = load i32, ptr %gep.A, align 4 to store i32 %add, ptr %gep.A.minus.1, align 4: -4
44+
; CHECK-NEXT: LAA: Dependence is negative
45+
46+
define void @positive_step(ptr nocapture %A) {
47+
entry:
48+
%A.minus.1 = getelementptr i32, ptr %A, i64 -1
49+
br label %loop
50+
51+
loop:
52+
%iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
53+
%gep.A = getelementptr inbounds i32, ptr %A, i64 %iv
54+
%l = load i32, ptr %gep.A, align 4
55+
%add = add nsw i32 %l, 1
56+
%gep.A.minus.1 = getelementptr i32, ptr %A.minus.1, i64 %iv
57+
store i32 %add, ptr %gep.A.minus.1, align 4
58+
%iv.next = add nsw i64 %iv, 1
59+
%cmp.not = icmp eq i64 %iv, 1024
60+
br i1 %cmp.not, label %exit, label %loop
61+
62+
exit:
63+
ret void
64+
}
65+

0 commit comments

Comments
 (0)