Skip to content

Commit dbe3f60

Browse files
committed
[LoopInterchange] Remove 'I' dependency
LoopInterchange pads the end of the direction vector with `I` when the Levels value of a Dependence object is less than the depth of the loop nest. However, `I` dependencies were treated the same as `=` in subsequent processes, which is incorrect. For example, DependenceAnalysis currently returns a default Dependence object when AliasAnalysis results in MayAlias. In such a case, the direction vector should be like `[* * ... *]`. However, because the Levels value of default Dependence object is 0, therefore LoopInterchange generated a direction vector like `[I I ... I]`, leading to unsafe interchanges. This patch fixes the issue by replacing `I` with `*`. As far as I understand, this change doesn't prevent any legal interchanges that were applied before this patch. Fixes #140238
1 parent 416b7df commit dbe3f60

File tree

6 files changed

+120
-8
lines changed

6 files changed

+120
-8
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,24 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
209209
Direction = '*';
210210
Dep.push_back(Direction);
211211
}
212+
213+
// In some cases, the Levels of Dependence may be less than the depth of
214+
// the loop nest, particularly when the array's dimension is smaller
215+
// than the loop nest depth. In such cases, fill the end of the
216+
// direction vector with '*'
217+
// TODO: This can be false dependency in some cases, e.g.,
218+
//
219+
// for (int i=0; i<N; i++)
220+
// for (int j=0; j<N; j++)
221+
// for (int k=0; k<N; k++)
222+
// v[i] += a[i][j][k];
223+
//
224+
// In this example, a dependency exists between the load and store of
225+
// v[i]. The direction vector returned by DependenceInfo would be [=],
226+
// and thus it would become [= * *]. However, if the addition is
227+
// associative, exchanging the j-loop and k-loop is legal.
212228
while (Dep.size() != Level) {
213-
Dep.push_back('I');
229+
Dep.push_back('*');
214230
}
215231

216232
// Make sure we only add unique entries to the dependency matrix.
@@ -1214,7 +1230,7 @@ LoopInterchangeProfitability::isProfitablePerInstrOrderCost() {
12141230
static bool canVectorize(const CharMatrix &DepMatrix, unsigned LoopId) {
12151231
for (unsigned I = 0; I != DepMatrix.size(); I++) {
12161232
char Dir = DepMatrix[I][LoopId];
1217-
if (Dir != 'I' && Dir != '=')
1233+
if (Dir != '=')
12181234
return false;
12191235
}
12201236
return true;
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
3+
; RUN: -disable-output -debug 2>&1 | FileCheck %s
4+
5+
;; In the following case, p0 and p1 may alias, so the direction vector must be [* *].
6+
;;
7+
;; void may_alias(float *p0, float *p1) {
8+
;; for (int i = 0; i < 4; i++)
9+
;; for (int j = 0; j < 4; j++)
10+
;; p0[4 * i + j] = p1[4 * j + i];
11+
;; }
12+
13+
; CHECK: Dependency matrix before interchange:
14+
; CHECK-NEXT: * *
15+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
16+
define void @may_alias(ptr %p0, ptr %p1) {
17+
entry:
18+
br label %for.i.header
19+
20+
for.i.header:
21+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
22+
br label %for.j
23+
24+
for.j:
25+
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j ]
26+
%idx.0 = getelementptr inbounds [4 x [4 x float]], ptr %p0, i32 0, i32 %j, i32 %i
27+
%idx.1 = getelementptr inbounds [4 x [4 x float]], ptr %p1, i32 0, i32 %i, i32 %j
28+
%0 = load float, ptr %idx.0, align 4
29+
store float %0, ptr %idx.1, align 4
30+
%j.inc = add nuw nsw i32 %j, 1
31+
%cmp.j = icmp slt i32 %j.inc, 4
32+
br i1 %cmp.j, label %for.j, label %for.i.latch
33+
34+
for.i.latch:
35+
%i.inc = add nuw nsw i32 %i, 1
36+
%cmp.i = icmp slt i32 %i.inc, 4
37+
br i1 %cmp.i, label %for.i.header, label %exit
38+
39+
exit:
40+
ret void
41+
}
42+
43+
@A = global [4 x [4 x [4 x float]]] zeroinitializer, align 4
44+
@V = global [4 x float] zeroinitializer, align 4
45+
46+
;; In the following case, there is an all direction dependence for the j-loop
47+
;; and k-loop.
48+
;;
49+
;; for (int i = 0; i < 4; i++)
50+
;; for (int j = 0; j < 4; j++)
51+
;; for (int k = 0; k < 4; k++)
52+
;; V[i] += A[i][j][k];
53+
54+
; CHECK: Dependency matrix before interchange:
55+
; CHECK-NEXT: = * *
56+
; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
57+
define void @partial_reduction() {
58+
entry:
59+
br label %for.i.header
60+
61+
for.i.header:
62+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
63+
br label %for.j.header
64+
65+
for.j.header:
66+
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
67+
br label %for.k
68+
69+
for.k:
70+
%k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
71+
%idx.a = getelementptr inbounds [4 x [4 x [4 x float]]], ptr @A, i32 0, i32 %i, i32 %j, i32 %k
72+
%idx.v = getelementptr inbounds [4 x float], ptr @V, i32 0, i32 %i
73+
%0 = load float, ptr %idx.a, align 4
74+
%1 = load float, ptr %idx.v, align 4
75+
%add = fadd fast float %0, %1
76+
store float %add, ptr %idx.v, align 4
77+
%k.inc = add nuw nsw i32 %k, 1
78+
%cmp.k = icmp slt i32 %k.inc, 4
79+
br i1 %cmp.k, label %for.k, label %for.j.latch
80+
81+
for.j.latch:
82+
%j.inc = add nuw nsw i32 %j, 1
83+
%cmp.j = icmp slt i32 %j.inc, 4
84+
br i1 %cmp.j, label %for.j.header, label %for.i.latch
85+
86+
for.i.latch:
87+
%i.inc = add nuw nsw i32 %i, 1
88+
%cmp.i = icmp slt i32 %i.inc, 4
89+
br i1 %cmp.i, label %for.i.header, label %exit
90+
91+
exit:
92+
ret void
93+
}

llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,10 +71,11 @@ for.end8: ; preds = %for.cond1.for.inc6_
7171

7272
;; Not tightly nested. Do not interchange.
7373
;; Not interchanged hence the phi's in the inner loop will not be split.
74+
;; FIXME: Currently this loops is rejected for dependence reasons.
7475

7576
; CHECK: --- !Missed
7677
; CHECK-NEXT: Pass: loop-interchange
77-
; CHECK-NEXT: Name: UnsupportedPHIOuter
78+
; CHECK-NEXT: Name: Dependence
7879
; CHECK-NEXT: Function: reduction_03
7980

8081
; IR-LABEL: @reduction_03(

llvm/test/Transforms/LoopInterchange/multilevel-partial-reduction.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,16 @@
99
; the innermost loop has a reduction operation which is however not in a form
1010
; that loop interchange can handle. Interchanging the outermost and the
1111
; middle loops would intervene with the reduction and cause miscompile.
12+
;
13+
; FIXME: Currently interchanges are rejected for dependence reasons.
1214

1315
; REMARKS: --- !Missed
1416
; REMARKS-NEXT: Pass: loop-interchange
15-
; REMARKS-NEXT: Name: UnsupportedPHIInner
17+
; REMARKS-NEXT: Name: Dependence
1618
; REMARKS-NEXT: Function: test7
1719
; REMARKS: --- !Missed
1820
; REMARKS-NEXT: Pass: loop-interchange
19-
; REMARKS-NEXT: Name: UnsupportedPHIInner
21+
; REMARKS-NEXT: Name: Dependence
2022
; REMARKS-NEXT: Function: test7
2123

2224
define i32 @test7() {

llvm/test/Transforms/LoopInterchange/pr43326-ideal-access-pattern.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
; Triply nested loop, should be able to do interchange three times
66
; to get the ideal access pattern.
7-
; void f(int e[10][10][10], int f[10][10][10]) {
7+
; void f(int e[restrict 10][10][10], int f[restrict 10][10][10]) {
88
; for (int a = 0; a < 10; a++) {
99
; for (int b = 0; b < 10; b++) {
1010
; for (int c = 0; c < 10; c++) {
@@ -35,7 +35,7 @@
3535
; REMARKS-NEXT: Name: Interchanged
3636
; REMARKS-NEXT: Function: pr43326-triply-nested
3737

38-
define void @pr43326-triply-nested(ptr %e, ptr %f) {
38+
define void @pr43326-triply-nested(ptr noalias %e, ptr noalias %f) {
3939
entry:
4040
br label %for.outermost.header
4141

llvm/test/Transforms/LoopInterchange/unique-dep-matrix.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: opt < %s -passes=loop-interchange -S -debug 2>&1 | FileCheck %s
33

44
; CHECK: Dependency matrix before interchange:
5-
; CHECK-NEXT: I I
5+
; CHECK-NEXT: * *
66
; CHECK-NEXT: = *
77
; CHECK-NEXT: < *
88
; CHECK-NEXT: Processing InnerLoopId

0 commit comments

Comments
 (0)