Skip to content

Commit e99ca74

Browse files
authored
[LoopInterchange] Relax the legality check to accept more patterns (#139690)
When proving the legality of exchanging two loops, we don't need to check the elements of the direction vectors associated with the loops outside of the two target loops. Before this patch, the legality check looked at all elements of a direction vector to calculate the lexicographical order of the vector, which may reject some legal exchanges. For example, if a direction vector is `[* < =]`, it is safe to swap the last two loops because the corresponding subsequence of the vector (`[< =]`) is lexicographically positive both before and after the exchange. However, its order is unknown if we don't drop the prefix, since the first element is `*`. This patch improves the logic of the legality check to ignore such unrelated prefixes of direction vectors.
1 parent 8be514c commit e99ca74

File tree

3 files changed

+200
-7
lines changed

3 files changed

+200
-7
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -249,10 +249,6 @@ static std::optional<bool> isLexicographicallyPositive(std::vector<char> &DV,
249249
return std::nullopt;
250250
}
251251

252-
static std::optional<bool> isLexicographicallyPositive(std::vector<char> &DV) {
253-
return isLexicographicallyPositive(DV, 0, DV.size());
254-
}
255-
256252
// Checks if it is legal to interchange 2 loops.
257253
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
258254
unsigned InnerLoopId,
@@ -273,10 +269,10 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
273269

274270
// Check if the direction vector is lexicographically positive (or zero)
275271
// for both before/after exchanged.
276-
if (isLexicographicallyPositive(Cur) == false)
272+
if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false)
277273
return false;
278274
std::swap(Cur[InnerLoopId], Cur[OuterLoopId]);
279-
if (isLexicographicallyPositive(Cur) == false)
275+
if (isLexicographicallyPositive(Cur, OuterLoopId, Cur.size()) == false)
280276
return false;
281277
}
282278
return true;

llvm/test/Transforms/LoopInterchange/inner-only-reductions.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ for.end8: ; preds = %for.cond1.for.inc6_
7474

7575
; CHECK: --- !Missed
7676
; CHECK-NEXT: Pass: loop-interchange
77-
; CHECK-NEXT: Name: Dependence
77+
; CHECK-NEXT: Name: UnsupportedPHIOuter
7878
; CHECK-NEXT: Function: reduction_03
7979

8080
; IR-LABEL: @reduction_03(
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -passes=loop-interchange -verify-dom-info -verify-loop-info \
3+
; RUN: -disable-output -debug 2>&1 | FileCheck %s
4+
5+
@a = dso_local global [256 x [256 x float]] zeroinitializer, align 4
6+
@b = dso_local global [20 x [20 x [20 x i32]]] zeroinitializer, align 4
7+
8+
;; for (int n = 0; n < 100; ++n)
9+
;; for (int i = 0; i < 256; ++i)
10+
;; for (int j = 1; j < 256; ++j)
11+
;; a[j - 1][i] += a[j][i];
12+
;;
13+
;; The direction vector of `a` is [* = <]. We can interchange the innermost
14+
;; two loops. The direction vector after interchanging will be [* < =].
15+
16+
; CHECK: Dependency matrix before interchange:
17+
; CHECK-NEXT: * = <
18+
; CHECK-NEXT: * = =
19+
; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
20+
; CHECK-NEXT: Checking if loops are tightly nested
21+
; CHECK-NEXT: Checking instructions in Loop header and Loop latch
22+
; CHECK-NEXT: Loops are perfectly nested
23+
; CHECK-NEXT: Loops are legal to interchange
24+
25+
define void @all_eq_lt() {
26+
entry:
27+
br label %for.n.header
28+
29+
for.n.header:
30+
%n = phi i32 [ 0, %entry ], [ %n.inc, %for.n.latch ]
31+
br label %for.i.header
32+
33+
for.i.header:
34+
%i = phi i32 [ 0, %for.n.header ], [ %i.inc, %for.i.latch ]
35+
br label %for.j
36+
37+
for.j:
38+
%j = phi i32 [ 1, %for.i.header ], [ %j.inc, %for.j ]
39+
%j.dec = sub nsw i32 %j, 1
40+
%idx.store = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j.dec, i32 %i
41+
%idx.load = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j, i32 %i
42+
%0 = load float, ptr %idx.load, align 4
43+
%1 = load float, ptr %idx.store, align 4
44+
%add = fadd fast float %0, %1
45+
store float %add, ptr %idx.store, align 4
46+
%j.inc = add nuw nsw i32 %j, 1
47+
%cmp.j = icmp slt i32 %j.inc, 256
48+
br i1 %cmp.j, label %for.j, label %for.i.latch
49+
50+
for.i.latch:
51+
%i.inc = add nuw nsw i32 %i, 1
52+
%cmp.i = icmp slt i32 %i.inc, 256
53+
br i1 %cmp.i, label %for.i.header, label %for.n.latch
54+
55+
for.n.latch:
56+
%n.inc = add nuw nsw i32 %n, 1
57+
%cmp.n = icmp slt i32 %n.inc, 100
58+
br i1 %cmp.n, label %for.n.header, label %exit
59+
60+
exit:
61+
ret void
62+
}
63+
64+
;; for (int i = 0; i < 256; ++i)
65+
;; for (int j = 1; j < 256; ++j)
66+
;; a[j - 1][i] = a[j][255 - i];
67+
;;
68+
;; The direction vector of `a` is [* <]. We cannot interchange the loops
69+
;; because we must handle a `*` dependence conservatively.
70+
71+
; CHECK: Dependency matrix before interchange:
72+
; CHECK-NEXT: * <
73+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
74+
; CHECK-NEXT: Failed interchange InnerLoopId = 1 and OuterLoopId = 0 due to dependence
75+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
76+
77+
define void @all_lt() {
78+
entry:
79+
br label %for.i.header
80+
81+
for.i.header:
82+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
83+
%i.rev = sub nsw i32 255, %i
84+
br label %for.j
85+
86+
for.j:
87+
%j = phi i32 [ 1, %for.i.header ], [ %j.inc, %for.j ]
88+
%j.dec = sub nsw i32 %j, 1
89+
%idx.store = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j.dec, i32 %i
90+
%idx.load = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j, i32 %i.rev
91+
%0 = load float, ptr %idx.load, align 4
92+
store float %0, ptr %idx.store, align 4
93+
%j.inc = add nuw nsw i32 %j, 1
94+
%cmp.j = icmp slt i32 %j.inc, 256
95+
br i1 %cmp.j, label %for.j, label %for.i.latch
96+
97+
for.i.latch:
98+
%i.inc = add nuw nsw i32 %i, 1
99+
%cmp.i = icmp slt i32 %i.inc, 256
100+
br i1 %cmp.i, label %for.i.header, label %exit
101+
102+
exit:
103+
ret void
104+
}
105+
106+
;; for (int i = 0; i < 255; ++i)
107+
;; for (int j = 1; j < 256; ++j)
108+
;; a[j][i] = a[j - 1][i + 1];
109+
;;
110+
;; The direction vector of `a` is [< >]. We cannot interchange the loops
111+
;; because the read/write order for `a` cannot be changed.
112+
113+
; CHECK: Dependency matrix before interchange:
114+
; CHECK-NEXT: < >
115+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
116+
; CHECK-NEXT: Failed interchange InnerLoopId = 1 and OuterLoopId = 0 due to dependence
117+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
118+
119+
define void @lt_gt() {
120+
entry:
121+
br label %for.i.header
122+
123+
for.i.header:
124+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
125+
%i.inc = add nuw nsw i32 %i, 1
126+
br label %for.j
127+
128+
for.j:
129+
%j = phi i32 [ 1, %for.i.header ], [ %j.inc, %for.j ]
130+
%j.dec = sub nsw i32 %j, 1
131+
%idx.store = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j, i32 %i
132+
%idx.load = getelementptr inbounds [256 x [256 x float]], ptr @a, i32 0, i32 %j.dec, i32 %i.inc
133+
%0 = load float, ptr %idx.load, align 4
134+
store float %0, ptr %idx.store, align 4
135+
%j.inc = add nuw nsw i32 %j, 1
136+
%cmp.j = icmp slt i32 %j.inc, 256
137+
br i1 %cmp.j, label %for.j, label %for.i.latch
138+
139+
for.i.latch:
140+
%cmp.i = icmp slt i32 %i.inc, 255
141+
br i1 %cmp.i, label %for.i.header, label %exit
142+
143+
exit:
144+
ret void
145+
}
146+
147+
;; for (int i = 0; i < 20; i++)
148+
;; for (int j = 0; j < 20; j++)
149+
;; for (int k = 0; k < 19; k++)
150+
;; b[i][j][k] = b[i][5][k + 1];
151+
;;
152+
;; The direction vector of `b` is [= * <]. We cannot interchange any pair of adjacent loops.
153+
154+
; CHECK: Dependency matrix before interchange:
155+
; CHECK-NEXT: = * <
156+
; CHECK-NEXT: Processing InnerLoopId = 2 and OuterLoopId = 1
157+
; CHECK-NEXT: Failed interchange InnerLoopId = 2 and OuterLoopId = 1 due to dependence
158+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
159+
; CHECK-NEXT: Processing InnerLoopId = 1 and OuterLoopId = 0
160+
; CHECK-NEXT: Failed interchange InnerLoopId = 1 and OuterLoopId = 0 due to dependence
161+
; CHECK-NEXT: Not interchanging loops. Cannot prove legality.
162+
163+
define void @eq_all_lt() {
164+
entry:
165+
br label %for.i.header
166+
167+
for.i.header:
168+
%i = phi i32 [ 0, %entry ], [ %i.inc, %for.i.latch ]
169+
br label %for.j.header
170+
171+
for.j.header:
172+
%j = phi i32 [ 0, %for.i.header ], [ %j.inc, %for.j.latch ]
173+
br label %for.k
174+
175+
for.k:
176+
%k = phi i32 [ 0, %for.j.header ], [ %k.inc, %for.k ]
177+
%k.inc = add nuw nsw i32 %k, 1
178+
%idx.store = getelementptr inbounds [20 x [20 x [20 x i32]]], ptr @b, i32 %i, i32 %j, i32 %k
179+
%idx.load = getelementptr inbounds [20 x [20 x [20 x i32]]], ptr @b, i32 %i, i32 5, i32 %k.inc
180+
%0 = load i32, ptr %idx.load, align 4
181+
store i32 %0, ptr %idx.store, align 4
182+
%cmp.k = icmp slt i32 %k.inc, 19
183+
br i1 %cmp.k, label %for.k, label %for.j.latch
184+
185+
for.j.latch:
186+
%j.inc = add nuw nsw i32 %j, 1
187+
%cmp.j = icmp slt i32 %j.inc, 20
188+
br i1 %cmp.j, label %for.j.header, label %for.i.latch
189+
190+
for.i.latch:
191+
%i.inc = add nuw nsw i32 %i, 1
192+
%cmp.i = icmp slt i32 %i.inc, 20
193+
br i1 %cmp.i, label %for.i.header, label %exit
194+
195+
exit:
196+
ret void
197+
}

0 commit comments

Comments
 (0)