Skip to content

Commit cf976bf

Browse files
authored
[LoopInterchange] Add tests for the vectorization profitability (NFC) (#133665)
There is a problem with the current profitability check for vectorization in LoopInterchange. There are both false positives and false negatives. The former means that the heuristic may say that "an exchange is necessary to vectorize the innermost loop" even though it's already possible. The latter means that the heuristic may miss a case where an exchange is necessary to vectorize the innermost loop. Note that this is not a dependency analysis problem. This is caused by incorrect handling of the dependency matrix in the profitability check, so these problems can occur even if the analysis is accurate (no overestimation). This patch adds tests to clarify the cases that should be fixed. The root cause of these cases is that the heuristic doesn't handle the direction of a dependency correctly.
1 parent 14335be commit cf976bf

File tree

1 file changed

+108
-0
lines changed

1 file changed

+108
-0
lines changed
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
2+
; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize
3+
; RUN: FileCheck -input-file %t %s
4+
5+
@A = dso_local global [256 x [256 x float]] zeroinitializer ; 256x256 float array, loaded and stored by both loop nests
6+
@B = dso_local global [256 x [256 x float]] zeroinitializer ; 256x256 float array, only loaded by the loop nests
7+
@C = dso_local global [256 x [256 x float]] zeroinitializer ; 256x256 float array, loaded and stored by the first loop nest only
8+
9+
; Check that the loops below are exchanged for vectorization: the j-loop carries a flow dependence on A, while the i-loop carries none.
10+
;
11+
; for (int i = 1; i < 256; i++) {
12+
;   for (int j = 1; j < 256; j++) {
13+
;     A[i][j] = A[i][j-1] + B[i][j];
14+
;     C[i][j] += 1;
15+
;   }
16+
; }
17+
;
18+
; FIXME: These loops are not exchanged at this time due to a problem in the
19+
; profitability heuristic for vectorization.
20+
21+
; CHECK: --- !Missed
22+
; CHECK-NEXT: Pass: loop-interchange
23+
; CHECK-NEXT: Name: InterchangeNotProfitable
24+
; CHECK-NEXT: Function: interchange_necessary_for_vectorization
25+
; CHECK-NEXT: Args:
26+
; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization.
27+
; CHECK-NEXT: ...
28+
define void @interchange_necessary_for_vectorization() { ; A[i][j] = A[i][j-1] + B[i][j]; C[i][j] += 1; for i, j in 1..255
29+
entry:
30+
br label %for.i.header
31+
32+
for.i.header: ; outer loop over i
33+
%i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ] ; i starts at 1
34+
br label %for.j.body
35+
36+
for.j.body: ; inner loop over j; header, body and latch are this single block
37+
%j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ] ; j starts at 1 so that j-1 is never negative
38+
%j.dec = add nsw i64 %j, -1 ; j - 1
39+
%a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %i, i64 %j.dec ; &A[i][j-1]; the leading 0 steps through the pointer to the global so that %i selects a row and %j.dec an element
40+
%b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 0, i64 %i, i64 %j ; &B[i][j]
41+
%c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 0, i64 %i, i64 %j ; &C[i][j]
42+
%a = load float, ptr %a.load.index, align 4 ; reads the element stored by the previous j iteration
43+
%b = load float, ptr %b.index, align 4
44+
%c = load float, ptr %c.index, align 4
45+
%add.0 = fadd float %a, %b
46+
%a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %i, i64 %j ; &A[i][j]
47+
store float %add.0, ptr %a.store.index, align 4 ; A[i][j] = A[i][j-1] + B[i][j]
48+
%add.1 = fadd float %c, 1.0
49+
store float %add.1, ptr %c.index, align 4 ; C[i][j] += 1; same element is read and written in one iteration
50+
%j.next = add nuw nsw i64 %j, 1
51+
%cmp.j = icmp eq i64 %j.next, 256
52+
br i1 %cmp.j, label %for.i.inc, label %for.j.body ; leave the inner loop once j reaches 256
53+
54+
for.i.inc: ; outer loop latch
55+
%i.next = add nuw nsw i64 %i, 1
56+
%cmp.i = icmp eq i64 %i.next, 256
57+
br i1 %cmp.i, label %exit, label %for.i.header ; leave the outer loop once i reaches 256
58+
59+
exit:
60+
ret void
61+
}
62+
63+
; Check that the following innermost loop can be vectorized (it carries only an
64+
; anti dependence on A), so interchanging is unnecessary.
65+
;
66+
; for (int i = 1; i < 256; i++)
67+
;   for (int j = 1; j < 256; j++)
68+
;     A[i][j-1] = A[i][j] + B[i][j];
69+
;
70+
; FIXME: These loops are exchanged at this time due to a problem in the
71+
; profitability heuristic for vectorization.
72+
73+
; CHECK: --- !Passed
74+
; CHECK-NEXT: Pass: loop-interchange
75+
; CHECK-NEXT: Name: Interchanged
76+
; CHECK-NEXT: Function: interchange_unnecesasry_for_vectorization
77+
; CHECK-NEXT: Args:
78+
; CHECK-NEXT: - String: Loop interchanged with enclosing loop.
79+
define void @interchange_unnecesasry_for_vectorization() { ; A[i][j-1] = A[i][j] + B[i][j]; for i, j in 1..255. NOTE(review): "unnecesasry" is a typo of "unnecessary"; the name is kept because the expected remark output refers to it
80+
entry:
81+
br label %for.i.header
82+
83+
for.i.header: ; outer loop over i
84+
%i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ] ; i starts at 1
85+
br label %for.j.body
86+
87+
for.j.body: ; inner loop over j; header, body and latch are this single block
88+
%j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ] ; j starts at 1 so that j-1 is never negative
89+
%j.dec = add nsw i64 %j, -1 ; j - 1
90+
%a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %i, i64 %j ; &A[i][j]; the leading 0 steps through the pointer to the global so that %i selects a row and %j an element
91+
%b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 0, i64 %i, i64 %j ; &B[i][j]
92+
%a = load float, ptr %a.load.index, align 4 ; reads an element that the next j iteration overwrites
93+
%b = load float, ptr %b.index, align 4
94+
%add = fadd float %a, %b
95+
%a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 0, i64 %i, i64 %j.dec ; &A[i][j-1]
96+
store float %add, ptr %a.store.index, align 4 ; A[i][j-1] = A[i][j] + B[i][j]
97+
%j.next = add nuw nsw i64 %j, 1
98+
%cmp.j = icmp eq i64 %j.next, 256
99+
br i1 %cmp.j, label %for.i.inc, label %for.j.body ; leave the inner loop once j reaches 256
100+
101+
for.i.inc: ; outer loop latch
102+
%i.next = add nuw nsw i64 %i, 1
103+
%cmp.i = icmp eq i64 %i.next, 256
104+
br i1 %cmp.i, label %exit, label %for.i.header ; leave the outer loop once i reaches 256
105+
106+
exit:
107+
ret void
108+
}

0 commit comments

Comments
 (0)