Skip to content

Commit cac6f21

Browse files
authored
[LoopInterchange] Make the entries of the Dependency Matrix unique (#116195)
The dependency matrix can contain many duplicate entries, which are unnecessary and cause redundant checks. This patch avoids them by adding only unique entries to the dependency matrix.
1 parent 129a1a2 commit cac6f21

File tree

2 files changed

+59
-10
lines changed

2 files changed

+59
-10
lines changed

llvm/lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "llvm/ADT/SmallVector.h"
1818
#include "llvm/ADT/Statistic.h"
1919
#include "llvm/ADT/StringRef.h"
20+
#include "llvm/ADT/StringSet.h"
2021
#include "llvm/Analysis/DependenceAnalysis.h"
2122
#include "llvm/Analysis/LoopCacheAnalysis.h"
2223
#include "llvm/Analysis/LoopInfo.h"
@@ -71,7 +72,7 @@ static const unsigned MaxMemInstrCount = 100;
7172
// Maximum loop depth supported.
7273
static const unsigned MaxLoopNestDepth = 10;
7374

74-
#ifdef DUMP_DEP_MATRICIES
75+
#ifndef NDEBUG
7576
static void printDepMatrix(CharMatrix &DepMatrix) {
7677
for (auto &Row : DepMatrix) {
7778
for (auto D : Row)
@@ -110,6 +111,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
110111
<< " Loads and Stores to analyze\n");
111112

112113
ValueVector::iterator I, IE, J, JE;
114+
StringSet<> Seen;
113115

114116
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
115117
for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -156,7 +158,10 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
156158
Dep.push_back('I');
157159
}
158160

159-
DepMatrix.push_back(Dep);
161+
// Make sure we only add unique entries to the dependency matrix.
162+
if (Seen.insert(StringRef(Dep.data(), Dep.size())).second)
163+
DepMatrix.push_back(Dep);
164+
160165
if (DepMatrix.size() > MaxMemInstrCount) {
161166
LLVM_DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
162167
<< " dependencies inside loop\n");
@@ -449,10 +454,9 @@ struct LoopInterchange {
449454
LLVM_DEBUG(dbgs() << "Populating dependency matrix failed\n");
450455
return false;
451456
}
452-
#ifdef DUMP_DEP_MATRICIES
453-
LLVM_DEBUG(dbgs() << "Dependence before interchange\n");
454-
printDepMatrix(DependencyMatrix);
455-
#endif
457+
458+
LLVM_DEBUG(dbgs() << "Dependency matrix before interchange:\n";
459+
printDepMatrix(DependencyMatrix));
456460

457461
// Get the Outermost loop exit.
458462
BasicBlock *LoopNestExit = OuterMostLoop->getExitBlock();
@@ -492,10 +496,10 @@ struct LoopInterchange {
492496
std::swap(LoopList[i - 1], LoopList[i]);
493497
// Update the DependencyMatrix
494498
interChangeDependencies(DependencyMatrix, i, i - 1);
495-
#ifdef DUMP_DEP_MATRICIES
496-
LLVM_DEBUG(dbgs() << "Dependence after interchange\n");
497-
printDepMatrix(DependencyMatrix);
498-
#endif
499+
500+
LLVM_DEBUG(dbgs() << "Dependency matrix after interchange:\n";
501+
printDepMatrix(DependencyMatrix));
502+
499503
ChangedPerIter |= Interchanged;
500504
Changed |= Interchanged;
501505
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -passes=loop-interchange -S -debug 2>&1 | FileCheck %s
3+
4+
; CHECK: Dependency matrix before interchange:
5+
; CHECK-NEXT: I I
6+
; CHECK-NEXT: = S
7+
; CHECK-NEXT: < S
8+
; CHECK-NEXT: Processing InnerLoopId
9+
10+
; This example is taken from github issue #54176
11+
;
12+
define void @foo(i32 noundef %n, i32 noundef %m, ptr nocapture noundef %aa, ptr nocapture noundef readonly %bb, ptr nocapture noundef writeonly %cc) {
13+
entry:
14+
%arrayidx7 = getelementptr inbounds i8, ptr %aa, i64 512
15+
br label %for.cond1.preheader
16+
17+
for.cond1.preheader:
18+
%indvars.iv32 = phi i64 [ 1, %entry ], [ %indvars.iv.next33, %for.cond.cleanup3 ]
19+
%0 = add nsw i64 %indvars.iv32, -1
20+
%arrayidx9 = getelementptr inbounds [128 x float], ptr %arrayidx7, i64 0, i64 %0
21+
%arrayidx12 = getelementptr inbounds [128 x float], ptr %arrayidx7, i64 0, i64 %indvars.iv32
22+
br label %for.body4
23+
24+
for.cond.cleanup:
25+
ret void
26+
27+
for.cond.cleanup3:
28+
%indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
29+
%exitcond36 = icmp ne i64 %indvars.iv.next33, 128
30+
br i1 %exitcond36, label %for.cond1.preheader, label %for.cond.cleanup
31+
32+
for.body4:
33+
%indvars.iv = phi i64 [ 1, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
34+
%arrayidx6 = getelementptr inbounds [128 x float], ptr %bb, i64 %indvars.iv, i64 %indvars.iv32
35+
%1 = load float, ptr %arrayidx6, align 4
36+
%2 = load float, ptr %arrayidx9, align 4
37+
%add = fadd fast float %2, %1
38+
store float %add, ptr %arrayidx9, align 4
39+
%3 = load float, ptr %arrayidx12, align 4
40+
%arrayidx16 = getelementptr inbounds [128 x float], ptr %cc, i64 %indvars.iv, i64 %indvars.iv32
41+
store float %3, ptr %arrayidx16, align 4
42+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
43+
%exitcond = icmp ne i64 %indvars.iv.next, 1024
44+
br i1 %exitcond, label %for.body4, label %for.cond.cleanup3
45+
}

0 commit comments

Comments
 (0)