Skip to content
This repository was archived by the owner on Mar 28, 2020. It is now read-only.

Commit 799bfde

Browse files
author
Chad Rosier
committed
[LoopInterchange] Track all dependencies, not just anti dependencies.
Currently, we give up on loop interchange if we encounter a flow dependency anywhere in the loop list. Worse yet, we don't even track output dependencies. This patch updates the dependency matrix computation to track flow and output dependencies in the same way we track anti dependencies. This improves an internal workload by 2.2x. Note that the loop interchange pass is off by default; it can be enabled with '-mllvm -enable-loopinterchange'.

Differential Revision: https://reviews.llvm.org/D24564
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@282101 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent b81a1e9 commit 799bfde

File tree

2 files changed

+239
-50
lines changed

2 files changed

+239
-50
lines changed

lib/Transforms/Scalar/LoopInterchange.cpp

Lines changed: 47 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -111,62 +111,59 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
111111
Instruction *Dst = cast<Instruction>(*J);
112112
if (Src == Dst)
113113
continue;
114+
// Ignore Input dependencies.
114115
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
115116
continue;
117+
// Track Output, Flow, and Anti dependencies.
116118
if (auto D = DI->depends(Src, Dst, true)) {
117-
DEBUG(dbgs() << "Found Dependency between Src and Dst\n"
119+
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
120+
DEBUG(StringRef DepType =
121+
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
122+
dbgs() << "Found " << DepType
123+
<< " dependency between Src and Dst\n"
118124
<< " Src:" << *Src << "\n Dst:" << *Dst << '\n');
119-
if (D->isFlow()) {
120-
// TODO: Handle Flow dependence.Check if it is sufficient to populate
121-
// the Dependence Matrix with the direction reversed.
122-
DEBUG(dbgs() << "Flow dependence not handled\n");
123-
return false;
124-
}
125-
if (D->isAnti()) {
126-
DEBUG(dbgs() << "Found Anti dependence\n");
127-
unsigned Levels = D->getLevels();
128-
char Direction;
129-
for (unsigned II = 1; II <= Levels; ++II) {
130-
const SCEV *Distance = D->getDistance(II);
131-
const SCEVConstant *SCEVConst =
132-
dyn_cast_or_null<SCEVConstant>(Distance);
133-
if (SCEVConst) {
134-
const ConstantInt *CI = SCEVConst->getValue();
135-
if (CI->isNegative())
136-
Direction = '<';
137-
else if (CI->isZero())
138-
Direction = '=';
139-
else
140-
Direction = '>';
141-
Dep.push_back(Direction);
142-
} else if (D->isScalar(II)) {
143-
Direction = 'S';
144-
Dep.push_back(Direction);
145-
} else {
146-
unsigned Dir = D->getDirection(II);
147-
if (Dir == Dependence::DVEntry::LT ||
148-
Dir == Dependence::DVEntry::LE)
149-
Direction = '<';
150-
else if (Dir == Dependence::DVEntry::GT ||
151-
Dir == Dependence::DVEntry::GE)
152-
Direction = '>';
153-
else if (Dir == Dependence::DVEntry::EQ)
154-
Direction = '=';
155-
else
156-
Direction = '*';
157-
Dep.push_back(Direction);
158-
}
159-
}
160-
while (Dep.size() != Level) {
161-
Dep.push_back('I');
125+
unsigned Levels = D->getLevels();
126+
char Direction;
127+
for (unsigned II = 1; II <= Levels; ++II) {
128+
const SCEV *Distance = D->getDistance(II);
129+
const SCEVConstant *SCEVConst =
130+
dyn_cast_or_null<SCEVConstant>(Distance);
131+
if (SCEVConst) {
132+
const ConstantInt *CI = SCEVConst->getValue();
133+
if (CI->isNegative())
134+
Direction = '<';
135+
else if (CI->isZero())
136+
Direction = '=';
137+
else
138+
Direction = '>';
139+
Dep.push_back(Direction);
140+
} else if (D->isScalar(II)) {
141+
Direction = 'S';
142+
Dep.push_back(Direction);
143+
} else {
144+
unsigned Dir = D->getDirection(II);
145+
if (Dir == Dependence::DVEntry::LT ||
146+
Dir == Dependence::DVEntry::LE)
147+
Direction = '<';
148+
else if (Dir == Dependence::DVEntry::GT ||
149+
Dir == Dependence::DVEntry::GE)
150+
Direction = '>';
151+
else if (Dir == Dependence::DVEntry::EQ)
152+
Direction = '=';
153+
else
154+
Direction = '*';
155+
Dep.push_back(Direction);
162156
}
157+
}
158+
while (Dep.size() != Level) {
159+
Dep.push_back('I');
160+
}
163161

164-
DepMatrix.push_back(Dep);
165-
if (DepMatrix.size() > MaxMemInstrCount) {
166-
DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
167-
<< " dependencies inside loop\n");
168-
return false;
169-
}
162+
DepMatrix.push_back(Dep);
163+
if (DepMatrix.size() > MaxMemInstrCount) {
164+
DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
165+
<< " dependencies inside loop\n");
166+
return false;
170167
}
171168
}
172169
}

test/Transforms/LoopInterchange/interchange.ll

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,3 +555,195 @@ for.end17: ; preds = %for.inc15
555555
; CHECK: for.end17: ; preds = %for.inc15
556556
; CHECK: ret void
557557

558+
;;-----------------------------------Test case 09-------------------------------
559+
;; Test that a flow dependency in the outer loop doesn't prevent interchange in
560+
;; loops i and j.
561+
;;
562+
;; for (int k = 0; k < 100; ++k) {
563+
;; T[k] = fn1();
564+
;; for (int i = 0; i < 1000; ++i)
565+
;; for(int j = 1; j < 1000; ++j)
566+
;; Arr[j][i] = Arr[j][i]+k;
567+
;; fn2(T[k]);
568+
;; }
569+
570+
@T = internal global [100 x double] zeroinitializer, align 4
571+
@Arr = internal global [1000 x [1000 x i32]] zeroinitializer, align 4
572+
573+
define void @interchange_09(i32 %k) {
574+
entry:
575+
br label %for.body
576+
577+
for.cond.cleanup: ; preds = %for.cond.cleanup4
578+
ret void
579+
580+
for.body: ; preds = %for.cond.cleanup4, %entry
581+
%indvars.iv45 = phi i64 [ 0, %entry ], [ %indvars.iv.next46, %for.cond.cleanup4 ]
582+
%call = call double @fn1()
583+
%arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
584+
store double %call, double* %arrayidx, align 8
585+
br label %for.cond6.preheader
586+
587+
for.cond6.preheader: ; preds = %for.cond.cleanup8, %for.body
588+
%indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.cond.cleanup8 ]
589+
br label %for.body9
590+
591+
for.cond.cleanup4: ; preds = %for.cond.cleanup8
592+
%tmp = load double, double* %arrayidx, align 8
593+
call void @fn2(double %tmp)
594+
%indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
595+
%exitcond47 = icmp ne i64 %indvars.iv.next46, 100
596+
br i1 %exitcond47, label %for.body, label %for.cond.cleanup
597+
598+
for.cond.cleanup8: ; preds = %for.body9
599+
%indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
600+
%exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
601+
br i1 %exitcond44, label %for.cond6.preheader, label %for.cond.cleanup4
602+
603+
for.body9: ; preds = %for.body9, %for.cond6.preheader
604+
%indvars.iv = phi i64 [ 1, %for.cond6.preheader ], [ %indvars.iv.next, %for.body9 ]
605+
%arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
606+
%tmp1 = load i32, i32* %arrayidx13, align 4
607+
%tmp2 = trunc i64 %indvars.iv45 to i32
608+
%add = add nsw i32 %tmp1, %tmp2
609+
store i32 %add, i32* %arrayidx13, align 4
610+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
611+
%exitcond = icmp ne i64 %indvars.iv.next, 1000
612+
br i1 %exitcond, label %for.body9, label %for.cond.cleanup8
613+
}
614+
615+
declare double @fn1()
616+
declare void @fn2(double)
617+
618+
619+
620+
621+
622+
;; After interchange %indvars.iv (j) should increment as the middle loop.
623+
;; After interchange %indvars.iv42 (i) should increment with the innermost loop.
624+
625+
; CHECK-LABEL: @interchange_09
626+
627+
; CHECK: for.body:
628+
; CHECK: %indvars.iv45 = phi i64 [ %indvars.iv.next46, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]
629+
; CHECK: %call = call double @fn1()
630+
; CHECK: %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
631+
; CHECK: store double %call, double* %arrayidx, align 8
632+
; CHECK: br label %for.body9.preheader
633+
634+
; CHECK: for.cond6.preheader.preheader:
635+
; CHECK: br label %for.cond6.preheader
636+
637+
; CHECK: for.cond6.preheader:
638+
; CHECK: %indvars.iv42 = phi i64 [ %indvars.iv.next43, %for.cond.cleanup8 ], [ 0, %for.cond6.preheader.preheader ]
639+
; CHECK: br label %for.body9.split1
640+
641+
; CHECK: for.body9.preheader:
642+
; CHECK: br label %for.body9
643+
644+
; CHECK: for.cond.cleanup4:
645+
; CHECK: %tmp = load double, double* %arrayidx, align 8
646+
; CHECK: call void @fn2(double %tmp)
647+
; CHECK: %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
648+
; CHECK: %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
649+
; CHECK: br i1 %exitcond47, label %for.body, label %for.cond.cleanup
650+
651+
; CHECK: for.cond.cleanup8:
652+
; CHECK: %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
653+
; CHECK: %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
654+
; CHECK: br i1 %exitcond44, label %for.cond6.preheader, label %for.body9.split
655+
656+
; CHECK: for.body9:
657+
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9.split ], [ 1, %for.body9.preheader ]
658+
; CHECK: br label %for.cond6.preheader.preheader
659+
660+
; CHECK: for.body9.split1:
661+
; CHECK: %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
662+
; CHECK: store i32 %add, i32* %arrayidx13, align 4
663+
; CHECK: br label %for.cond.cleanup8
664+
665+
; CHECK: for.body9.split:
666+
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
667+
; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 1000
668+
; CHECK: br i1 %exitcond, label %for.body9, label %for.cond.cleanup4
669+
670+
671+
;;-----------------------------------Test case 10-------------------------------
672+
;; Test to make sure we can handle output dependencies.
673+
;;
674+
;; for (int i = 0; i < 2; ++i)
675+
;; for(int j = 0; j < 3; ++j) {
676+
;; A[j][i] = i;
677+
;; A[j][i+1] = j;
678+
;; }
679+
680+
@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
681+
682+
define void @interchange_10() {
683+
entry:
684+
br label %for.cond1.preheader
685+
686+
for.cond.loopexit: ; preds = %for.body4
687+
%exitcond28 = icmp ne i64 %indvars.iv.next27, 2
688+
br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup
689+
690+
for.cond1.preheader: ; preds = %for.cond.loopexit, %entry
691+
%indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ]
692+
%indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
693+
br label %for.body4
694+
695+
for.cond.cleanup: ; preds = %for.cond.loopexit
696+
ret void
697+
698+
for.body4: ; preds = %for.body4, %for.cond1.preheader
699+
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
700+
%arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
701+
%tmp = trunc i64 %indvars.iv26 to i32
702+
store i32 %tmp, i32* %arrayidx6, align 4
703+
%arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
704+
%tmp1 = trunc i64 %indvars.iv to i32
705+
store i32 %tmp1, i32* %arrayidx10, align 4
706+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
707+
%exitcond = icmp ne i64 %indvars.iv.next, 3
708+
br i1 %exitcond, label %for.body4, label %for.cond.loopexit
709+
}
710+
711+
; CHECK-LABEL: @interchange_10
712+
; CHECK: entry:
713+
; CHECK: br label %for.body4.preheader
714+
715+
; CHECK: for.cond1.preheader.preheader:
716+
; CHECK: br label %for.cond1.preheader
717+
718+
; CHECK: for.cond.loopexit:
719+
; CHECK: %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
720+
; CHECK: br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
721+
722+
; CHECK: for.cond1.preheader:
723+
; CHECK: %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
724+
; CHECK: %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
725+
; CHECK: br label %for.body4.split1
726+
727+
; CHECK: for.body4.preheader:
728+
; CHECK: br label %for.body4
729+
730+
; CHECK: for.cond.cleanup:
731+
; CHECK: ret void
732+
733+
; CHECK: for.body4:
734+
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
735+
; CHECK: br label %for.cond1.preheader.preheader
736+
737+
; CHECK: for.body4.split1:
738+
; CHECK: %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
739+
; CHECK: %tmp = trunc i64 %indvars.iv26 to i32
740+
; CHECK: store i32 %tmp, i32* %arrayidx6, align 4
741+
; CHECK: %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
742+
; CHECK: %tmp1 = trunc i64 %indvars.iv to i32
743+
; CHECK: store i32 %tmp1, i32* %arrayidx10, align 4
744+
; CHECK: br label %for.cond.loopexit
745+
746+
; CHECK: for.body4.split:
747+
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
748+
; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 3
749+
; CHECK: br i1 %exitcond, label %for.body4, label %for.cond.cleanup

0 commit comments

Comments
 (0)