Skip to content

Commit 7e65944

Browse files
authored
[llvm][CodeGen] avoid repeated interval calculation in window scheduler (#132352)
Some new registers are reused when replacing old ones in certain use cases of ModuloScheduleExpander. It is necessary to avoid repeated interval calculations for these registers.
1 parent b384d6d commit 7e65944

File tree

3 files changed

+108
-31
lines changed

3 files changed

+108
-31
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,6 @@ class ModuloScheduleExpander {
188188
/// Instructions to change when emitting the final schedule.
189189
InstrChangesTy InstrChanges;
190190

191-
/// Record the registers that need to compute live intervals.
192-
SmallVector<Register> NoIntervalRegs;
193-
194191
void generatePipelinedLoop();
195192
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
196193
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -214,7 +211,6 @@ class ModuloScheduleExpander {
214211
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
215212
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
216213
ValueMapTy *VRMap);
217-
void calculateIntervals();
218214
bool computeDelta(MachineInstr &MI, unsigned &Delta);
219215
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
220216
unsigned Num);

llvm/lib/CodeGen/ModuloSchedule.cpp

Lines changed: 5 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,6 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
181181
// Add branches between prolog and epilog blocks.
182182
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
183183

184-
// The intervals of newly created virtual registers are calculated after the
185-
// kernel expansion.
186-
calculateIntervals();
187-
188184
delete[] VRMap;
189185
delete[] VRMapPhi;
190186
}
@@ -549,10 +545,8 @@ void ModuloScheduleExpander::generateExistingPhis(
549545
if (VRMap[LastStageNum - np - 1].count(LoopVal))
550546
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
551547

552-
if (IsLast && np == NumPhis - 1) {
548+
if (IsLast && np == NumPhis - 1)
553549
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
554-
NoIntervalRegs.push_back(NewReg);
555-
}
556550
continue;
557551
}
558552
}
@@ -592,10 +586,8 @@ void ModuloScheduleExpander::generateExistingPhis(
592586
// Check if we need to rename any uses that occurs after the loop. The
593587
// register to replace depends on whether the Phi is scheduled in the
594588
// epilog.
595-
if (IsLast && np == NumPhis - 1) {
589+
if (IsLast && np == NumPhis - 1)
596590
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
597-
NoIntervalRegs.push_back(NewReg);
598-
}
599591

600592
// In the kernel, a dependent Phi uses the value from this Phi.
601593
if (InKernel)
@@ -615,10 +607,8 @@ void ModuloScheduleExpander::generateExistingPhis(
615607
if (NumStages == 0 && IsLast) {
616608
auto &CurStageMap = VRMap[CurStageNum];
617609
auto It = CurStageMap.find(LoopVal);
618-
if (It != CurStageMap.end()) {
610+
if (It != CurStageMap.end())
619611
replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
620-
NoIntervalRegs.push_back(It->second);
621-
}
622612
}
623613
}
624614
}
@@ -738,10 +728,8 @@ void ModuloScheduleExpander::generatePhis(
738728
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
739729
NewReg);
740730
}
741-
if (IsLast && np == NumPhis - 1) {
731+
if (IsLast && np == NumPhis - 1)
742732
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
743-
NoIntervalRegs.push_back(NewReg);
744-
}
745733
}
746734
}
747735
}
@@ -953,14 +941,6 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
953941
}
954942
}
955943

956-
/// Some registers are generated during the kernel expansion. We calculate the
957-
/// live intervals of these registers after the expansion.
958-
void ModuloScheduleExpander::calculateIntervals() {
959-
for (Register Reg : NoIntervalRegs)
960-
LIS.createAndComputeVirtRegInterval(Reg);
961-
NoIntervalRegs.clear();
962-
}
963-
964944
/// Return true if we can compute the amount the instruction changes
965945
/// during each iteration. Set Delta to the amount of the change.
966946
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1081,10 +1061,8 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10811061
Register NewReg = MRI.createVirtualRegister(RC);
10821062
MO.setReg(NewReg);
10831063
VRMap[CurStageNum][reg] = NewReg;
1084-
if (LastDef) {
1064+
if (LastDef)
10851065
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
1086-
NoIntervalRegs.push_back(NewReg);
1087-
}
10881066
} else if (MO.isUse()) {
10891067
MachineInstr *Def = MRI.getVRegDef(reg);
10901068
// Compute the stage that contains the last definition for instruction.
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc --mtriple=aarch64 %s -run-pass=pipeliner -o - | FileCheck %s
3+
4+
...
5+
---
6+
name: foo
7+
tracksRegLiveness: true
8+
body: |
9+
; CHECK-LABEL: name: foo
10+
; CHECK: bb.0:
11+
; CHECK-NEXT: successors: %bb.3(0x80000000)
12+
; CHECK-NEXT: liveins: $x0
13+
; CHECK-NEXT: {{ $}}
14+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
15+
; CHECK-NEXT: [[FMOVD0_:%[0-9]+]]:fpr64 = FMOVD0
16+
; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
17+
; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64sp = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
18+
; CHECK-NEXT: {{ $}}
19+
; CHECK-NEXT: bb.3:
20+
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000)
21+
; CHECK-NEXT: {{ $}}
22+
; CHECK-NEXT: [[FADDDrr:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[FMOVD0_]], [[FMOVD0_]], implicit $fpcr
23+
; CHECK-NEXT: [[SUBSXri:%[0-9]+]]:gpr64 = nsw SUBSXri [[SUBREG_TO_REG]], 1, 0, implicit-def $nzcv
24+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY [[SUBSXri]]
25+
; CHECK-NEXT: [[FMOVDi:%[0-9]+]]:fpr64 = FMOVDi 112
26+
; CHECK-NEXT: Bcc 0, %bb.7, implicit $nzcv
27+
; CHECK-NEXT: B %bb.4
28+
; CHECK-NEXT: {{ $}}
29+
; CHECK-NEXT: bb.4:
30+
; CHECK-NEXT: successors: %bb.5(0x80000000), %bb.6(0x00000000)
31+
; CHECK-NEXT: {{ $}}
32+
; CHECK-NEXT: [[FADDDrr1:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[FADDDrr]], [[FMOVD0_]], implicit $fpcr
33+
; CHECK-NEXT: [[FADDDrr2:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[FMOVD0_]], [[FMOVD0_]], implicit $fpcr
34+
; CHECK-NEXT: [[SUBSXri1:%[0-9]+]]:gpr64 = nsw SUBSXri [[COPY1]], 1, 0, implicit-def $nzcv
35+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[SUBSXri1]]
36+
; CHECK-NEXT: [[FMOVDi1:%[0-9]+]]:fpr64 = FMOVDi 112
37+
; CHECK-NEXT: Bcc 0, %bb.6, implicit $nzcv
38+
; CHECK-NEXT: B %bb.5
39+
; CHECK-NEXT: {{ $}}
40+
; CHECK-NEXT: bb.5:
41+
; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.5(0x7c000000)
42+
; CHECK-NEXT: {{ $}}
43+
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr64sp = PHI [[COPY2]], %bb.4, %24, %bb.5
44+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:fpr64 = PHI [[FMOVDi1]], %bb.4, %25, %bb.5
45+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.4, [[PHI1]], %bb.5
46+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:fpr64 = PHI [[FADDDrr2]], %bb.4, %22, %bb.5
47+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:fpr64 = PHI [[FADDDrr1]], %bb.4, %23, %bb.5
48+
; CHECK-NEXT: [[SUBSXri2:%[0-9]+]]:gpr64 = nsw SUBSXri [[PHI]], 1, 0, implicit-def $nzcv
49+
; CHECK-NEXT: [[FADDDrr3:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[PHI2]], [[FMOVD0_]], implicit $fpcr
50+
; CHECK-NEXT: [[FADDDrr4:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[PHI3]], [[PHI2]], implicit $fpcr
51+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64all = COPY [[SUBSXri2]]
52+
; CHECK-NEXT: STRDui [[PHI4]], [[COPY]], 0
53+
; CHECK-NEXT: [[FMOVDi2:%[0-9]+]]:fpr64 = FMOVDi 112
54+
; CHECK-NEXT: Bcc 1, %bb.5, implicit $nzcv
55+
; CHECK-NEXT: B %bb.6
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.6:
58+
; CHECK-NEXT: successors: %bb.7(0x80000000)
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: [[PHI5:%[0-9]+]]:fpr64 = PHI [[FMOVDi]], %bb.4, [[PHI1]], %bb.5
61+
; CHECK-NEXT: [[PHI6:%[0-9]+]]:fpr64 = PHI [[FADDDrr2]], %bb.4, [[FADDDrr3]], %bb.5
62+
; CHECK-NEXT: [[PHI7:%[0-9]+]]:fpr64 = PHI [[FADDDrr1]], %bb.4, [[FADDDrr4]], %bb.5
63+
; CHECK-NEXT: STRDui [[PHI7]], [[COPY]], 0
64+
; CHECK-NEXT: {{ $}}
65+
; CHECK-NEXT: bb.7:
66+
; CHECK-NEXT: successors: %bb.2(0x80000000)
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: [[PHI8:%[0-9]+]]:fpr64 = PHI [[FMOVD0_]], %bb.3, [[PHI5]], %bb.6
69+
; CHECK-NEXT: [[PHI9:%[0-9]+]]:fpr64 = PHI [[FADDDrr]], %bb.3, [[PHI6]], %bb.6
70+
; CHECK-NEXT: [[FADDDrr5:%[0-9]+]]:fpr64 = nofpexcept FADDDrr [[PHI9]], [[PHI8]], implicit $fpcr
71+
; CHECK-NEXT: STRDui [[FADDDrr5]], [[COPY]], 0
72+
; CHECK-NEXT: B %bb.2
73+
; CHECK-NEXT: {{ $}}
74+
; CHECK-NEXT: bb.2:
75+
; CHECK-NEXT: RET_ReallyLR
76+
bb.0:
77+
successors: %bb.1(0x80000000)
78+
liveins: $x0
79+
80+
%0:gpr64common = COPY $x0
81+
%1:fpr64 = FMOVD0
82+
%2:gpr32 = MOVi32imm 1
83+
%3:gpr64all = SUBREG_TO_REG 0, killed %2, %subreg.sub_32
84+
85+
bb.1:
86+
successors: %bb.2(0x04000000), %bb.1(0x7c000000)
87+
88+
%4:gpr64sp = PHI %3, %bb.0, %5, %bb.1
89+
%6:fpr64 = PHI %1, %bb.0, %7, %bb.1
90+
%8:fpr64 = PHI %1, %bb.0, %6, %bb.1
91+
%9:fpr64 = nofpexcept FADDDrr %8, %1, implicit $fpcr
92+
%10:fpr64 = nofpexcept FADDDrr killed %9, %6, implicit $fpcr
93+
STRDui killed %10, %0, 0
94+
%11:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv
95+
%5:gpr64all = COPY %11
96+
%7:fpr64 = FMOVDi 112
97+
Bcc 1, %bb.1, implicit $nzcv
98+
B %bb.2
99+
100+
bb.2:
101+
RET_ReallyLR
102+
103+
...

0 commit comments

Comments
 (0)