Skip to content

Commit 40c675f

Browse files
committed
[llvm][CodeGen] Fix the empty interval issue in Window Scheduler(#128714)
The interval of newly generated reg in ModuloScheduleExpander is empty. This will cause crush at some corner case. This patch recalculate the live intervals of these regs.
1 parent 8a855d6 commit 40c675f

File tree

3 files changed

+213
-13
lines changed

3 files changed

+213
-13
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ class ModuloScheduleExpander {
188188
/// Instructions to change when emitting the final schedule.
189189
InstrChangesTy InstrChanges;
190190

191+
/// Record newly created registers with empty live intervals.
192+
SmallVector<Register> EmptyIntervalRegs;
193+
191194
void generatePipelinedLoop();
192195
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
193196
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
@@ -211,6 +214,7 @@ class ModuloScheduleExpander {
211214
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
212215
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
213216
ValueMapTy *VRMap);
217+
void recalcEmptyIntervals();
214218
bool computeDelta(MachineInstr &MI, unsigned &Delta);
215219
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
216220
unsigned Num);

llvm/lib/CodeGen/ModuloSchedule.cpp

Lines changed: 52 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
141141
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
142142
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
143143
KernelBB->push_back(NewMI);
144+
LIS.InsertMachineInstrInMaps(*NewMI);
144145
InstrMap[NewMI] = CI;
145146
}
146147

@@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
150151
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
151152
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
152153
KernelBB->push_back(NewMI);
154+
LIS.InsertMachineInstrInMaps(*NewMI);
153155
InstrMap[NewMI] = &MI;
154156
}
155157

@@ -179,6 +181,8 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
179181
// Add branches between prolog and epilog blocks.
180182
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
181183

184+
recalcEmptyIntervals();
185+
182186
delete[] VRMap;
183187
delete[] VRMapPhi;
184188
}
@@ -226,6 +230,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
226230
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
227231
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
228232
NewBB->push_back(NewMI);
233+
LIS.InsertMachineInstrInMaps(*NewMI);
229234
InstrMap[NewMI] = &*BBI;
230235
}
231236
}
@@ -303,6 +308,7 @@ void ModuloScheduleExpander::generateEpilog(
303308
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
304309
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
305310
NewBB->push_back(NewMI);
311+
LIS.InsertMachineInstrInMaps(*NewMI);
306312
InstrMap[NewMI] = In;
307313
}
308314
}
@@ -344,13 +350,16 @@ void ModuloScheduleExpander::generateEpilog(
344350
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
345351
MachineBasicBlock *MBB,
346352
MachineRegisterInfo &MRI,
347-
LiveIntervals &LIS) {
353+
LiveIntervals &LIS,
354+
SmallVector<Register> &EmptyIntervalRegs) {
348355
for (MachineOperand &O :
349356
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
350357
if (O.getParent()->getParent() != MBB)
351358
O.setReg(ToReg);
352-
if (!LIS.hasInterval(ToReg))
359+
if (!LIS.hasInterval(ToReg)) {
353360
LIS.createEmptyInterval(ToReg);
361+
EmptyIntervalRegs.push_back(ToReg);
362+
}
354363
}
355364

356365
/// Return true if the register has a use that occurs outside the
@@ -542,7 +551,8 @@ void ModuloScheduleExpander::generateExistingPhis(
542551
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
543552

544553
if (IsLast && np == NumPhis - 1)
545-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
554+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS,
555+
EmptyIntervalRegs);
546556
continue;
547557
}
548558
}
@@ -560,6 +570,7 @@ void ModuloScheduleExpander::generateExistingPhis(
560570
TII->get(TargetOpcode::PHI), NewReg);
561571
NewPhi.addReg(PhiOp1).addMBB(BB1);
562572
NewPhi.addReg(PhiOp2).addMBB(BB2);
573+
LIS.InsertMachineInstrInMaps(*NewPhi);
563574
if (np == 0)
564575
InstrMap[NewPhi] = &*BBI;
565576

@@ -582,7 +593,7 @@ void ModuloScheduleExpander::generateExistingPhis(
582593
// register to replace depends on whether the Phi is scheduled in the
583594
// epilog.
584595
if (IsLast && np == NumPhis - 1)
585-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
596+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
586597

587598
// In the kernel, a dependent Phi uses the value from this Phi.
588599
if (InKernel)
@@ -602,7 +613,8 @@ void ModuloScheduleExpander::generateExistingPhis(
602613
if (NumStages == 0 && IsLast) {
603614
auto It = VRMap[CurStageNum].find(LoopVal);
604615
if (It != VRMap[CurStageNum].end())
605-
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
616+
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS,
617+
EmptyIntervalRegs);
606618
}
607619
}
608620
}
@@ -702,6 +714,7 @@ void ModuloScheduleExpander::generatePhis(
702714
TII->get(TargetOpcode::PHI), NewReg);
703715
NewPhi.addReg(PhiOp1).addMBB(BB1);
704716
NewPhi.addReg(PhiOp2).addMBB(BB2);
717+
LIS.InsertMachineInstrInMaps(*NewPhi);
705718
if (np == 0)
706719
InstrMap[NewPhi] = &*BBI;
707720

@@ -722,7 +735,7 @@ void ModuloScheduleExpander::generatePhis(
722735
NewReg);
723736
}
724737
if (IsLast && np == NumPhis - 1)
725-
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
738+
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
726739
}
727740
}
728741
}
@@ -831,9 +844,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
831844
// We split the lifetime when we find the first use.
832845
if (!SplitReg) {
833846
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
834-
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
835-
TII->get(TargetOpcode::COPY), SplitReg)
836-
.addReg(Def);
847+
MachineInstr *newCopy =
848+
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
849+
TII->get(TargetOpcode::COPY), SplitReg)
850+
.addReg(Def);
851+
LIS.InsertMachineInstrInMaps(*newCopy);
837852
}
838853
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
839854
}
@@ -901,13 +916,17 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
901916
removePhis(Epilog, LastEpi);
902917
// Remove the blocks that are no longer referenced.
903918
if (LastPro != LastEpi) {
919+
for (auto &MI : *LastEpi)
920+
LIS.RemoveMachineInstrFromMaps(MI);
904921
LastEpi->clear();
905922
LastEpi->eraseFromParent();
906923
}
907924
if (LastPro == KernelBB) {
908925
LoopInfo->disposed(&LIS);
909926
NewKernel = nullptr;
910927
}
928+
for (auto &MI : *LastPro)
929+
LIS.RemoveMachineInstrFromMaps(MI);
911930
LastPro->clear();
912931
LastPro->eraseFromParent();
913932
} else {
@@ -928,6 +947,25 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
928947
}
929948
}
930949

950+
/// Some new registers are generated during the kernel expansion. We recalculate
951+
/// the live intervals of these registers after the expansion.
952+
void ModuloScheduleExpander::recalcEmptyIntervals() {
953+
// The interval can be computed if the register's non-debug users have
954+
// slot indexes.
955+
auto CanRecalculateInterval = [this](unsigned Reg) -> bool {
956+
for (auto &Opnd : this->MRI.reg_nodbg_operands(Reg))
957+
if (this->LIS.isNotInMIMap(*Opnd.getParent()))
958+
return false;
959+
return true;
960+
};
961+
for (auto Reg : EmptyIntervalRegs)
962+
if (CanRecalculateInterval(Reg)) {
963+
LIS.removeInterval(Reg);
964+
LIS.createAndComputeVirtRegInterval(Reg);
965+
}
966+
EmptyIntervalRegs.clear();
967+
}
968+
931969
/// Return true if we can compute the amount the instruction changes
932970
/// during each iteration. Set Delta to the amount of the change.
933971
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -1049,7 +1087,7 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10491087
MO.setReg(NewReg);
10501088
VRMap[CurStageNum][reg] = NewReg;
10511089
if (LastDef)
1052-
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
1090+
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS, EmptyIntervalRegs);
10531091
} else if (MO.isUse()) {
10541092
MachineInstr *Def = MRI.getVRegDef(reg);
10551093
// Compute the stage that contains the last definition for instruction.
@@ -1198,10 +1236,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
11981236
UseOp.setReg(ReplaceReg);
11991237
else {
12001238
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1201-
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1202-
SplitReg)
1203-
.addReg(ReplaceReg);
1239+
MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
1240+
TII->get(TargetOpcode::COPY), SplitReg)
1241+
.addReg(ReplaceReg);
12041242
UseOp.setReg(SplitReg);
1243+
LIS.InsertMachineInstrInMaps(*newCopy);
12051244
}
12061245
}
12071246
}
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s
3+
4+
--- |
5+
define void @test_swp_ws_live_intervals(i32 %.pre) {
6+
entry:
7+
%cgep9 = bitcast ptr null to ptr
8+
br label %for.body147
9+
10+
for.body147: ; preds = %for.body170, %entry
11+
%add11.i526 = or i32 %.pre, 1
12+
br label %for.body158
13+
14+
for.body158: ; preds = %for.body158, %for.body147
15+
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
16+
%add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
17+
%and8.i534 = and i32 %add11.i536602603, 1
18+
%cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
19+
store i32 0, ptr %cgep7, align 4
20+
%lsr.iv.next = add nsw i32 %lsr.iv, 1
21+
%cmp157.3 = icmp ult i32 %lsr.iv.next, 510
22+
br i1 %cmp157.3, label %for.body158, label %for.body170
23+
24+
for.body170: ; preds = %for.body170, %for.body158
25+
%lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
26+
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
27+
%add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
28+
%cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
29+
store i32 0, ptr %cgep5, align 8
30+
%sub.i547.1 = add i32 %add11.i556606607, 1
31+
%and.i548.1 = and i32 %sub.i547.1, 1
32+
%cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
33+
%0 = load i32, ptr %cgep8, align 4
34+
store i32 %0, ptr %lsr.iv3, align 4
35+
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
36+
%cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
37+
%cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
38+
br i1 %cmp169.1, label %for.body170, label %for.body147
39+
}
40+
41+
...
42+
---
43+
name: test_swp_ws_live_intervals
44+
tracksRegLiveness: true
45+
body: |
46+
; CHECK-LABEL: name: test_swp_ws_live_intervals
47+
; CHECK: bb.0.entry:
48+
; CHECK-NEXT: successors: %bb.1(0x80000000)
49+
; CHECK-NEXT: liveins: $r0
50+
; CHECK-NEXT: {{ $}}
51+
; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
52+
; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
53+
; CHECK-NEXT: {{ $}}
54+
; CHECK-NEXT: bb.1:
55+
; CHECK-NEXT: successors: %bb.5(0x80000000)
56+
; CHECK-NEXT: {{ $}}
57+
; CHECK-NEXT: bb.5:
58+
; CHECK-NEXT: successors: %bb.6(0x80000000)
59+
; CHECK-NEXT: {{ $}}
60+
; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
61+
; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
62+
; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
63+
; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
64+
; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
65+
; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
66+
; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
67+
; CHECK-NEXT: {{ $}}
68+
; CHECK-NEXT: bb.6:
69+
; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
70+
; CHECK-NEXT: {{ $}}
71+
; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
72+
; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
73+
; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
74+
; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
75+
; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
76+
; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
77+
; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
78+
; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
79+
; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
80+
; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
81+
; CHECK-NEXT: {{ $}}
82+
; CHECK-NEXT: bb.7:
83+
; CHECK-NEXT: successors: %bb.3(0x80000000)
84+
; CHECK-NEXT: {{ $}}
85+
; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
86+
; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
87+
; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
88+
; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
89+
; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
90+
; CHECK-NEXT: {{ $}}
91+
; CHECK-NEXT: bb.3:
92+
; CHECK-NEXT: successors: %bb.4(0x80000000)
93+
; CHECK-NEXT: {{ $}}
94+
; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
95+
; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
96+
; CHECK-NEXT: {{ $}}
97+
; CHECK-NEXT: bb.4:
98+
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
99+
; CHECK-NEXT: {{ $}}
100+
; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
101+
; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
102+
; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
103+
; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
104+
; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
105+
; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
106+
; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
107+
; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
108+
; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
109+
; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
110+
; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
111+
bb.0.entry:
112+
successors: %bb.1(0x80000000)
113+
liveins: $r0
114+
115+
%0:intregs = COPY $r0
116+
%1:intregs = S2_setbit_i %0, 0
117+
118+
bb.1:
119+
successors: %bb.2(0x80000000)
120+
121+
J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
122+
123+
bb.2:
124+
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
125+
126+
%2:intregs = PHI %1, %bb.1, %3, %bb.2
127+
%4:intregs = A2_andir %2, 1
128+
%5:intregs = S2_asl_i_r %4, 2
129+
S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
130+
%6:intregs = A2_tfrsi 1
131+
%7:intregs = A2_tfrsi 4
132+
%3:intregs = A2_tfrsi 0
133+
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
134+
J2_jump %bb.3, implicit-def dead $pc
135+
136+
bb.3:
137+
successors: %bb.4(0x80000000)
138+
139+
J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
140+
J2_jump %bb.4, implicit-def $pc
141+
142+
bb.4:
143+
successors: %bb.4(0x7c000000), %bb.1(0x04000000)
144+
145+
%8:intregs = PHI %7, %bb.3, %9, %bb.4
146+
%10:intregs = PHI %6, %bb.3, %11, %bb.4
147+
%11:intregs = A2_tfrsi 0
148+
S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
149+
%12:intregs = A2_addi %10, 1
150+
%13:intregs = S2_insert %5, %12, 1, 2
151+
%14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
152+
S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
153+
%9:intregs = A2_addi %8, 2
154+
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
155+
J2_jump %bb.1, implicit-def dead $pc
156+
157+
...

0 commit comments

Comments
 (0)