Skip to content

[llvm][CodeGen] Fix the empty interval issue in Window Scheduler(#128714) #129204

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/CodeGen/ModuloSchedule.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,9 @@ class ModuloScheduleExpander {
/// Instructions to change when emitting the final schedule.
InstrChangesTy InstrChanges;

/// Record the registers that need to compute live intervals.
SmallVector<Register> NoIntervalRegs;

void generatePipelinedLoop();
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
Expand All @@ -211,6 +214,7 @@ class ModuloScheduleExpander {
void addBranches(MachineBasicBlock &PreheaderBB, MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
ValueMapTy *VRMap);
void calculateIntervals();
bool computeDelta(MachineInstr &MI, unsigned &Delta);
void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
unsigned Num);
Expand Down
75 changes: 54 additions & 21 deletions llvm/lib/CodeGen/ModuloSchedule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
KernelBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = CI;
}

Expand All @@ -150,6 +151,7 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
KernelBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &MI;
}

Expand Down Expand Up @@ -179,6 +181,10 @@ void ModuloScheduleExpander::generatePipelinedLoop() {
// Add branches between prolog and epilog blocks.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);

// The intervals of newly created virtual registers are calculated after the
// kernel expansion.
calculateIntervals();

delete[] VRMap;
delete[] VRMapPhi;
}
Expand Down Expand Up @@ -226,6 +232,7 @@ void ModuloScheduleExpander::generateProlog(unsigned LastStage,
cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
NewBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = &*BBI;
}
}
Expand Down Expand Up @@ -303,6 +310,7 @@ void ModuloScheduleExpander::generateEpilog(
MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
NewBB->push_back(NewMI);
LIS.InsertMachineInstrInMaps(*NewMI);
InstrMap[NewMI] = In;
}
}
Expand Down Expand Up @@ -343,14 +351,11 @@ void ModuloScheduleExpander::generateEpilog(
/// basic block with ToReg.
static void replaceRegUsesAfterLoop(Register FromReg, Register ToReg,
MachineBasicBlock *MBB,
MachineRegisterInfo &MRI,
LiveIntervals &LIS) {
MachineRegisterInfo &MRI) {
for (MachineOperand &O :
llvm::make_early_inc_range(MRI.use_operands(FromReg)))
if (O.getParent()->getParent() != MBB)
O.setReg(ToReg);
if (!LIS.hasInterval(ToReg))
LIS.createEmptyInterval(ToReg);
}

/// Return true if the register has a use that occurs outside the
Expand Down Expand Up @@ -541,8 +546,10 @@ void ModuloScheduleExpander::generateExistingPhis(
if (VRMap[LastStageNum - np - 1].count(LoopVal))
PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];

if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
if (IsLast && np == NumPhis - 1) {
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}
continue;
}
}
Expand All @@ -560,6 +567,7 @@ void ModuloScheduleExpander::generateExistingPhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;

Expand All @@ -581,8 +589,10 @@ void ModuloScheduleExpander::generateExistingPhis(
// Check if we need to rename any uses that occurs after the loop. The
// register to replace depends on whether the Phi is scheduled in the
// epilog.
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
if (IsLast && np == NumPhis - 1) {
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}

// In the kernel, a dependent Phi uses the value from this Phi.
if (InKernel)
Expand All @@ -600,9 +610,12 @@ void ModuloScheduleExpander::generateExistingPhis(
// Check if we need to rename a Phi that has been eliminated due to
// scheduling.
if (NumStages == 0 && IsLast) {
auto It = VRMap[CurStageNum].find(LoopVal);
if (It != VRMap[CurStageNum].end())
replaceRegUsesAfterLoop(Def, It->second, BB, MRI, LIS);
auto &CurStageMap = VRMap[CurStageNum];
auto It = CurStageMap.find(LoopVal);
if (It != CurStageMap.end()) {
replaceRegUsesAfterLoop(Def, It->second, BB, MRI);
NoIntervalRegs.push_back(It->second);
}
}
}
}
Expand Down Expand Up @@ -702,6 +715,7 @@ void ModuloScheduleExpander::generatePhis(
TII->get(TargetOpcode::PHI), NewReg);
NewPhi.addReg(PhiOp1).addMBB(BB1);
NewPhi.addReg(PhiOp2).addMBB(BB2);
LIS.InsertMachineInstrInMaps(*NewPhi);
if (np == 0)
InstrMap[NewPhi] = &*BBI;

Expand All @@ -721,8 +735,10 @@ void ModuloScheduleExpander::generatePhis(
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg);
}
if (IsLast && np == NumPhis - 1)
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
if (IsLast && np == NumPhis - 1) {
replaceRegUsesAfterLoop(Def, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}
}
}
}
Expand Down Expand Up @@ -831,9 +847,11 @@ void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
// We split the lifetime when we find the first use.
if (!SplitReg) {
SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
MachineInstr *newCopy =
BuildMI(*KernelBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(Def);
LIS.InsertMachineInstrInMaps(*newCopy);
}
BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
}
Expand Down Expand Up @@ -901,13 +919,17 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
removePhis(Epilog, LastEpi);
// Remove the blocks that are no longer referenced.
if (LastPro != LastEpi) {
for (auto &MI : *LastEpi)
LIS.RemoveMachineInstrFromMaps(MI);
LastEpi->clear();
LastEpi->eraseFromParent();
}
if (LastPro == KernelBB) {
LoopInfo->disposed(&LIS);
NewKernel = nullptr;
}
for (auto &MI : *LastPro)
LIS.RemoveMachineInstrFromMaps(MI);
LastPro->clear();
LastPro->eraseFromParent();
} else {
Expand All @@ -928,6 +950,14 @@ void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
}
}

/// Some registers are generated during the kernel expansion. We calculate the
/// live intervals of these registers after the expansion.
void ModuloScheduleExpander::calculateIntervals() {
for (Register Reg : NoIntervalRegs)
LIS.createAndComputeVirtRegInterval(Reg);
NoIntervalRegs.clear();
}

/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
Expand Down Expand Up @@ -1048,8 +1078,10 @@ void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
Register NewReg = MRI.createVirtualRegister(RC);
MO.setReg(NewReg);
VRMap[CurStageNum][reg] = NewReg;
if (LastDef)
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
if (LastDef) {
replaceRegUsesAfterLoop(reg, NewReg, BB, MRI);
NoIntervalRegs.push_back(NewReg);
}
} else if (MO.isUse()) {
MachineInstr *Def = MRI.getVRegDef(reg);
// Compute the stage that contains the last definition for instruction.
Expand Down Expand Up @@ -1198,10 +1230,11 @@ void ModuloScheduleExpander::rewriteScheduledInstr(
UseOp.setReg(ReplaceReg);
else {
Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
SplitReg)
.addReg(ReplaceReg);
MachineInstr *newCopy = BuildMI(*BB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), SplitReg)
.addReg(ReplaceReg);
UseOp.setReg(SplitReg);
LIS.InsertMachineInstrInMaps(*newCopy);
}
}
}
Expand Down
157 changes: 157 additions & 0 deletions llvm/test/CodeGen/Hexagon/swp-ws-live-intervals-issue128714.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc --mtriple=hexagon %s -run-pass=pipeliner -o -| FileCheck %s

--- |
define void @test_swp_ws_live_intervals(i32 %.pre) {
entry:
%cgep9 = bitcast ptr null to ptr
br label %for.body147

for.body147: ; preds = %for.body170, %entry
%add11.i526 = or i32 %.pre, 1
br label %for.body158

for.body158: ; preds = %for.body158, %for.body147
%lsr.iv = phi i32 [ %lsr.iv.next, %for.body158 ], [ -1, %for.body147 ]
%add11.i536602603 = phi i32 [ %add11.i526, %for.body147 ], [ 0, %for.body158 ]
%and8.i534 = and i32 %add11.i536602603, 1
%cgep7 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and8.i534
store i32 0, ptr %cgep7, align 4
%lsr.iv.next = add nsw i32 %lsr.iv, 1
%cmp157.3 = icmp ult i32 %lsr.iv.next, 510
br i1 %cmp157.3, label %for.body158, label %for.body170

for.body170: ; preds = %for.body170, %for.body158
%lsr.iv3 = phi ptr [ %cgep6, %for.body170 ], [ inttoptr (i32 4 to ptr), %for.body158 ]
%lsr.iv1 = phi i32 [ %lsr.iv.next2, %for.body170 ], [ -1, %for.body158 ]
%add11.i556606607 = phi i32 [ 0, %for.body170 ], [ 1, %for.body158 ]
%cgep5 = getelementptr i8, ptr %lsr.iv3, i32 -4
store i32 0, ptr %cgep5, align 8
%sub.i547.1 = add i32 %add11.i556606607, 1
%and.i548.1 = and i32 %sub.i547.1, 1
%cgep8 = getelementptr [64 x i32], ptr %cgep9, i32 0, i32 %and.i548.1
%0 = load i32, ptr %cgep8, align 4
store i32 %0, ptr %lsr.iv3, align 4
%lsr.iv.next2 = add nsw i32 %lsr.iv1, 1
%cmp169.1 = icmp ult i32 %lsr.iv.next2, 254
%cgep6 = getelementptr i8, ptr %lsr.iv3, i32 2
br i1 %cmp169.1, label %for.body170, label %for.body147
}

...
---
name: test_swp_ws_live_intervals
tracksRegLiveness: true
body: |
; CHECK-LABEL: name: test_swp_ws_live_intervals
; CHECK: bb.0.entry:
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $r0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:intregs = COPY $r0
; CHECK-NEXT: [[S2_setbit_i:%[0-9]+]]:intregs = S2_setbit_i [[COPY]], 0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.5(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.6(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[A2_andir:%[0-9]+]]:intregs = A2_andir [[S2_setbit_i]], 1
; CHECK-NEXT: [[S2_asl_i_r:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir]], 2
; CHECK-NEXT: [[A2_tfrsi:%[0-9]+]]:intregs = A2_tfrsi 1
; CHECK-NEXT: [[A2_tfrsi1:%[0-9]+]]:intregs = A2_tfrsi 4
; CHECK-NEXT: [[A2_tfrsi2:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: J2_loop0i %bb.6, 510, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
; CHECK-NEXT: J2_jump %bb.6, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.6:
; CHECK-NEXT: successors: %bb.6(0x7c000000), %bb.7(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:intregs = PHI [[A2_tfrsi2]], %bb.5, %24, %bb.6
; CHECK-NEXT: [[PHI1:%[0-9]+]]:intregs = PHI [[S2_asl_i_r]], %bb.5, %23, %bb.6
; CHECK-NEXT: S4_storeiri_io [[PHI1]], 0, 0 :: (store (s32) into %ir.cgep7)
; CHECK-NEXT: [[A2_andir1:%[0-9]+]]:intregs = A2_andir [[PHI]], 1
; CHECK-NEXT: [[A2_tfrsi3:%[0-9]+]]:intregs = A2_tfrsi 1
; CHECK-NEXT: [[A2_tfrsi4:%[0-9]+]]:intregs = A2_tfrsi 4
; CHECK-NEXT: [[S2_asl_i_r1:%[0-9]+]]:intregs = S2_asl_i_r [[A2_andir1]], 2
; CHECK-NEXT: [[A2_tfrsi5:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: ENDLOOP0 %bb.6, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
; CHECK-NEXT: J2_jump %bb.7, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI2:%[0-9]+]]:intregs = PHI [[S2_asl_i_r1]], %bb.6
; CHECK-NEXT: [[PHI3:%[0-9]+]]:intregs = PHI [[A2_tfrsi3]], %bb.6
; CHECK-NEXT: [[PHI4:%[0-9]+]]:intregs = PHI [[A2_tfrsi4]], %bb.6
; CHECK-NEXT: S4_storeiri_io [[PHI2]], 0, 0 :: (store unknown-size into %ir.cgep7, align 4)
; CHECK-NEXT: J2_jump %bb.3, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
; CHECK-NEXT: J2_jump %bb.4, implicit-def $pc
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.1(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:intregs = PHI [[PHI4]], %bb.3, %9, %bb.4
; CHECK-NEXT: [[PHI6:%[0-9]+]]:intregs = PHI [[PHI3]], %bb.3, %11, %bb.4
; CHECK-NEXT: [[A2_tfrsi6:%[0-9]+]]:intregs = A2_tfrsi 0
; CHECK-NEXT: S2_storeri_io [[PHI5]], -4, [[A2_tfrsi6]] :: (store (s32) into %ir.cgep5, align 8)
; CHECK-NEXT: [[A2_addi:%[0-9]+]]:intregs = A2_addi [[PHI6]], 1
; CHECK-NEXT: [[S2_insert:%[0-9]+]]:intregs = S2_insert [[PHI2]], [[A2_addi]], 1, 2
; CHECK-NEXT: [[L2_loadri_io:%[0-9]+]]:intregs = L2_loadri_io [[S2_insert]], 0 :: (load (s32) from %ir.cgep8)
; CHECK-NEXT: S2_storeri_io [[PHI5]], 0, [[L2_loadri_io]] :: (store (s32) into %ir.lsr.iv3)
; CHECK-NEXT: [[A2_addi1:%[0-9]+]]:intregs = A2_addi [[PHI5]], 2
; CHECK-NEXT: ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
; CHECK-NEXT: J2_jump %bb.1, implicit-def dead $pc
bb.0.entry:
successors: %bb.1(0x80000000)
liveins: $r0

%0:intregs = COPY $r0
%1:intregs = S2_setbit_i %0, 0

bb.1:
successors: %bb.2(0x80000000)

J2_loop0i %bb.2, 511, implicit-def $lc0, implicit-def $sa0, implicit-def $usr

bb.2:
successors: %bb.2(0x7c000000), %bb.3(0x04000000)

%2:intregs = PHI %1, %bb.1, %3, %bb.2
%4:intregs = A2_andir %2, 1
%5:intregs = S2_asl_i_r %4, 2
S4_storeiri_io %5, 0, 0 :: (store (s32) into %ir.cgep7)
%6:intregs = A2_tfrsi 1
%7:intregs = A2_tfrsi 4
%3:intregs = A2_tfrsi 0
ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.3, implicit-def dead $pc

bb.3:
successors: %bb.4(0x80000000)

J2_loop0i %bb.4, 255, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
J2_jump %bb.4, implicit-def $pc

bb.4:
successors: %bb.4(0x7c000000), %bb.1(0x04000000)

%8:intregs = PHI %7, %bb.3, %9, %bb.4
%10:intregs = PHI %6, %bb.3, %11, %bb.4
%11:intregs = A2_tfrsi 0
S2_storeri_io %8, -4, %11 :: (store (s32) into %ir.cgep5, align 8)
%12:intregs = A2_addi %10, 1
%13:intregs = S2_insert %5, %12, 1, 2
%14:intregs = L2_loadri_io %13, 0 :: (load (s32) from %ir.cgep8)
S2_storeri_io %8, 0, %14 :: (store (s32) into %ir.lsr.iv3)
%9:intregs = A2_addi %8, 2
ENDLOOP0 %bb.4, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
J2_jump %bb.1, implicit-def dead $pc

...
Loading