Skip to content

Commit b9d9cbf

Browse files
committed
[ModuloSchedule] Implement modulo variable expansion for pipelining
Modulo variable expansion is a technique that resolves overlapping variable lifetimes by unrolling the loop. On processors with ordinary registers, such as Arm and x86, the existing implementation resolves the overlap by copying values with move instructions. That approach may produce a very large number of move instructions, which can cause performance problems. Modulo variable expansion is enabled by specifying -pipeliner-mve-cg. A backend must implement some newly defined interfaces in PipelinerLoopInfo.
1 parent 894f52f commit b9d9cbf

File tree

4 files changed

+713
-0
lines changed

4 files changed

+713
-0
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,71 @@ class PeelingModuloScheduleExpander {
370370
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
371371
};
372372

373+
/// Expand the kernel using the modulo variable expansion (MVE) algorithm.
374+
/// It unrolls the kernel enough to avoid overlap of register lifetime.
375+
class ModuloScheduleExpanderMVE {
376+
private:
377+
using ValueMapTy = DenseMap<unsigned, unsigned>;
378+
using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
379+
using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
380+
381+
ModuloSchedule &Schedule;
382+
MachineFunction &MF;
383+
const TargetSubtargetInfo &ST;
384+
MachineRegisterInfo &MRI;
385+
const TargetInstrInfo *TII = nullptr;
386+
LiveIntervals &LIS;
387+
388+
MachineBasicBlock *OrigKernel = nullptr;
389+
MachineBasicBlock *OrigPreheader = nullptr;
390+
MachineBasicBlock *OrigExit = nullptr;
391+
MachineBasicBlock *Check = nullptr;
392+
MachineBasicBlock *Prolog = nullptr;
393+
MachineBasicBlock *NewKernel = nullptr;
394+
MachineBasicBlock *Epilog = nullptr;
395+
MachineBasicBlock *NewPreheader = nullptr;
396+
MachineBasicBlock *NewExit = nullptr;
397+
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
398+
399+
/// The number of times the kernel must be unrolled to avoid overlap of live ranges.
400+
/// NumUnroll = 1 means no unrolling.
401+
int NumUnroll;
402+
403+
void calcNumUnroll();
404+
void generatePipelinedLoop();
405+
void generateProlog(SmallVectorImpl<ValueMapTy> &VRMap);
406+
void generatePhi(MachineInstr *OrigMI, int UnrollNum,
407+
SmallVectorImpl<ValueMapTy> &PrologVRMap,
408+
SmallVectorImpl<ValueMapTy> &KernelVRMap,
409+
SmallVectorImpl<ValueMapTy> &PhiVRMap);
410+
void generateKernel(SmallVectorImpl<ValueMapTy> &PrologVRMap,
411+
SmallVectorImpl<ValueMapTy> &KernelVRMap);
412+
void generateEpilog(SmallVectorImpl<ValueMapTy> &KernelVRMap,
413+
SmallVectorImpl<ValueMapTy> &EpilogVRMap);
414+
void mergeRegUsesAfterPipeline(Register OrigReg, Register NewReg);
415+
416+
MachineInstr *cloneInstr(MachineInstr *OldMI);
417+
418+
void updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef);
419+
420+
void generateKernelPhi(Register OrigLoopVal, Register NewLoopVal,
421+
unsigned UnrollNum,
422+
SmallVectorImpl<ValueMapTy> &VRMapProlog,
423+
SmallVectorImpl<ValueMapTy> &VRMapPhi);
424+
void updateInstrUse(MachineInstr *MI, int StageNum, int PhaseNum,
425+
SmallVectorImpl<ValueMapTy> &CurVRMap,
426+
SmallVectorImpl<ValueMapTy> *PrevVRMap);
427+
428+
public:
429+
ModuloScheduleExpanderMVE(MachineFunction &MF, ModuloSchedule &S,
430+
LiveIntervals &LIS)
431+
: Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
432+
TII(ST.getInstrInfo()), LIS(LIS) {}
433+
434+
void expand();
435+
static bool canApply(MachineLoop &L);
436+
};
437+
373438
/// Expander that simply annotates each scheduled instruction with a post-instr
374439
/// symbol that can be consumed by the ModuloScheduleTest pass.
375440
///

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -765,6 +765,20 @@ class TargetInstrInfo : public MCInstrInfo {
765765
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
766766
SmallVectorImpl<MachineOperand> &Cond) = 0;
767767

768+
/// Create a condition to determine if the remaining trip count represented
769+
/// by the loop counter CounterReg is greater than TC. Some instructions
770+
/// such as comparisons may be inserted at the bottom of MBB. CounterReg
771+
/// must be accessible there.
772+
///
773+
/// The definition of the return value is the same as for the variant above.
774+
virtual std::optional<bool>
775+
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
776+
SmallVectorImpl<MachineOperand> &Cond,
777+
Register CounterReg) {
778+
llvm_unreachable(
779+
"Target didn't implement createTripCountGreaterCondition");
780+
}
781+
768782
/// Modify the loop such that the trip count is
769783
/// OriginalTC + TripCountAdjust.
770784
virtual void adjustTripCount(int TripCountAdjust) = 0;
@@ -778,6 +792,16 @@ class TargetInstrInfo : public MCInstrInfo {
778792
/// Once this function is called, no other functions on this object are
779793
/// valid; the loop has been removed.
780794
virtual void disposed() = 0;
795+
796+
/// Return the initial value of the loop counter.
797+
virtual Register getCounterInitReg() {
798+
llvm_unreachable("Target didn't implement getCounterInitReg");
799+
}
800+
801+
/// Return the updated value of the loop counter in the original loop.
802+
virtual Register getCounterUpdatedReg() {
803+
llvm_unreachable("Target didn't implement getCounterUpdatedReg");
804+
}
781805
};
782806

783807
/// Analyze loop L, which must be a single-basic-block loop, and if the

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,10 @@ static cl::opt<int>
192192
cl::desc("Margin representing the unused percentage of "
193193
"the register pressure limit"));
194194

195+
static cl::opt<bool>
196+
MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
197+
cl::desc("Use the MVE code generator for software pipelining"));
198+
195199
namespace llvm {
196200

197201
// A command line option to enable the CopyToPhi DAG mutation.
@@ -677,6 +681,11 @@ void SwingSchedulerDAG::schedule() {
677681
if (ExperimentalCodeGen && NewInstrChanges.empty()) {
678682
PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
679683
MSE.expand();
684+
}
685+
if (MVECodeGen && NewInstrChanges.empty() &&
686+
ModuloScheduleExpanderMVE::canApply(Loop)) {
687+
ModuloScheduleExpanderMVE MSE(MF, MS, LIS);
688+
MSE.expand();
680689
} else {
681690
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
682691
MSE.expand();

0 commit comments

Comments
 (0)