Skip to content

Commit 9f5dc31

Browse files
committed
[ModuloSchedule] Implement modulo variable expansion for pipelining
Modulo variable expansion is a technique that resolves overlapping variable lifetimes by unrolling. The existing implementation resolves them by copying values with move instructions on processors with ordinary registers, such as Arm and x86. This method may result in a very large number of move instructions, which can cause performance problems. Modulo variable expansion is enabled by specifying -pipeliner-mve-cg. A backend must implement some newly defined interfaces in PipelinerLoopInfo.
1 parent 0de6baa commit 9f5dc31

File tree

4 files changed

+713
-0
lines changed

4 files changed

+713
-0
lines changed

llvm/include/llvm/CodeGen/ModuloSchedule.h

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,71 @@ class PeelingModuloScheduleExpander {
369369
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
370370
};
371371

372+
/// Expand the kernel using the modulo variable expansion (MVE) algorithm.
373+
/// It unrolls the kernel enough to avoid overlapping register lifetimes.
374+
class ModuloScheduleExpanderMVE {
375+
private:
376+
using ValueMapTy = DenseMap<unsigned, unsigned>;
377+
using MBBVectorTy = SmallVectorImpl<MachineBasicBlock *>;
378+
using InstrMapTy = DenseMap<MachineInstr *, MachineInstr *>;
379+
380+
ModuloSchedule &Schedule;
381+
MachineFunction &MF;
382+
const TargetSubtargetInfo &ST;
383+
MachineRegisterInfo &MRI;
384+
const TargetInstrInfo *TII = nullptr;
385+
LiveIntervals &LIS;
386+
387+
MachineBasicBlock *OrigKernel = nullptr;
388+
MachineBasicBlock *OrigPreheader = nullptr;
389+
MachineBasicBlock *OrigExit = nullptr;
390+
MachineBasicBlock *Check = nullptr;
391+
MachineBasicBlock *Prolog = nullptr;
392+
MachineBasicBlock *NewKernel = nullptr;
393+
MachineBasicBlock *Epilog = nullptr;
394+
MachineBasicBlock *NewPreheader = nullptr;
395+
MachineBasicBlock *NewExit = nullptr;
396+
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopInfo;
397+
398+
/// The unroll count required to avoid overlap of live ranges.
399+
/// NumUnroll = 1 means no unrolling. Defaults to 1 so the member is never read uninitialized.
400+
int NumUnroll = 1;
401+
402+
void calcNumUnroll();
403+
void generatePipelinedLoop();
404+
void generateProlog(SmallVectorImpl<ValueMapTy> &VRMap);
405+
void generatePhi(MachineInstr *OrigMI, int UnrollNum,
406+
SmallVectorImpl<ValueMapTy> &PrologVRMap,
407+
SmallVectorImpl<ValueMapTy> &KernelVRMap,
408+
SmallVectorImpl<ValueMapTy> &PhiVRMap);
409+
void generateKernel(SmallVectorImpl<ValueMapTy> &PrologVRMap,
410+
SmallVectorImpl<ValueMapTy> &KernelVRMap);
411+
void generateEpilog(SmallVectorImpl<ValueMapTy> &KernelVRMap,
412+
SmallVectorImpl<ValueMapTy> &EpilogVRMap);
413+
void mergeRegUsesAfterPipeline(Register OrigReg, Register NewReg);
414+
415+
MachineInstr *cloneInstr(MachineInstr *OldMI);
416+
417+
void updateInstrDef(MachineInstr *NewMI, ValueMapTy &VRMap, bool LastDef);
418+
419+
void generateKernelPhi(Register OrigLoopVal, Register NewLoopVal,
420+
unsigned UnrollNum,
421+
SmallVectorImpl<ValueMapTy> &VRMapProlog,
422+
SmallVectorImpl<ValueMapTy> &VRMapPhi);
423+
void updateInstrUse(MachineInstr *MI, int StageNum, int PhaseNum,
424+
SmallVectorImpl<ValueMapTy> &CurVRMap,
425+
SmallVectorImpl<ValueMapTy> *PrevVRMap);
426+
427+
public:
428+
ModuloScheduleExpanderMVE(MachineFunction &MF, ModuloSchedule &S,
429+
LiveIntervals &LIS)
430+
: Schedule(S), MF(MF), ST(MF.getSubtarget()), MRI(MF.getRegInfo()),
431+
TII(ST.getInstrInfo()), LIS(LIS) {}
432+
433+
void expand();
434+
static bool canApply(MachineLoop &L);
435+
};
436+
372437
/// Expander that simply annotates each scheduled instruction with a post-instr
373438
/// symbol that can be consumed by the ModuloScheduleTest pass.
374439
///

llvm/include/llvm/CodeGen/TargetInstrInfo.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -748,6 +748,20 @@ class TargetInstrInfo : public MCInstrInfo {
748748
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
749749
SmallVectorImpl<MachineOperand> &Cond) = 0;
750750

751+
/// Create a condition to determine if the remaining trip count represented
752+
/// by the loop counter CounterReg is greater than TC. Some instructions
753+
/// such as comparisons may be inserted at the bottom of MBB. CounterReg
754+
/// must be accessible there.
755+
///
756+
/// The definition of the return value is the same as for the variant above. This default implementation is unreachable; a target relying on this overload must override it.
757+
virtual std::optional<bool>
758+
createTripCountGreaterCondition(int TC, MachineBasicBlock &MBB,
759+
SmallVectorImpl<MachineOperand> &Cond,
760+
Register CounterReg) {
761+
llvm_unreachable(
762+
"Target didn't implement createTripCountGreaterCondition");
763+
}
764+
751765
/// Modify the loop such that the trip count is
752766
/// OriginalTC + TripCountAdjust.
753767
virtual void adjustTripCount(int TripCountAdjust) = 0;
@@ -761,6 +775,16 @@ class TargetInstrInfo : public MCInstrInfo {
761775
/// Once this function is called, no other functions on this object are
762776
/// valid; the loop has been removed.
763777
virtual void disposed() = 0;
778+
779+
/// Return the register holding the initial value of the loop counter. This default implementation is unreachable; a target relying on this hook must override it.
780+
virtual Register getCounterInitReg() {
781+
llvm_unreachable("Target didn't implement getCounterInitReg");
782+
}
783+
784+
/// Return the register holding the updated value of the loop counter in the original loop. This default implementation is unreachable; a target relying on this hook must override it.
785+
virtual Register getCounterUpdatedReg() {
786+
llvm_unreachable("Target didn't implement getCounterUpdatedReg");
787+
}
764788
};
765789

766790
/// Analyze loop L, which must be a single-basic-block loop, and if the

llvm/lib/CodeGen/MachinePipeliner.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,10 @@ static cl::opt<bool> ExperimentalCodeGen(
174174
cl::desc(
175175
"Use the experimental peeling code generator for software pipelining"));
176176

177+
// Hidden command-line flag -pipeliner-mve-cg (default: false) that requests the MVE (modulo variable expansion) code generator.
static cl::opt<bool>
178+
MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
179+
cl::desc("Use the MVE code generator for software pipelining"));
180+
177181
namespace llvm {
178182

179183
// A command line option to enable the CopyToPhi DAG mutation.
@@ -659,6 +663,11 @@ void SwingSchedulerDAG::schedule() {
659663
// Exactly one expander may run on the loop: the branches below form a single if / else-if / else chain.
if (ExperimentalCodeGen && NewInstrChanges.empty()) {
660664
PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
661665
MSE.expand();
666+
}
667+
else if (MVECodeGen && NewInstrChanges.empty() &&
668+
ModuloScheduleExpanderMVE::canApply(Loop)) {
669+
ModuloScheduleExpanderMVE MSE(MF, MS, LIS);
670+
MSE.expand();
662671
} else {
663672
ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
664673
MSE.expand();

0 commit comments

Comments
 (0)