Skip to content

[llvm-exegesis] Add support for warmup iterations #76895

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 117 additions & 64 deletions llvm/tools/llvm-exegesis/lib/Assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,38 +47,14 @@ static constexpr const char ModuleID[] = "ExegesisInfoTest";
static constexpr const char FunctionID[] = "foo";
static const Align kFunctionAlignment(4096);

// Fills the given basic block with register setup code, and returns true if
// all registers could be setup correctly.
static bool generateSnippetSetupCode(const ExegesisTarget &ET,
const MCSubtargetInfo *const MSI,
BasicBlockFiller &BBF,
const BenchmarkKey &Key,
bool GenerateMemoryInstructions) {
static bool generateRegisterSetupCode(
const ExegesisTarget &ET, const MCSubtargetInfo *const MSI,
BasicBlockFiller &BBF, ArrayRef<RegisterValue> InitialRegisterValues,
bool GenerateMemoryInstructions, Register StackPointerRegister) {
bool IsSnippetSetupComplete = true;
if (GenerateMemoryInstructions) {
BBF.addInstructions(ET.generateMemoryInitialSetup());
for (const MemoryMapping &MM : Key.MemoryMappings) {
#ifdef __linux__
// The frontend that parses the memory mapping information from the user
// should validate that the requested address is a multiple of the page
// size. Assert that this is true here.
assert(MM.Address % getpagesize() == 0 &&
"Memory mappings need to be aligned to page boundaries.");
#endif
BBF.addInstructions(ET.generateMmap(
MM.Address, Key.MemoryValues.at(MM.MemoryValueName).SizeBytes,
ET.getAuxiliaryMemoryStartAddress() +
sizeof(int) * (Key.MemoryValues.at(MM.MemoryValueName).Index +
SubprocessMemory::AuxiliaryMemoryOffset)));
}
BBF.addInstructions(ET.setStackRegisterToAuxMem());
}
Register StackPointerRegister = BBF.MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();
for (const RegisterValue &RV : Key.RegisterInitialValues) {
for (const RegisterValue &RV : InitialRegisterValues) {
if (GenerateMemoryInstructions) {
// If we're generating memory instructions, don't load in the value for
// If we are generating memory instructions, don't load in the value for
// the register with the stack pointer as it will be used later to finish
// the setup.
if (RV.Register == StackPointerRegister)
Expand All @@ -90,23 +66,61 @@ static bool generateSnippetSetupCode(const ExegesisTarget &ET,
IsSnippetSetupComplete = false;
BBF.addInstructions(SetRegisterCode);
}
if (GenerateMemoryInstructions) {
return IsSnippetSetupComplete;
}

// Appends the memory setup code for a snippet to BBF: first the instructions
// returned by ET.generateMemoryInitialSetup(), then one ET.generateMmap()
// sequence per entry in Key.MemoryMappings, and finally the instructions
// returned by ET.setStackRegisterToAuxMem().
static void generateMemoryMappings(const ExegesisTarget &ET,
                                   BasicBlockFiller &BBF,
                                   const BenchmarkKey &Key) {
  BBF.addInstructions(ET.generateMemoryInitialSetup());
  for (const MemoryMapping &MM : Key.MemoryMappings) {
#ifdef __linux__
    // The frontend that parses the memory mapping information from the user
    // should validate that the requested address is a multiple of the page
    // size. Assert that this is true here.
    assert(MM.Address % getpagesize() == 0 &&
           "Memory mappings need to be aligned to page boundaries.");
#endif
    // Look up the memory value backing this mapping once; both its size and
    // its index are needed to build the mmap arguments.
    const auto &MemVal = Key.MemoryValues.at(MM.MemoryValueName);
    BBF.addInstructions(ET.generateMmap(
        MM.Address, MemVal.SizeBytes,
        ET.getAuxiliaryMemoryStartAddress() +
            sizeof(int) *
                (MemVal.Index + SubprocessMemory::AuxiliaryMemoryOffset)));
  }
  BBF.addInstructions(ET.setStackRegisterToAuxMem());
}

// Emits the code that loads the stack pointer register with its requested
// initial value. Only the first entry of InitialRegisterValues whose register
// is StackPointerRegister is considered. Returns false if the target produced
// no instructions for that entry, and true otherwise (including when no entry
// refers to the stack pointer register).
static bool
setStackPointerRegister(const ExegesisTarget &ET,
                        const MCSubtargetInfo *const MSI, BasicBlockFiller &BBF,
                        ArrayRef<RegisterValue> InitialRegisterValues,
                        Register StackPointerRegister) {
  for (const RegisterValue &RV : InitialRegisterValues) {
    if (RV.Register != StackPointerRegister)
      continue;
    // The stack register is no longer needed for the setup itself, so its
    // value can now be set in preparation for executing the snippet.
    const auto StackPointerSetup = ET.setRegTo(*MSI, RV.Register, RV.Value);
    BBF.addInstructions(StackPointerSetup);
    return !StackPointerSetup.empty();
  }
  return true;
}

static bool generatePerfCounterReset(
const ExegesisTarget &ET, const MCSubtargetInfo *const MSI,
BasicBlockFiller &BBF, ArrayRef<RegisterValue> InitialRegisterValues,
Register StackPointerRegister) {
bool IsSnippetSetupComplete = true;
#ifdef HAVE_LIBPFM
BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true));
BBF.addInstructions(ET.configurePerfCounter(PERF_EVENT_IOC_RESET, true));
#endif // HAVE_LIBPFM
for (const RegisterValue &RV : Key.RegisterInitialValues) {
// Load in the stack register now as we're done using it elsewhere
// and need to set the value in preparation for executing the
// snippet.
if (RV.Register != StackPointerRegister)
continue;
const auto SetRegisterCode = ET.setRegTo(*MSI, RV.Register, RV.Value);
if (SetRegisterCode.empty())
IsSnippetSetupComplete = false;
BBF.addInstructions(SetRegisterCode);
break;
}
}
IsSnippetSetupComplete = setStackPointerRegister(
ET, MSI, BBF, InitialRegisterValues, StackPointerRegister);
return IsSnippetSetupComplete;
}

Expand Down Expand Up @@ -147,7 +161,7 @@ MachineFunction &createVoidVoidPtrMachineFunction(StringRef FunctionName,
return MMI->getOrCreateMachineFunction(*F);
}

BasicBlockFiller::BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB,
BasicBlockFiller::BasicBlockFiller(MachineFunction *MF, MachineBasicBlock *MBB,
const MCInstrInfo *MCII)
: MF(MF), MBB(MBB), MCII(MCII) {}

Expand Down Expand Up @@ -193,17 +207,17 @@ void BasicBlockFiller::addReturn(const ExegesisTarget &ET,
#endif // __linux__
}
// Insert the return code.
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
if (TII->getReturnOpcode() < TII->getNumOpcodes()) {
BuildMI(MBB, DL, TII->get(TII->getReturnOpcode()));
} else {
MachineIRBuilder MIB(MF);
MachineIRBuilder MIB(*MF);
MIB.setMBB(*MBB);

FunctionLoweringInfo FuncInfo;
FuncInfo.CanLowerReturn = true;
MF.getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {}, FuncInfo,
0);
MF->getSubtarget().getCallLowering()->lowerReturn(MIB, nullptr, {},
FuncInfo, 0);
}
}

Expand All @@ -215,7 +229,7 @@ FunctionFiller::FunctionFiller(MachineFunction &MF,
BasicBlockFiller FunctionFiller::addBasicBlock() {
MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
MF.push_back(MBB);
return BasicBlockFiller(MF, MBB, MCII);
return BasicBlockFiller(&MF, MBB, MCII);
}

ArrayRef<unsigned> FunctionFiller::getRegistersSetUp() const {
Expand All @@ -241,11 +255,28 @@ BitVector getFunctionReservedRegs(const TargetMachine &TM) {
return MF.getSubtarget().getRegisterInfo()->getReservedRegs(MF);
}

static void setMBBLiveIns(const ExegesisTarget &ET, MachineBasicBlock *MBB,
bool GenerateMemoryInstructions,
ArrayRef<unsigned> LiveIns) {
for (const unsigned Reg : LiveIns)
MBB->addLiveIn(Reg);

if (GenerateMemoryInstructions) {
for (const unsigned Reg : ET.getArgumentRegisters())
MBB->addLiveIn(Reg);
// Add a live in for registers that need saving so that the machine
// verifier doesn't fail if the register is never defined.
for (const unsigned Reg : ET.getRegistersNeedSaving())
MBB->addLiveIn(Reg);
}
}

Error assembleToStream(const ExegesisTarget &ET,
std::unique_ptr<LLVMTargetMachine> TM,
ArrayRef<unsigned> LiveIns, const FillFunction &Fill,
raw_pwrite_stream &AsmStream, const BenchmarkKey &Key,
bool GenerateMemoryInstructions) {
bool GenerateMemoryInstructions,
std::optional<FillFunction> WarmupFill) {
auto Context = std::make_unique<LLVMContext>();
std::unique_ptr<Module> Module =
createModule(Context, TM->createDataLayout());
Expand Down Expand Up @@ -280,28 +311,50 @@ Error assembleToStream(const ExegesisTarget &ET,
FunctionFiller Sink(MF, std::move(RegistersSetUp));
auto Entry = Sink.getEntry();

for (const unsigned Reg : LiveIns)
Entry.MBB->addLiveIn(Reg);
setMBBLiveIns(ET, Entry.MBB, GenerateMemoryInstructions, LiveIns);

if (GenerateMemoryInstructions) {
for (const unsigned Reg : ET.getArgumentRegisters())
Entry.MBB->addLiveIn(Reg);
// Add a live in for registers that need saving so that the machine verifier
// doesn't fail if the register is never defined.
for (const unsigned Reg : ET.getRegistersNeedSaving())
Entry.MBB->addLiveIn(Reg);
bool IsSnippetSetupComplete = true;
const MCSubtargetInfo *const MSI = TM->getMCSubtargetInfo();

Register StackPointerRegister = MF.getSubtarget()
.getTargetLowering()
->getStackPointerRegisterToSaveRestore();

if (GenerateMemoryInstructions)
generateMemoryMappings(ET, Entry, Key);

BasicBlockFiller BenchmarkStartBlock = Entry;

if (WarmupFill) {
IsSnippetSetupComplete &= generateRegisterSetupCode(
ET, MSI, Entry, Key.RegisterInitialValues, GenerateMemoryInstructions,
StackPointerRegister);

IsSnippetSetupComplete &= setStackPointerRegister(
ET, MSI, Entry, Key.RegisterInitialValues, StackPointerRegister);

BenchmarkStartBlock = (*WarmupFill)(Sink, false, Entry);

setMBBLiveIns(ET, BenchmarkStartBlock.MBB, GenerateMemoryInstructions,
LiveIns);
}

const bool IsSnippetSetupComplete = generateSnippetSetupCode(
ET, TM->getMCSubtargetInfo(), Entry, Key, GenerateMemoryInstructions);
IsSnippetSetupComplete &= generateRegisterSetupCode(
ET, MSI, BenchmarkStartBlock, Key.RegisterInitialValues,
GenerateMemoryInstructions, StackPointerRegister);

if (GenerateMemoryInstructions)
IsSnippetSetupComplete &= generatePerfCounterReset(
ET, MSI, BenchmarkStartBlock, Key.RegisterInitialValues,
StackPointerRegister);

// If the snippet setup is not complete, we disable liveness tracking. This
// means that we won't know what values are in the registers.
// FIXME: this should probably be an assertion.
if (!IsSnippetSetupComplete)
Properties.reset(MachineFunctionProperties::Property::TracksLiveness);

Fill(Sink);
Fill(Sink, true, BenchmarkStartBlock);

// prologue/epilogue pass needs the reserved registers to be frozen, this
// is usually done by the SelectionDAGISel pass.
Expand Down
14 changes: 8 additions & 6 deletions llvm/tools/llvm-exegesis/lib/Assembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ BitVector getFunctionReservedRegs(const TargetMachine &TM);
// Helper to fill in a basic block.
class BasicBlockFiller {
public:
BasicBlockFiller(MachineFunction &MF, MachineBasicBlock *MBB,
BasicBlockFiller(MachineFunction *MF, MachineBasicBlock *MBB,
const MCInstrInfo *MCII);

void addInstruction(const MCInst &Inst, const DebugLoc &DL = DebugLoc());
Expand All @@ -53,9 +53,9 @@ class BasicBlockFiller {
void addReturn(const ExegesisTarget &ET, bool SubprocessCleanup,
const DebugLoc &DL = DebugLoc());

MachineFunction &MF;
MachineBasicBlock *const MBB;
const MCInstrInfo *const MCII;
MachineFunction *MF;
MachineBasicBlock *MBB;
const MCInstrInfo *MCII;
};

// Helper to fill in a function.
Expand All @@ -82,7 +82,8 @@ class FunctionFiller {
};

// A callback that fills a function.
using FillFunction = std::function<void(FunctionFiller &)>;
using FillFunction =
std::function<BasicBlockFiller(FunctionFiller &, bool, BasicBlockFiller &)>;

// Creates a temporary `void foo(char*)` function containing the provided
// Instructions. Runs a set of llvm Passes to provide correct prologue and
Expand All @@ -92,7 +93,8 @@ Error assembleToStream(const ExegesisTarget &ET,
std::unique_ptr<LLVMTargetMachine> TM,
ArrayRef<unsigned> LiveIns, const FillFunction &Fill,
raw_pwrite_stream &AsmStreamm, const BenchmarkKey &Key,
bool GenerateMemoryInstructions);
bool GenerateMemoryInstructions,
std::optional<FillFunction> WarmupFill);

// Creates an ObjectFile in the format understood by the host.
// Note: the resulting object keeps a copy of Buffer so it can be discarded once
Expand Down
24 changes: 17 additions & 7 deletions llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,15 +478,22 @@ class SubProcessFunctionExecutorImpl
Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
unsigned MinInstructions, unsigned LoopBodySize,
bool GenerateMemoryInstructions) const {
bool GenerateMemoryInstructions, unsigned MinWarmupInstructions) const {
const std::vector<MCInst> &Instructions = BC.Key.Instructions;
SmallString<0> Buffer;
raw_svector_ostream OS(Buffer);

std::optional<FillFunction> OptionalWarmupFill = {};
if (MinWarmupInstructions > 0)
OptionalWarmupFill =
Repetitor.Repeat(Instructions, MinWarmupInstructions, LoopBodySize,
GenerateMemoryInstructions);

if (Error E = assembleToStream(
State.getExegesisTarget(), State.createTargetMachine(), BC.LiveIns,
Repetitor.Repeat(Instructions, MinInstructions, LoopBodySize,
GenerateMemoryInstructions),
OS, BC.Key, GenerateMemoryInstructions)) {
OS, BC.Key, GenerateMemoryInstructions, OptionalWarmupFill)) {
return std::move(E);
}
return Buffer;
Expand All @@ -495,7 +502,7 @@ Expected<SmallString<0>> BenchmarkRunner::assembleSnippet(
Expected<BenchmarkRunner::RunnableConfiguration>
BenchmarkRunner::getRunnableConfiguration(
const BenchmarkCode &BC, unsigned NumRepetitions, unsigned LoopBodySize,
const SnippetRepetitor &Repetitor) const {
const SnippetRepetitor &Repetitor, unsigned WarmupMinInstructions) const {
RunnableConfiguration RC;

Benchmark &BenchmarkResult = RC.BenchmarkResult;
Expand All @@ -519,9 +526,12 @@ BenchmarkRunner::getRunnableConfiguration(
if (BenchmarkPhaseSelector > BenchmarkPhaseSelectorE::PrepareSnippet) {
const int MinInstructionsForSnippet = 4 * Instructions.size();
const int LoopBodySizeForSnippet = 2 * Instructions.size();
// Do not include warmup iterations in the assembled snippet to display
// as reasonable warmup instruction minimums can easily blow up the size
// of the string.
auto Snippet =
assembleSnippet(BC, Repetitor, MinInstructionsForSnippet,
LoopBodySizeForSnippet, GenerateMemoryInstructions);
LoopBodySizeForSnippet, GenerateMemoryInstructions, 0);
if (Error E = Snippet.takeError())
return std::move(E);

Expand All @@ -534,9 +544,9 @@ BenchmarkRunner::getRunnableConfiguration(
// measurements.
if (BenchmarkPhaseSelector >
BenchmarkPhaseSelectorE::PrepareAndAssembleSnippet) {
auto Snippet =
assembleSnippet(BC, Repetitor, BenchmarkResult.NumRepetitions,
LoopBodySize, GenerateMemoryInstructions);
auto Snippet = assembleSnippet(
BC, Repetitor, BenchmarkResult.NumRepetitions, LoopBodySize,
GenerateMemoryInstructions, WarmupMinInstructions);
if (Error E = Snippet.takeError())
return std::move(E);
RC.ObjectFile = getObjectFromBuffer(*Snippet);
Expand Down
6 changes: 4 additions & 2 deletions llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ class BenchmarkRunner {
Expected<RunnableConfiguration>
getRunnableConfiguration(const BenchmarkCode &Configuration,
unsigned NumRepetitions, unsigned LoopUnrollFactor,
const SnippetRepetitor &Repetitor) const;
const SnippetRepetitor &Repetitor,
unsigned WarmupMinInstructions) const;

std::pair<Error, Benchmark>
runConfiguration(RunnableConfiguration &&RC,
Expand Down Expand Up @@ -116,7 +117,8 @@ class BenchmarkRunner {
Expected<SmallString<0>>
assembleSnippet(const BenchmarkCode &BC, const SnippetRepetitor &Repetitor,
unsigned MinInstructions, unsigned LoopBodySize,
bool GenerateMemoryInstructions) const;
bool GenerateMemoryInstructions,
unsigned MinWarmupInstructions) const;

Expected<std::string> writeObjectFile(StringRef Buffer,
StringRef FileName) const;
Expand Down
Loading