Skip to content

Commit 1d1186d

Browse files
[llvm-exegesis] Add loop-register snippet annotation (#82873)
This patch adds an LLVM-EXEGESIS-LOOP-REGISTER snippet annotation which allows a user to specify the register to use for the loop counter in the loop repetition mode. This allows for executing snippets that don't work with the default value (currently R8 on X86).
1 parent e5ed7b6 commit 1d1186d

File tree

11 files changed

+117
-39
lines changed

11 files changed

+117
-39
lines changed

llvm/docs/CommandGuide/llvm-exegesis.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,14 @@ properly.
8989
annotation requires the subprocess execution mode. This is useful in
9090
cases where the memory accessed by the snippet depends on the location
9191
of the snippet, like RIP-relative addressing.
92+
* `LLVM-EXEGESIS-LOOP-REGISTER <register name>` - This annotation specifies
93+
the loop register to use for keeping track of the current iteration when
94+
using the loop repetition mode. :program:`llvm-exegesis` needs to keep track
95+
of the current loop iteration within the loop repetition mode in a performant
96+
manner (i.e., no memory accesses), and uses a register to do this. This register
97+
has an architecture-specific default (e.g., `R8` on X86), but this might conflict
98+
with some snippets. This annotation allows changing the register to prevent
99+
interference between the loop index register and the snippet.
92100

93101
EXAMPLE 1: benchmarking instructions
94102
------------------------------------
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# REQUIRES: exegesis-can-measure-latency, x86_64-linux
2+
3+
# Test that specifying the loop register to use works as expected.
4+
5+
# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s | FileCheck %s
6+
7+
# CHECK: measurements:
8+
9+
# LLVM-EXEGESIS-DEFREG R11 ff
10+
# LLVM-EXEGESIS-LOOP-REGISTER R12
11+
12+
addq $0xff, %r11

llvm/tools/llvm-exegesis/lib/BenchmarkResult.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@ struct BenchmarkKey {
7474
// The address that the snippet should be loaded in at if the execution mode
7575
// being used supports it.
7676
intptr_t SnippetAddress = 0;
77+
// The register that should be used to hold the loop counter.
78+
unsigned LoopRegister = 0; // 0 means no loop register has been selected.
7779
};
7880

7981
struct BenchmarkMeasure {

llvm/tools/llvm-exegesis/lib/SnippetFile.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "SnippetFile.h"
1010
#include "BenchmarkRunner.h"
1111
#include "Error.h"
12+
#include "Target.h"
1213
#include "llvm/MC/MCContext.h"
1314
#include "llvm/MC/MCInstPrinter.h"
1415
#include "llvm/MC/MCObjectFileInfo.h"
@@ -175,6 +176,20 @@ class BenchmarkCodeStreamer : public MCStreamer, public AsmCommentConsumer {
175176

176177
return;
177178
}
179+
if (CommentText.consume_front("LOOP-REGISTER")) {
180+
// LLVM-EXEGESIS-LOOP-REGISTER <loop register>
181+
unsigned LoopRegister;
182+
183+
if (!(LoopRegister = findRegisterByName(CommentText.trim()))) {
184+
errs() << "unknown register '" << CommentText
185+
<< "' in 'LLVM-EXEGESIS-LOOP-REGISTER " << CommentText << "'\n";
186+
++InvalidComments;
187+
return;
188+
}
189+
190+
Result->Key.LoopRegister = LoopRegister;
191+
return;
192+
}
178193
}
179194

180195
unsigned numInvalidComments() const { return InvalidComments; }
@@ -221,6 +236,11 @@ Expected<std::vector<BenchmarkCode>> readSnippets(const LLVMState &State,
221236

222237
BenchmarkCode Result;
223238

239+
// Ensure that there is a default loop register value specified.
240+
Result.Key.LoopRegister =
241+
State.getExegesisTarget().getDefaultLoopCounterRegister(
242+
State.getTargetMachine().getTargetTriple());
243+
224244
const TargetMachine &TM = State.getTargetMachine();
225245
MCContext Context(TM.getTargetTriple(), TM.getMCAsmInfo(),
226246
TM.getMCRegisterInfo(), TM.getMCSubtargetInfo());

llvm/tools/llvm-exegesis/lib/SnippetRepetitor.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,8 @@ class DuplicateSnippetRepetitor : public SnippetRepetitor {
4848

4949
class LoopSnippetRepetitor : public SnippetRepetitor {
5050
public:
51-
explicit LoopSnippetRepetitor(const LLVMState &State)
52-
: SnippetRepetitor(State),
53-
LoopCounter(State.getExegesisTarget().getLoopCounterRegister(
54-
State.getTargetMachine().getTargetTriple())) {}
51+
explicit LoopSnippetRepetitor(const LLVMState &State, unsigned LoopRegister)
52+
: SnippetRepetitor(State), LoopCounter(LoopRegister) {}
5553

5654
// Loop over the snippet ceil(MinInstructions / Instructions.Size()) times.
5755
FillFunction Repeat(ArrayRef<MCInst> Instructions, unsigned MinInstructions,
@@ -113,8 +111,8 @@ class LoopSnippetRepetitor : public SnippetRepetitor {
113111
(void)_;
114112
Loop.addInstructions(Instructions);
115113
}
116-
ET.decrementLoopCounterAndJump(*Loop.MBB, *Loop.MBB,
117-
State.getInstrInfo());
114+
ET.decrementLoopCounterAndJump(*Loop.MBB, *Loop.MBB, State.getInstrInfo(),
115+
LoopCounter);
118116

119117
// Set up the exit basic block.
120118
Loop.MBB->addSuccessor(Exit.MBB, BranchProbability::getZero());
@@ -138,14 +136,14 @@ SnippetRepetitor::~SnippetRepetitor() {}
138136

139137
std::unique_ptr<const SnippetRepetitor>
140138
SnippetRepetitor::Create(Benchmark::RepetitionModeE Mode,
141-
const LLVMState &State) {
139+
const LLVMState &State, unsigned LoopRegister) {
142140
switch (Mode) {
143141
case Benchmark::Duplicate:
144142
case Benchmark::MiddleHalfDuplicate:
145143
return std::make_unique<DuplicateSnippetRepetitor>(State);
146144
case Benchmark::Loop:
147145
case Benchmark::MiddleHalfLoop:
148-
return std::make_unique<LoopSnippetRepetitor>(State);
146+
return std::make_unique<LoopSnippetRepetitor>(State, LoopRegister);
149147
case Benchmark::AggregateMin:
150148
break;
151149
}

llvm/tools/llvm-exegesis/lib/SnippetRepetitor.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ namespace exegesis {
2929
class SnippetRepetitor {
3030
public:
3131
static std::unique_ptr<const SnippetRepetitor>
32-
Create(Benchmark::RepetitionModeE Mode, const LLVMState &State);
32+
Create(Benchmark::RepetitionModeE Mode, const LLVMState &State,
33+
unsigned LoopRegister);
3334

3435
virtual ~SnippetRepetitor();
3536

llvm/tools/llvm-exegesis/lib/Target.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,12 +202,15 @@ class ExegesisTarget {
202202
}
203203

204204
// Returns the default register usable as a loop counter, or 0 if there is none.
205-
virtual unsigned getLoopCounterRegister(const Triple &) const { return 0; }
205+
virtual unsigned getDefaultLoopCounterRegister(const Triple &) const {
206+
return 0;
207+
}
206208

207209
// Adds the code to decrement the loop counter and jump to TargetMBB.
208210
virtual void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
209211
MachineBasicBlock &TargetMBB,
210-
const MCInstrInfo &MII) const {
212+
const MCInstrInfo &MII,
213+
unsigned LoopRegister) const {
211214
llvm_unreachable("decrementLoopCounterAndBranch() requires "
212215
"getLoopCounterRegister() > 0");
213216
}

llvm/tools/llvm-exegesis/lib/X86/Target.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,7 @@ class ExegesisX86Target : public ExegesisTarget {
720720

721721
unsigned getScratchMemoryRegister(const Triple &TT) const override;
722722

723-
unsigned getLoopCounterRegister(const Triple &) const override;
723+
unsigned getDefaultLoopCounterRegister(const Triple &) const override;
724724

725725
unsigned getMaxMemoryAccessSize() const override { return 64; }
726726

@@ -733,7 +733,8 @@ class ExegesisX86Target : public ExegesisTarget {
733733

734734
void decrementLoopCounterAndJump(MachineBasicBlock &MBB,
735735
MachineBasicBlock &TargetMBB,
736-
const MCInstrInfo &MII) const override;
736+
const MCInstrInfo &MII,
737+
unsigned LoopRegister) const override;
737738

738739
std::vector<MCInst> setRegTo(const MCSubtargetInfo &STI, unsigned Reg,
739740
const APInt &Value) const override;
@@ -852,7 +853,7 @@ const unsigned ExegesisX86Target::kUnavailableRegistersSSE[12] = {
852853
// We're using one of R8-R15 because these registers are never hardcoded in
853854
// instructions (e.g. MOVS writes to EDI, ESI, EDX), so they have less
854855
// conflicts.
855-
constexpr const unsigned kLoopCounterReg = X86::R8;
856+
constexpr const unsigned kDefaultLoopCounterReg = X86::R8;
856857

857858
} // namespace
858859

@@ -870,11 +871,12 @@ unsigned ExegesisX86Target::getScratchMemoryRegister(const Triple &TT) const {
870871
return TT.isOSWindows() ? X86::RCX : X86::RDI;
871872
}
872873

873-
unsigned ExegesisX86Target::getLoopCounterRegister(const Triple &TT) const {
874+
unsigned
875+
ExegesisX86Target::getDefaultLoopCounterRegister(const Triple &TT) const {
874876
if (!TT.isArch64Bit()) {
875877
return 0;
876878
}
877-
return kLoopCounterReg;
879+
return kDefaultLoopCounterReg;
878880
}
879881

880882
Error ExegesisX86Target::randomizeTargetMCOperand(
@@ -912,10 +914,10 @@ void ExegesisX86Target::fillMemoryOperands(InstructionTemplate &IT,
912914

913915
void ExegesisX86Target::decrementLoopCounterAndJump(
914916
MachineBasicBlock &MBB, MachineBasicBlock &TargetMBB,
915-
const MCInstrInfo &MII) const {
917+
const MCInstrInfo &MII, unsigned LoopRegister) const {
916918
BuildMI(&MBB, DebugLoc(), MII.get(X86::ADD64ri8))
917-
.addDef(kLoopCounterReg)
918-
.addUse(kLoopCounterReg)
919+
.addDef(LoopRegister)
920+
.addUse(LoopRegister)
919921
.addImm(-1);
920922
BuildMI(&MBB, DebugLoc(), MII.get(X86::JCC_1))
921923
.addMBB(&TargetMBB)

llvm/tools/llvm-exegesis/llvm-exegesis.cpp

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -497,22 +497,42 @@ void benchmarkMain() {
497497
}
498498

499499
const auto Opcodes = getOpcodesOrDie(State);
500+
std::vector<BenchmarkCode> Configurations;
501+
502+
unsigned LoopRegister =
503+
State.getExegesisTarget().getDefaultLoopCounterRegister(
504+
State.getTargetMachine().getTargetTriple());
505+
506+
if (Opcodes.empty()) {
507+
Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
508+
for (const auto &Configuration : Configurations) {
509+
if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
510+
(Configuration.Key.MemoryMappings.size() != 0 ||
511+
Configuration.Key.MemoryValues.size() != 0 ||
512+
Configuration.Key.SnippetAddress != 0))
513+
ExitWithError("Memory and snippet address annotations are only "
514+
"supported in subprocess "
515+
"execution mode");
516+
}
517+
LoopRegister = Configurations[0].Key.LoopRegister;
518+
}
500519

501520
SmallVector<std::unique_ptr<const SnippetRepetitor>, 2> Repetitors;
502521
if (RepetitionMode != Benchmark::RepetitionModeE::AggregateMin)
503-
Repetitors.emplace_back(SnippetRepetitor::Create(RepetitionMode, State));
522+
Repetitors.emplace_back(
523+
SnippetRepetitor::Create(RepetitionMode, State, LoopRegister));
504524
else {
505525
for (Benchmark::RepetitionModeE RepMode :
506526
{Benchmark::RepetitionModeE::Duplicate,
507527
Benchmark::RepetitionModeE::Loop})
508-
Repetitors.emplace_back(SnippetRepetitor::Create(RepMode, State));
528+
Repetitors.emplace_back(
529+
SnippetRepetitor::Create(RepMode, State, LoopRegister));
509530
}
510531

511532
BitVector AllReservedRegs;
512533
for (const std::unique_ptr<const SnippetRepetitor> &Repetitor : Repetitors)
513534
AllReservedRegs |= Repetitor->getReservedRegs();
514535

515-
std::vector<BenchmarkCode> Configurations;
516536
if (!Opcodes.empty()) {
517537
for (const unsigned Opcode : Opcodes) {
518538
// Ignore instructions without a sched class if
@@ -534,17 +554,6 @@ void benchmarkMain() {
534554
std::move(ConfigsForInstr->begin(), ConfigsForInstr->end(),
535555
std::back_inserter(Configurations));
536556
}
537-
} else {
538-
Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
539-
for (const auto &Configuration : Configurations) {
540-
if (ExecutionMode != BenchmarkRunner::ExecutionModeE::SubProcess &&
541-
(Configuration.Key.MemoryMappings.size() != 0 ||
542-
Configuration.Key.MemoryValues.size() != 0 ||
543-
Configuration.Key.SnippetAddress != 0))
544-
ExitWithError("Memory and snippet address annotations are only "
545-
"supported in subprocess "
546-
"execution mode");
547-
}
548557
}
549558

550559
if (MinInstructions == 0) {

llvm/unittests/tools/llvm-exegesis/X86/SnippetFileTest.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,25 @@ TEST_F(X86SnippetFileTest, SnippetAddress) {
219219
EXPECT_EQ(Snippet.Key.SnippetAddress, 0x10000);
220220
}
221221

222+
TEST_F(X86SnippetFileTest, LoopRegister) {
223+
auto Snippets = TestCommon(R"(
224+
# LLVM-EXEGESIS-LOOP-REGISTER R11
225+
)");
226+
ASSERT_TRUE(static_cast<bool>(Snippets));
227+
EXPECT_THAT(*Snippets, SizeIs(1));
228+
const auto &Snippet = (*Snippets)[0];
229+
EXPECT_EQ(Snippet.Key.LoopRegister, X86::R11);
230+
}
231+
232+
TEST_F(X86SnippetFileTest, LoopRegisterInvalidRegister) {
233+
auto Error = TestCommon(R"(
234+
# LLVM-EXEGESIS-LOOP-REGISTER INVALID
235+
)")
236+
.takeError();
237+
EXPECT_TRUE(static_cast<bool>(Error));
238+
consumeError(std::move(Error));
239+
}
240+
222241
} // namespace
223242
} // namespace exegesis
224243
} // namespace llvm

llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,10 @@ class X86SnippetRepetitorTest : public X86TestBase {
4040

4141
void TestCommon(Benchmark::RepetitionModeE RepetitionMode,
4242
unsigned SnippetInstructions = 1) {
43-
const auto Repetitor = SnippetRepetitor::Create(RepetitionMode, State);
43+
const auto Repetitor = SnippetRepetitor::Create(
44+
RepetitionMode, State,
45+
State.getExegesisTarget().getDefaultLoopCounterRegister(
46+
State.getTargetMachine().getTargetTriple()));
4447
const std::vector<MCInst> Instructions(SnippetInstructions,
4548
MCInstBuilder(X86::NOOP));
4649
FunctionFiller Sink(*MF, {X86::EAX});
@@ -98,11 +101,12 @@ TEST_F(X86SnippetRepetitorTest, Loop) {
98101
HasOpcode(X86::NOOP), HasOpcode(X86::NOOP),
99102
HasOpcode(X86::NOOP), HasOpcode(X86::ADD64ri8),
100103
HasOpcode(X86::JCC_1)));
101-
EXPECT_THAT(LoopBlock.liveins(),
102-
UnorderedElementsAre(
103-
LiveReg(X86::EAX),
104-
LiveReg(State.getExegesisTarget().getLoopCounterRegister(
105-
State.getTargetMachine().getTargetTriple()))));
104+
EXPECT_THAT(
105+
LoopBlock.liveins(),
106+
UnorderedElementsAre(
107+
LiveReg(X86::EAX),
108+
LiveReg(State.getExegesisTarget().getDefaultLoopCounterRegister(
109+
State.getTargetMachine().getTargetTriple()))));
106110
EXPECT_THAT(MF->getBlockNumbered(2)->instrs(),
107111
ElementsAre(HasOpcode(X86::RET64)));
108112
}

0 commit comments

Comments
 (0)