Skip to content

[llvm-mca] Add command line option -call-latency #92958

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion llvm/include/llvm/MCA/InstrBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class InstrBuilder {

bool FirstCallInst;
bool FirstReturnInst;
unsigned CallLatency;

using InstRecycleCallback = std::function<Instruction *(const InstrDesc &)>;
InstRecycleCallback InstRecycleCB;
Expand All @@ -98,7 +99,7 @@ class InstrBuilder {
public:
InstrBuilder(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
const MCRegisterInfo &RI, const MCInstrAnalysis *IA,
const InstrumentManager &IM);
const InstrumentManager &IM, unsigned CallLatency);

void clear() {
Descriptors.clear();
Expand Down
20 changes: 11 additions & 9 deletions llvm/lib/MCA/InstrBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
const llvm::MCInstrInfo &mcii,
const llvm::MCRegisterInfo &mri,
const llvm::MCInstrAnalysis *mcia,
const mca::InstrumentManager &im)
const mca::InstrumentManager &im, unsigned cl)
: STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
FirstReturnInst(true) {
FirstReturnInst(true), CallLatency(cl) {
const MCSchedModel &SM = STI.getSchedModel();
ProcResourceMasks.resize(SM.getNumProcResourceKinds());
computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
Expand Down Expand Up @@ -220,17 +220,19 @@ static void initializeUsedResources(InstrDesc &ID,

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
const MCSchedClassDesc &SCDesc,
const MCSubtargetInfo &STI) {
const MCSubtargetInfo &STI,
unsigned CallLatency) {
if (MCDesc.isCall()) {
// We cannot estimate how long this call will take.
// Artificially set an arbitrarily high latency (100cy).
ID.MaxLatency = 100U;
// Artificially set an arbitrarily high latency.
ID.MaxLatency = CallLatency;
return;
}

int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
// If latency is unknown, then conservatively assume a MaxLatency of 100cy.
ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
// If latency is unknown, then conservatively assume the MaxLatency set for
// calls.
ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
Expand Down Expand Up @@ -568,7 +570,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
// We don't correctly model calls.
WithColor::warning() << "found a call in the input assembly sequence.\n";
WithColor::note() << "call instructions are not correctly modeled. "
<< "Assume a latency of 100cy.\n";
<< "Assume a latency of " << CallLatency << "cy.\n";
FirstCallInst = false;
}

Expand All @@ -580,7 +582,7 @@ InstrBuilder::createInstrDescImpl(const MCInst &MCI,
}

initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
computeMaxLatency(*ID, MCDesc, SCDesc, STI);
computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);

if (Error Err = verifyOperands(MCDesc, MCI))
return std::move(Err);
Expand Down
58 changes: 58 additions & 0 deletions llvm/test/tools/llvm-mca/X86/call-latency.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# Checks the llvm-mca -call-latency option on a single call instruction: the
# first RUN line uses the default call latency (DEFAULT checks), the second
# passes -call-latency=50 (CUSTOM checks); checks shared by both use ALL.
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 %s | FileCheck --check-prefixes=ALL,DEFAULT %s
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -call-latency=50 -iterations=1 %s | FileCheck --check-prefixes=ALL,CUSTOM %s

callq printf

# ALL: Iterations: 1
# ALL-NEXT: Instructions: 1

# CUSTOM-NEXT: Total Cycles: 53
# DEFAULT-NEXT: Total Cycles: 103

# ALL-NEXT: Total uOps: 1

# ALL: Dispatch Width: 2

# CUSTOM-NEXT: uOps Per Cycle: 0.02
# CUSTOM-NEXT: IPC: 0.02

# DEFAULT-NEXT: uOps Per Cycle: 0.01
# DEFAULT-NEXT: IPC: 0.01

# ALL-NEXT: Block RThroughput: 0.5

# ALL: Instruction Info:
# ALL-NEXT: [1]: #uOps
# ALL-NEXT: [2]: Latency
# ALL-NEXT: [3]: RThroughput
# ALL-NEXT: [4]: MayLoad
# ALL-NEXT: [5]: MayStore
# ALL-NEXT: [6]: HasSideEffects (U)

# ALL: [1] [2] [3] [4] [5] [6] Instructions:
# ALL-NEXT: 1 1 0.50 callq printf

# ALL: Resources:
# ALL-NEXT: [0] - JALU0
# ALL-NEXT: [1] - JALU1
# ALL-NEXT: [2] - JDiv
# ALL-NEXT: [3] - JFPA
# ALL-NEXT: [4] - JFPM
# ALL-NEXT: [5] - JFPU0
# ALL-NEXT: [6] - JFPU1
# ALL-NEXT: [7] - JLAGU
# ALL-NEXT: [8] - JMul
# ALL-NEXT: [9] - JSAGU
# ALL-NEXT: [10] - JSTC
# ALL-NEXT: [11] - JVALU0
# ALL-NEXT: [12] - JVALU1
# ALL-NEXT: [13] - JVIMUL

# ALL: Resource pressure per iteration:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
# ALL-NEXT: - 1.00 - - - - - - - - - - - -

# ALL: Resource pressure by instruction:
# ALL-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
# ALL-NEXT: - 1.00 - - - - - - - - - - - - callq printf
7 changes: 6 additions & 1 deletion llvm/tools/llvm-mca/llvm-mca.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@ static cl::opt<unsigned>
"(instructions per cycle)"),
cl::cat(ToolOptions), cl::init(0));

// -call-latency=<N>: latency, in cycles, that llvm-mca assumes for call
// instructions, since calls are not modeled precisely. Defaults to 100 cycles;
// the value is handed to mca::InstrBuilder when main() creates the builder.
// Marked cl::Hidden, so it is not listed in the regular -help output.
static cl::opt<unsigned>
CallLatency("call-latency", cl::Hidden,
cl::desc("Number of cycles to assume for a call instruction"),
cl::cat(ToolOptions), cl::init(100U));

enum class SkipType { NONE, LACK_SCHED, PARSE_FAILURE, ANY_FAILURE };

static cl::opt<enum SkipType> SkipUnsupportedInstructions(
Expand Down Expand Up @@ -568,7 +573,7 @@ int main(int argc, char **argv) {
}

// Create an instruction builder.
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, CallLatency);

// Create a context to control ownership of the pipeline hardware.
mca::Context MCA(*MRI, *STI);
Expand Down
2 changes: 1 addition & 1 deletion llvm/unittests/tools/llvm-mca/MCATestBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Error MCATestBase::runBaselineMCA(json::Object &Result, ArrayRef<MCInst> Insts,

// Default InstrumentManager
auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);

const SmallVector<mca::Instrument *> Instruments;
SmallVector<std::unique_ptr<mca::Instruction>> LoweredInsts;
Expand Down
4 changes: 2 additions & 2 deletions llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ TEST_F(X86TestBase, TestResumablePipeline) {
P->addEventListener(SV.get());

auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);

const SmallVector<mca::Instrument *> Instruments;
// Tile size = 7
Expand Down Expand Up @@ -124,7 +124,7 @@ TEST_F(X86TestBase, TestInstructionRecycling) {
// Default InstrumentManager
auto IM = std::make_unique<mca::InstrumentManager>(*STI, *MCII);

mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM);
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);
IB.setInstRecycleCallback(GetRecycledInst);

const SmallVector<mca::Instrument *> Instruments;
Expand Down
Loading