Skip to content

Commit 2fa6eaf

Browse files
xur-llvm and shenhanc78 authored
[llvm-profgen] Add support for Linux kernel profile (llvm#92831)
Add support for handling Linux kernel perf files. The functionality is enabled with the -kernel option. Note that currently only the main kernel (in vmlinux) is handled; kernel modules are not handled. --------- Co-authored-by: Han Shen <[email protected]>
1 parent e387299 commit 2fa6eaf

File tree

4 files changed

+123
-66
lines changed

4 files changed

+123
-66
lines changed

llvm/tools/llvm-profgen/PerfReader.cpp

Lines changed: 88 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
321321

322322
std::unique_ptr<PerfReaderBase>
323323
PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
324-
std::optional<uint32_t> PIDFilter) {
324+
std::optional<int32_t> PIDFilter) {
325325
std::unique_ptr<PerfReaderBase> PerfReader;
326326

327327
if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) {
@@ -331,9 +331,10 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
331331
}
332332

333333
// For perf data input, we need to convert them into perf script first.
334+
// If this is a kernel perf file, there is no need for retrieving PIDs.
334335
if (PerfInput.Format == PerfFormat::PerfData)
335-
PerfInput =
336-
PerfScriptReader::convertPerfDataToTrace(Binary, PerfInput, PIDFilter);
336+
PerfInput = PerfScriptReader::convertPerfDataToTrace(
337+
Binary, Binary->isKernel(), PerfInput, PIDFilter);
337338

338339
assert((PerfInput.Format == PerfFormat::PerfScript) &&
339340
"Should be a perfscript!");
@@ -353,59 +354,69 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
353354
}
354355

355356
PerfInputFile
356-
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary,
357+
PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID,
357358
PerfInputFile &File,
358-
std::optional<uint32_t> PIDFilter) {
359+
std::optional<int32_t> PIDFilter) {
359360
StringRef PerfData = File.InputFile;
360361
// Run perf script to retrieve PIDs matching binary we're interested in.
361362
auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf");
362363
if (!PerfExecutable) {
363364
exitWithError("Perf not found.");
364365
}
365366
std::string PerfPath = *PerfExecutable;
366-
367367
SmallString<128> PerfTraceFile;
368368
sys::fs::createUniquePath("perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp",
369369
PerfTraceFile, /*MakeAbsolute=*/true);
370370
std::string ErrorFile = std::string(PerfTraceFile) + ".err";
371-
StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
372-
"-F", "comm,pid", "-i",
373-
PerfData};
374371
std::optional<StringRef> Redirects[] = {std::nullopt, // Stdin
375372
StringRef(PerfTraceFile), // Stdout
376373
StringRef(ErrorFile)}; // Stderr
377-
sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);
378-
379374
PerfScriptReader::TempFileCleanups.emplace_back(PerfTraceFile);
380375
PerfScriptReader::TempFileCleanups.emplace_back(ErrorFile);
381376

382-
// Collect the PIDs
383-
TraceStream TraceIt(PerfTraceFile);
384377
std::string PIDs;
385-
std::unordered_set<uint32_t> PIDSet;
386-
while (!TraceIt.isAtEoF()) {
387-
MMapEvent MMap;
388-
if (isMMap2Event(TraceIt.getCurrentLine()) &&
389-
extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
390-
auto It = PIDSet.emplace(MMap.PID);
391-
if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
392-
if (!PIDs.empty()) {
393-
PIDs.append(",");
378+
if (!SkipPID) {
379+
StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events",
380+
"-F", "comm,pid", "-i",
381+
PerfData};
382+
sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects);
383+
384+
// Collect the PIDs
385+
TraceStream TraceIt(PerfTraceFile);
386+
std::unordered_set<int32_t> PIDSet;
387+
while (!TraceIt.isAtEoF()) {
388+
MMapEvent MMap;
389+
if (isMMapEvent(TraceIt.getCurrentLine()) &&
390+
extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) {
391+
auto It = PIDSet.emplace(MMap.PID);
392+
if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) {
393+
if (!PIDs.empty()) {
394+
PIDs.append(",");
395+
}
396+
PIDs.append(utostr(MMap.PID));
394397
}
395-
PIDs.append(utostr(MMap.PID));
396398
}
399+
TraceIt.advance();
397400
}
398-
TraceIt.advance();
399-
}
400401

401-
if (PIDs.empty()) {
402-
exitWithError("No relevant mmap event is found in perf data.");
402+
if (PIDs.empty()) {
403+
exitWithError("No relevant mmap event is found in perf data.");
404+
}
403405
}
404406

405407
// Run perf script again to retrieve events for PIDs collected above
406-
StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events",
407-
"-F", "ip,brstack", "--pid",
408-
PIDs, "-i", PerfData};
408+
SmallVector<StringRef, 8> ScriptSampleArgs;
409+
ScriptSampleArgs.push_back(PerfPath);
410+
ScriptSampleArgs.push_back("script");
411+
ScriptSampleArgs.push_back("--show-mmap-events");
412+
ScriptSampleArgs.push_back("-F");
413+
ScriptSampleArgs.push_back("ip,brstack");
414+
ScriptSampleArgs.push_back("-i");
415+
ScriptSampleArgs.push_back(PerfData);
416+
if (!PIDs.empty()) {
417+
ScriptSampleArgs.push_back("--pid");
418+
ScriptSampleArgs.push_back(PIDs);
419+
}
409420
sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, std::nullopt, Redirects);
410421

411422
return {std::string(PerfTraceFile), PerfFormat::PerfScript,
@@ -428,7 +439,10 @@ static StringRef filename(StringRef Path, bool UseBackSlash) {
428439
void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
429440
// Drop the event which doesn't belong to user-provided binary
430441
StringRef BinaryName = filename(Event.BinaryPath, Binary->isCOFF());
431-
if (Binary->getName() != BinaryName)
442+
bool IsKernel = Binary->isKernel();
443+
if (!IsKernel && Binary->getName() != BinaryName)
444+
return;
445+
if (IsKernel && !Binary->isKernelImageName(BinaryName))
432446
return;
433447

434448
// Drop the event if process does not match pid filter
@@ -441,7 +455,7 @@ void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) {
441455
return;
442456
}
443457

444-
if (Event.Offset == Binary->getTextSegmentOffset()) {
458+
if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) {
445459
// A binary image could be unloaded and then reloaded at different
446460
// place, so update binary load address.
447461
// Only update for the first executable segment and assume all other
@@ -950,16 +964,23 @@ void PerfScriptReader::parseSample(TraceStream &TraceIt) {
950964
parseSample(TraceIt, Count);
951965
}
952966

953-
bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
954-
StringRef Line,
955-
MMapEvent &MMap) {
956-
// Parse a line like:
967+
bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary,
968+
StringRef Line,
969+
MMapEvent &MMap) {
970+
// Parse a MMap2 line like:
957971
// PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0
958972
// 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so
959-
constexpr static const char *const Pattern =
960-
"PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: "
973+
constexpr static const char *const MMap2Pattern =
974+
"PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: "
961975
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
962976
"(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)";
977+
// Parse a MMap line like
978+
// PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \
979+
// 0xffffffff81e00000]: x [kernel.kallsyms]_text
980+
constexpr static const char *const MMapPattern =
981+
"PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: "
982+
"\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ "
983+
"(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)";
963984
// Field 0 - whole line
964985
// Field 1 - PID
965986
// Field 2 - base address
@@ -975,14 +996,25 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
975996
BINARY_PATH = 5
976997
};
977998

978-
Regex RegMmap2(Pattern);
999+
bool R = false;
9791000
SmallVector<StringRef, 6> Fields;
980-
bool R = RegMmap2.match(Line, &Fields);
1001+
if (Line.contains("PERF_RECORD_MMAP2 ")) {
1002+
Regex RegMmap2(MMap2Pattern);
1003+
R = RegMmap2.match(Line, &Fields);
1004+
} else if (Line.contains("PERF_RECORD_MMAP ")) {
1005+
Regex RegMmap(MMapPattern);
1006+
R = RegMmap.match(Line, &Fields);
1007+
} else
1008+
llvm_unreachable("unexpected MMAP event entry");
1009+
9811010
if (!R) {
9821011
std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n";
9831012
WithColor::warning() << WarningMsg;
1013+
return false;
9841014
}
985-
Fields[PID].getAsInteger(10, MMap.PID);
1015+
long long MMapPID = 0;
1016+
getAsSignedInteger(Fields[PID], 10, MMapPID);
1017+
MMap.PID = MMapPID;
9861018
Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address);
9871019
Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size);
9881020
Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset);
@@ -993,19 +1025,22 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary,
9931025
}
9941026

9951027
StringRef BinaryName = filename(MMap.BinaryPath, Binary->isCOFF());
1028+
if (Binary->isKernel()) {
1029+
return Binary->isKernelImageName(BinaryName);
1030+
}
9961031
return Binary->getName() == BinaryName;
9971032
}
9981033

999-
void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) {
1034+
void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) {
10001035
MMapEvent MMap;
1001-
if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
1036+
if (extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap))
10021037
updateBinaryAddress(MMap);
10031038
TraceIt.advance();
10041039
}
10051040

10061041
void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) {
1007-
if (isMMap2Event(TraceIt.getCurrentLine()))
1008-
parseMMap2Event(TraceIt);
1042+
if (isMMapEvent(TraceIt.getCurrentLine()))
1043+
parseMMapEvent(TraceIt);
10091044
else
10101045
parseSample(TraceIt);
10111046
}
@@ -1032,17 +1067,17 @@ bool PerfScriptReader::isLBRSample(StringRef Line) {
10321067
return false;
10331068
}
10341069

1035-
bool PerfScriptReader::isMMap2Event(StringRef Line) {
1070+
bool PerfScriptReader::isMMapEvent(StringRef Line) {
10361071
// Short cut to avoid string find is possible.
10371072
if (Line.empty() || Line.size() < 50)
10381073
return false;
10391074

10401075
if (std::isdigit(Line[0]))
10411076
return false;
10421077

1043-
// PERF_RECORD_MMAP2 does not appear at the beginning of the line
1044-
// for ` perf script --show-mmap-events -i ...`
1045-
return Line.contains("PERF_RECORD_MMAP2");
1078+
// PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of
1079+
// the line for ` perf script --show-mmap-events -i ...`
1080+
return Line.contains("PERF_RECORD_MMAP");
10461081
}
10471082

10481083
// The raw hybrid sample is like
@@ -1208,6 +1243,10 @@ void PerfScriptReader::warnInvalidRange() {
12081243
void PerfScriptReader::parsePerfTraces() {
12091244
// Parse perf traces and do aggregation.
12101245
parseAndAggregateTrace();
1246+
if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) {
1247+
exitWithError(
1248+
"Kernel is requested, but no kernel is found in mmap events.");
1249+
}
12111250

12121251
emitWarningSummary(NumLeafExternalFrame, NumTotalSample,
12131252
"of samples have leaf external frame in call stack.");

llvm/tools/llvm-profgen/PerfReader.h

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@ class PerfReaderBase {
570570
virtual ~PerfReaderBase() = default;
571571
static std::unique_ptr<PerfReaderBase>
572572
create(ProfiledBinary *Binary, PerfInputFile &PerfInput,
573-
std::optional<uint32_t> PIDFilter);
573+
std::optional<int32_t> PIDFilter);
574574

575575
// Entry of the reader to parse multiple perf traces
576576
virtual void parsePerfTraces() = 0;
@@ -595,15 +595,15 @@ class PerfReaderBase {
595595
class PerfScriptReader : public PerfReaderBase {
596596
public:
597597
PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace,
598-
std::optional<uint32_t> PID)
599-
: PerfReaderBase(B, PerfTrace), PIDFilter(PID){};
598+
std::optional<int32_t> PID)
599+
: PerfReaderBase(B, PerfTrace), PIDFilter(PID) {};
600600

601601
// Entry of the reader to parse multiple perf traces
602602
void parsePerfTraces() override;
603603
// Generate perf script from perf data
604-
static PerfInputFile
605-
convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File,
606-
std::optional<uint32_t> PIDFilter);
604+
static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary,
605+
bool SkipPID, PerfInputFile &File,
606+
std::optional<int32_t> PIDFilter);
607607
// Extract perf script type by peeking at the input
608608
static PerfContent checkPerfScriptType(StringRef FileName);
609609

@@ -615,7 +615,7 @@ class PerfScriptReader : public PerfReaderBase {
615615
protected:
616616
// The parsed MMap event
617617
struct MMapEvent {
618-
uint64_t PID = 0;
618+
int64_t PID = 0;
619619
uint64_t Address = 0;
620620
uint64_t Size = 0;
621621
uint64_t Offset = 0;
@@ -625,15 +625,15 @@ class PerfScriptReader : public PerfReaderBase {
625625
// Check whether a given line is LBR sample
626626
static bool isLBRSample(StringRef Line);
627627
// Check whether a given line is MMAP event
628-
static bool isMMap2Event(StringRef Line);
629-
// Parse a single line of a PERF_RECORD_MMAP2 event looking for a
628+
static bool isMMapEvent(StringRef Line);
629+
// Parse a single line of a PERF_RECORD_MMAP event looking for a
630630
// mapping between the binary name and its memory layout.
631-
static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line,
632-
MMapEvent &MMap);
631+
static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line,
632+
MMapEvent &MMap);
633633
// Update base address based on mmap events
634634
void updateBinaryAddress(const MMapEvent &Event);
635635
// Parse mmap event and update binary address
636-
void parseMMap2Event(TraceStream &TraceIt);
636+
void parseMMapEvent(TraceStream &TraceIt);
637637
// Parse perf events/samples and do aggregation
638638
void parseAndAggregateTrace();
639639
// Parse either an MMAP event or a perf sample
@@ -669,7 +669,7 @@ class PerfScriptReader : public PerfReaderBase {
669669
// Keep track of all invalid return addresses
670670
std::set<uint64_t> InvalidReturnAddresses;
671671
// PID for the process of interest
672-
std::optional<uint32_t> PIDFilter;
672+
std::optional<int32_t> PIDFilter;
673673
};
674674

675675
/*
@@ -681,8 +681,8 @@ class PerfScriptReader : public PerfReaderBase {
681681
class LBRPerfReader : public PerfScriptReader {
682682
public:
683683
LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
684-
std::optional<uint32_t> PID)
685-
: PerfScriptReader(Binary, PerfTrace, PID){};
684+
std::optional<int32_t> PID)
685+
: PerfScriptReader(Binary, PerfTrace, PID) {};
686686
// Parse the LBR only sample.
687687
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
688688
};
@@ -699,8 +699,8 @@ class LBRPerfReader : public PerfScriptReader {
699699
class HybridPerfReader : public PerfScriptReader {
700700
public:
701701
HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace,
702-
std::optional<uint32_t> PID)
703-
: PerfScriptReader(Binary, PerfTrace, PID){};
702+
std::optional<int32_t> PID)
703+
: PerfScriptReader(Binary, PerfTrace, PID) {};
704704
// Parse the hybrid sample including the call and LBR line
705705
void parseSample(TraceStream &TraceIt, uint64_t Count) override;
706706
void generateUnsymbolizedProfile() override;

llvm/tools/llvm-profgen/ProfiledBinary.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ static cl::list<std::string> DisassembleFunctions(
5656
cl::desc("List of functions to print disassembly for. Accept demangled "
5757
"names only. Only work with show-disassembly-only"));
5858

59+
static cl::opt<bool>
60+
KernelBinary("kernel",
61+
cl::desc("Generate the profile for Linux kernel binary."));
62+
5963
extern cl::opt<bool> ShowDetailedWarning;
6064
extern cl::opt<bool> InferMissingFrames;
6165

@@ -221,6 +225,9 @@ void ProfiledBinary::load() {
221225

222226
LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
223227

228+
// Mark the binary as a kernel image;
229+
IsKernel = KernelBinary;
230+
224231
// Find the preferred load address for text sections.
225232
setPreferredTextSegmentAddresses(Obj);
226233

llvm/tools/llvm-profgen/ProfiledBinary.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,9 @@ class ProfiledBinary {
291291
// Whether we need to symbolize all instructions to get function context size.
292292
bool TrackFuncContextSize = false;
293293

294+
// Whether this is a kernel image;
295+
bool IsKernel = false;
296+
294297
// Indicate if the base loading address is parsed from the mmap event or uses
295298
// the preferred address
296299
bool IsLoadedByMMap = false;
@@ -428,6 +431,14 @@ class ProfiledBinary {
428431

429432
bool usePseudoProbes() const { return UsePseudoProbes; }
430433
bool useFSDiscriminator() const { return UseFSDiscriminator; }
434+
bool isKernel() const { return IsKernel; }
435+
436+
static bool isKernelImageName(StringRef BinaryName) {
437+
return BinaryName == "[kernel.kallsyms]" ||
438+
BinaryName == "[kernel.kallsyms]_stext" ||
439+
BinaryName == "[kernel.kallsyms]_text";
440+
}
441+
431442
// Get the index in CodeAddressVec for the address
432443
// As we might get an address which is not the code
433444
// here it would round to the next valid code address by

0 commit comments

Comments
 (0)