Skip to content

Commit 95554cb

Browse files
[memprof] Teach extractCallsFromIR to recognize heap allocation functions (#115938)
This patch teaches extractCallsFromIR to recognize heap allocation functions. Specifically, when it encounters a callee that is known to be a heap allocation function, such as operator new, it sets the callee GUID to 0. Note that I am planning to do the same for the caller-callee pairs extracted from the profile. That is, when we encounter a frame that does not have a callee, we will assume that the frame is calling some heap allocation function with GUID 0. Technically, this patch does not yet recognize every function it needs to: TCMalloc is known to drop certain frames in the call stack immediately above operator new. This patch is meant to lay the groundwork: setting up GetTLI, plumbing it to extractCallsFromIR, and adjusting the unit tests. I'll address the remaining issues in subsequent patches.
1 parent 5911fbb commit 95554cb

File tree

3 files changed

+93
-4
lines changed

3 files changed

+93
-4
lines changed

llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
namespace llvm {
1919
class Function;
2020
class Module;
21+
class TargetLibraryInfo;
2122

2223
namespace vfs {
2324
class FileSystem;
@@ -86,7 +87,8 @@ using CallEdgeTy = std::pair<LineLocation, uint64_t>;
8687

8788
// Extract all calls from the IR. Arrange them in a map from caller GUIDs to a
8889
// list of call sites, each of the form {LineLocation, CalleeGUID}.
89-
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> extractCallsFromIR(Module &M);
90+
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
91+
extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI);
9092

9193
} // namespace memprof
9294
} // namespace llvm

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -796,7 +796,7 @@ struct AllocMatchInfo {
796796
};
797797

798798
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>
799-
memprof::extractCallsFromIR(Module &M) {
799+
memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI) {
800800
DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;
801801

802802
auto GetOffset = [](const DILocation *DIL) {
@@ -820,16 +820,25 @@ memprof::extractCallsFromIR(Module &M) {
820820
continue;
821821

822822
StringRef CalleeName = CalledFunction->getName();
823+
bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);
823824
for (const DILocation *DIL = I.getDebugLoc(); DIL;
824825
DIL = DIL->getInlinedAt()) {
825826
StringRef CallerName = DIL->getSubprogramLinkageName();
826827
assert(!CallerName.empty() &&
827828
"Be sure to enable -fdebug-info-for-profiling");
828829
uint64_t CallerGUID = IndexedMemProfRecord::getGUID(CallerName);
829830
uint64_t CalleeGUID = IndexedMemProfRecord::getGUID(CalleeName);
831+
// Pretend that we are calling a function with GUID == 0 if we are
832+
// calling a heap allocation function.
833+
if (IsAlloc)
834+
CalleeGUID = 0;
830835
LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};
831836
Calls[CallerGUID].emplace_back(Loc, CalleeGUID);
832837
CalleeName = CallerName;
838+
// FIXME: Recognize other frames that are associated with heap
839+
// allocation functions. It may be too early to reset IsAlloc to
840+
// false here.
841+
IsAlloc = false;
833842
}
834843
}
835844
}

llvm/unittests/Transforms/Instrumentation/MemProfUseTest.cpp

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9+
#include "llvm/Analysis/TargetLibraryInfo.h"
910
#include "llvm/AsmParser/Parser.h"
1011
#include "llvm/IR/LLVMContext.h"
1112
#include "llvm/IR/Module.h"
13+
#include "llvm/Passes/PassBuilder.h"
1214
#include "llvm/ProfileData/MemProf.h"
1315
#include "llvm/Support/SourceMgr.h"
1416
#include "llvm/Transforms/Instrumentation/MemProfiler.h"
@@ -80,7 +82,12 @@ declare !dbg !19 void @_Z2f3v()
8082
std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
8183
ASSERT_TRUE(M);
8284

83-
auto Calls = extractCallsFromIR(*M);
85+
auto *F = M->getFunction("_Z3foov");
86+
ASSERT_NE(F, nullptr);
87+
88+
TargetLibraryInfoWrapperPass WrapperPass;
89+
auto &TLI = WrapperPass.getTLI(*F);
90+
auto Calls = extractCallsFromIR(*M, TLI);
8491

8592
// Expect exactly one caller.
8693
ASSERT_THAT(Calls, SizeIs(1));
@@ -177,7 +184,12 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
177184
std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
178185
ASSERT_TRUE(M);
179186

180-
auto Calls = extractCallsFromIR(*M);
187+
auto *F = M->getFunction("_Z3foov");
188+
ASSERT_NE(F, nullptr);
189+
190+
TargetLibraryInfoWrapperPass WrapperPass;
191+
auto &TLI = WrapperPass.getTLI(*F);
192+
auto Calls = extractCallsFromIR(*M, TLI);
181193

182194
// Expect exactly 4 callers.
183195
ASSERT_THAT(Calls, SizeIs(4));
@@ -220,4 +232,70 @@ declare !dbg !25 void @_Z2g2v() local_unnamed_addr
220232
EXPECT_THAT(G3CallSites[1],
221233
Pair(FieldsAre(2U, 3U), IndexedMemProfRecord::getGUID("_Z2g2v")));
222234
}
235+
236+
TEST(MemProf, ExtractDirectCallsFromIRCallingNew) {
237+
// The following IR is generated from:
238+
//
239+
// int *foo() {
240+
// return ::new (int);
241+
// }
242+
StringRef IR = R"IR(
243+
define dso_local noundef ptr @_Z3foov() #0 !dbg !10 {
244+
entry:
245+
%call = call noalias noundef nonnull ptr @_Znwm(i64 noundef 4) #2, !dbg !13
246+
ret ptr %call, !dbg !14
247+
}
248+
249+
; Function Attrs: nobuiltin allocsize(0)
250+
declare noundef nonnull ptr @_Znwm(i64 noundef) #1
251+
252+
attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
253+
attributes #1 = { nobuiltin allocsize(0) "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
254+
attributes #2 = { builtin allocsize(0) }
255+
256+
!llvm.dbg.cu = !{!0}
257+
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
258+
!llvm.ident = !{!9}
259+
260+
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
261+
!1 = !DIFile(filename: "foobar.cc", directory: "/")
262+
!2 = !{i32 7, !"Dwarf Version", i32 5}
263+
!3 = !{i32 2, !"Debug Info Version", i32 3}
264+
!4 = !{i32 1, !"wchar_size", i32 4}
265+
!5 = !{i32 1, !"MemProfProfileFilename", !"memprof.profraw"}
266+
!6 = !{i32 8, !"PIC Level", i32 2}
267+
!7 = !{i32 7, !"PIE Level", i32 2}
268+
!8 = !{i32 7, !"uwtable", i32 2}
269+
!9 = !{!"clang"}
270+
!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 1, type: !11, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
271+
!11 = !DISubroutineType(types: !12)
272+
!12 = !{}
273+
!13 = !DILocation(line: 2, column: 10, scope: !10)
274+
!14 = !DILocation(line: 2, column: 3, scope: !10)
275+
)IR";
276+
277+
LLVMContext Ctx;
278+
SMDiagnostic Err;
279+
std::unique_ptr<Module> M = parseAssemblyString(IR, Err, Ctx);
280+
ASSERT_TRUE(M);
281+
282+
auto *F = M->getFunction("_Z3foov");
283+
ASSERT_NE(F, nullptr);
284+
285+
TargetLibraryInfoWrapperPass WrapperPass;
286+
auto &TLI = WrapperPass.getTLI(*F);
287+
auto Calls = extractCallsFromIR(*M, TLI);
288+
289+
// Expect exactly one caller.
290+
ASSERT_THAT(Calls, SizeIs(1));
291+
292+
// Verify each key-value pair.
293+
294+
auto FooIt = Calls.find(IndexedMemProfRecord::getGUID("_Z3foov"));
295+
ASSERT_NE(FooIt, Calls.end());
296+
const auto &[FooCallerGUID, FooCallSites] = *FooIt;
297+
EXPECT_EQ(FooCallerGUID, IndexedMemProfRecord::getGUID("_Z3foov"));
298+
ASSERT_THAT(FooCallSites, SizeIs(1));
299+
EXPECT_THAT(FooCallSites[0], Pair(FieldsAre(1U, 10U), 0));
300+
}
223301
} // namespace

0 commit comments

Comments
 (0)