Skip to content

Commit 17993eb

Browse files
[Memprof] Adds instrumentation support for memprof with histograms. (#100834)
This patch allows running `-fmemory-profile` in histogram mode (`-memprof-histogram`) without the flag `-memprof-use-callbacks`, meaning the `RecordAccessesHistogram` logic is injected directly into the IR as an inline sequence of instructions rather than requiring callbacks. This significantly improves the performance of the instrumented binary.
1 parent f9765a2 commit 17993eb

File tree

2 files changed

+74
-9
lines changed

2 files changed

+74
-9
lines changed

llvm/lib/Transforms/Instrumentation/MemProfiler.cpp

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ constexpr int LLVM_MEM_PROFILER_VERSION = 1;
6161
// Size of memory mapped to a single shadow location.
6262
constexpr uint64_t DefaultMemGranularity = 64;
6363

64+
// Size of memory mapped to a single histogram bucket.
65+
constexpr uint64_t HistogramGranularity = 8;
66+
6467
// Scale from granularity down to shadow size.
6568
constexpr uint64_t DefaultShadowScale = 3;
6669

@@ -192,7 +195,7 @@ namespace {
192195
struct ShadowMapping {
193196
ShadowMapping() {
194197
Scale = ClMappingScale;
195-
Granularity = ClMappingGranularity;
198+
Granularity = ClHistogram ? HistogramGranularity : ClMappingGranularity;
196199
Mask = ~(Granularity - 1);
197200
}
198201

@@ -276,6 +279,8 @@ MemProfilerPass::MemProfilerPass() = default;
276279

277280
PreservedAnalyses MemProfilerPass::run(Function &F,
278281
AnalysisManager<Function> &AM) {
282+
assert((!ClHistogram || ClMappingGranularity == DefaultMemGranularity) &&
283+
"Memprof with histogram only supports default mapping granularity");
279284
Module &M = *F.getParent();
280285
MemProfiler Profiler(M);
281286
if (Profiler.instrumentFunction(F))
@@ -288,10 +293,6 @@ ModuleMemProfilerPass::ModuleMemProfilerPass() = default;
288293
PreservedAnalyses ModuleMemProfilerPass::run(Module &M,
289294
AnalysisManager<Module> &AM) {
290295

291-
assert((!ClHistogram || (ClHistogram && ClUseCalls)) &&
292-
"Cannot use -memprof-histogram without Callbacks. Set "
293-
"memprof-use-callbacks");
294-
295296
ModuleMemProfiler Profiler(M);
296297
if (Profiler.instrumentModule(M))
297298
return PreservedAnalyses::none();
@@ -489,14 +490,21 @@ void MemProfiler::instrumentAddress(Instruction *OrigIns,
489490
return;
490491
}
491492

492-
// Create an inline sequence to compute shadow location, and increment the
493-
// value by one.
494-
Type *ShadowTy = Type::getInt64Ty(*C);
493+
Type *ShadowTy = ClHistogram ? Type::getInt8Ty(*C) : Type::getInt64Ty(*C);
495494
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
495+
496496
Value *ShadowPtr = memToShadow(AddrLong, IRB);
497497
Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);
498498
Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);
499-
Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);
499+
// If we are profiling with histograms, saturate the 8-bit counter at 255: only increment while the loaded value is below the maximum, so it never wraps.
500+
if (ClHistogram) {
501+
Value *MaxCount = ConstantInt::get(Type::getInt8Ty(*C), 255);
502+
Value *Cmp = IRB.CreateICmpULT(ShadowValue, MaxCount);
503+
Instruction *IncBlock =
504+
SplitBlockAndInsertIfThen(Cmp, InsertBefore, /*Unreachable=*/false);
505+
IRB.SetInsertPoint(IncBlock);
506+
}
507+
Value *Inc = ConstantInt::get(ShadowTy, 1);
500508
ShadowValue = IRB.CreateAdd(ShadowValue, Inc);
501509
IRB.CreateStore(ShadowValue, ShadowAddr);
502510
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
; Test basic memory profiler instrumentation with histograms.
2+
;
3+
; RUN: opt < %s -passes='function(memprof),memprof-module' -memprof-histogram -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s
4+
5+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
; CHECK: @llvm.used = appending global [1 x ptr] [ptr @memprof.module_ctor]
9+
; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @memprof.module_ctor, ptr null }]
10+
11+
define i32 @test_load(ptr %a) {
12+
entry:
13+
%tmp1 = load i32, ptr %a, align 4
14+
ret i32 %tmp1
15+
}
16+
; CHECK-LABEL: @test_load
17+
; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, ptr @__memprof_shadow_memory_dynamic_address
18+
; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint ptr %a to i64
19+
; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -8
20+
; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
21+
; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
22+
; CHECK-NEXT: %[[LOAD_SHADOW_PTR:[^ ]*]] = inttoptr
23+
; CHECK-NEXT: %[[LOAD_SHADOW:[^ ]*]] = load i8, ptr %[[LOAD_SHADOW_PTR]]
24+
; CHECK-NEXT: %[[ICMP_MAX_COUNT:[^ ]*]] = icmp ult i8 %[[LOAD_SHADOW]], -1
25+
; CHECK-NEXT: br i1 %[[ICMP_MAX_COUNT]], label %[[INC_LABEL:[^ ]*]], label %[[ELSE_LABEL:[^ ]*]]
26+
; CHECK: [[INC_LABEL]]:
27+
; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i8 %[[LOAD_SHADOW]], 1
28+
; CHECK-NEXT: store i8 %[[NEW_SHADOW]], ptr %[[LOAD_SHADOW_PTR]]
29+
; CHECK-NEXT: br label %[[ELSE_LABEL]]
30+
; The actual load.
31+
; CHECK: [[ELSE_LABEL]]:
32+
; CHECK-NEXT: %tmp1 = load i32, ptr %a
33+
; CHECK-NEXT: ret i32 %tmp1
34+
35+
define void @test_store(ptr %a) {
36+
entry:
37+
store i32 42, ptr %a, align 4
38+
ret void
39+
}
40+
; CHECK-LABEL: @test_store
41+
; CHECK: %[[SHADOW_OFFSET:[^ ]*]] = load i64, ptr @__memprof_shadow_memory_dynamic_address
42+
; CHECK-NEXT: %[[LOAD_ADDR:[^ ]*]] = ptrtoint ptr %a to i64
43+
; CHECK-NEXT: %[[MASKED_ADDR:[^ ]*]] = and i64 %[[LOAD_ADDR]], -8
44+
; CHECK-S3-NEXT: %[[SHIFTED_ADDR:[^ ]*]] = lshr i64 %[[MASKED_ADDR]], 3
45+
; CHECK-NEXT: add i64 %[[SHIFTED_ADDR]], %[[SHADOW_OFFSET]]
46+
; CHECK-NEXT: %[[STORE_SHADOW_PTR:[^ ]*]] = inttoptr
47+
; CHECK-NEXT: %[[STORE_SHADOW:[^ ]*]] = load i8, ptr %[[STORE_SHADOW_PTR]]
48+
; CHECK-NEXT: %[[ICMP_MAX_COUNT:[^ ]*]] = icmp ult i8 %[[STORE_SHADOW]], -1
49+
; CHECK-NEXT: br i1 %[[ICMP_MAX_COUNT]], label %[[INC_LABEL:[^ ]*]], label %[[ELSE_LABEL:[^ ]*]]
50+
; CHECK: [[INC_LABEL]]:
51+
; CHECK-NEXT: %[[NEW_SHADOW:[^ ]*]] = add i8 %[[STORE_SHADOW]], 1
52+
; CHECK-NEXT: store i8 %[[NEW_SHADOW]], ptr %[[STORE_SHADOW_PTR]]
53+
; CHECK-NEXT: br label %[[ELSE_LABEL]]
54+
; The actual store.
55+
; CHECK: [[ELSE_LABEL]]:
56+
; CHECK-NEXT: store i32 42, ptr %a, align 4
57+
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)