Skip to content

Commit 80826de

Browse files
Merge pull request #341 from nnethercote/new-DHAT
Add support for the new version of DHAT.
2 parents 6d1e1be + 146fefe commit 80826de

File tree

4 files changed

+66
-18
lines changed

4 files changed

+66
-18
lines changed

collector/README.md

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -234,26 +234,48 @@ except that `$PROFILER` is one of the following.
234234
text output is also written to files with a `clgann` prefix; this output is
235235
much the same as the `cgann`-prefixed files produced by Cachegrind, but
236236
with extra annotations showing function call counts.
237-
- `dhat`: Profile with [DHAT](http://valgrind.org/docs/manual/dh-manual.html),
238-
a heap profiler.
239-
- **Purpose**. DHAT is good for finding which parts of the code are causing a
240-
lot of allocations. This is relevant if another profiler such as
237+
- `exp-dhat`: Profile with [ExpDHAT](http://valgrind.org/docs/manual/dh-manual.html),
238+
an experimental heap profiler that came with Valgrind (`--tool=exp-dhat`)
239+
prior to version 3.15.
240+
- **Purpose**. ExpDHAT is good for finding which parts of the code are
241+
causing a lot of allocations. This is relevant if another profiler such as
241242
`perf-record` or Cachegrind tells you that `malloc` and `free` are hot
242243
functions (as they often are).
243244
- **Slowdown**. Roughly 5--20x.
244-
- **Prerequisites**. DHAT may require a rustc configured with `use-jemalloc =
245-
false` to work well.
246-
- **Configuration**. DHAT is configured within `profile` to run with the
247-
non-default `--tot-blocks-allocd` option, so that it sorts its
248-
output by the number of blocks allocated rather than the number of bytes
249-
allocated. This is because the number of allocations typically has a
250-
greater effect on speed than the size of those allocations; many small
251-
allocations will typically be slower than a few large allocations.
245+
- **Prerequisites**. ExpDHAT may require a rustc configured with
246+
`use-jemalloc = false` to work well.
247+
- **Configuration**. ExpDHAT is configured within `profile` to run with the
248+
non-default `--tot-blocks-allocd` option, so that it sorts its output by
249+
the number of blocks allocated rather than the number of bytes allocated.
250+
This is because the number of allocations typically has a greater effect on
251+
speed than the size of those allocations; many small allocations will
252+
typically be slower than a few large allocations.
252253
- **Output**. Human-readable text output is written to files with an `exp-dhat`
253254
prefix. This file includes summary statistics followed by numerous records,
254255
each of which aggregates data about all the allocations associated with a
255256
particular stack trace: the number of allocations, their average size, and
256257
how often they are read from and written to.
258+
- `dhat`: Profile with [DHAT](http://valgrind.org/docs/manual/dh-manual.html),
259+
a heap profiler that comes with Valgrind (`--tool=dhat`) in versions 3.15
260+
and later. It has the same purpose as ExpDHAT, but is significantly more
261+
powerful.
262+
- **Purpose**. DHAT is good for finding which parts of the code are causing a
263+
lot of allocations. This is relevant if another profiler such as
264+
`perf-record` or Cachegrind tells you that `malloc` and `free` are hot
265+
functions (as they often are). It also gives insight into peak memory
266+
usage, similar to Massif.
267+
- **Slowdown**. Roughly 5--20x.
268+
- **Prerequisites**. DHAT may require a rustc configured with `use-jemalloc =
269+
false` to work well.
270+
- **Configuration**. DHAT is configured within `profile` to run with the
271+
non-default `--num-callers=4` option, which dictates stack depths. (This
272+
value of 4 does not include inlined stack frames, so in practice the depths
273+
of stack traces are a lot more than 4.) This is almost always enough, but
274+
on the rare occasion it isn't, you can change the value in `rustc-fake.rs` and
275+
rebuild `collector`. Note that higher values make DHAT run more slowly and
276+
increase the size of its data files.
277+
- **Output**. Raw output is written to files with a `dhout` prefix. Those
278+
files can be viewed with DHAT's viewer (`dh_view.html`).
257279
- `massif`: Profile with
258280
[Massif](http://valgrind.org/docs/manual/ms-manual.html), a heap profiler.
259281
- **Purpose**. Massif is designed to give insight into a program's peak

collector/src/bin/rustc-fake.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ fn main() {
9292
assert!(cmd.status().expect("failed to spawn").success());
9393
}
9494

95-
"dhat" => {
95+
"exp-dhat" => {
9696
let mut cmd = Command::new("valgrind");
9797
let has_valgrind = cmd.output().is_ok();
9898
assert!(has_valgrind);
@@ -106,6 +106,19 @@ fn main() {
106106
assert!(cmd.status().expect("failed to spawn").success());
107107
}
108108

109+
"dhat" => {
110+
let mut cmd = Command::new("valgrind");
111+
let has_valgrind = cmd.output().is_ok();
112+
assert!(has_valgrind);
113+
cmd.arg("--tool=dhat")
114+
.arg("--num-callers=4")
115+
.arg("--dhat-out-file=dhout")
116+
.arg(&rustc)
117+
.args(&args);
118+
119+
assert!(cmd.status().expect("failed to spawn").success());
120+
}
121+
109122
"massif" => {
110123
let mut cmd = Command::new("valgrind");
111124
let has_valgrind = cmd.output().is_ok();

collector/src/bin/rustc-perf-collector/execute.rs

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ pub enum Profiler {
8888
PerfRecord,
8989
Cachegrind,
9090
Callgrind,
91+
ExpDHAT,
9192
DHAT,
9293
Massif,
9394
Eprintln,
@@ -112,6 +113,7 @@ impl Profiler {
112113
"perf-record" => Ok(Profiler::PerfRecord),
113114
"cachegrind" => Ok(Profiler::Cachegrind),
114115
"callgrind" => Ok(Profiler::Callgrind),
116+
"exp-dhat" => Ok(Profiler::ExpDHAT),
115117
"dhat" => Ok(Profiler::DHAT),
116118
"massif" => Ok(Profiler::Massif),
117119
"eprintln" => Ok(Profiler::Eprintln),
@@ -126,6 +128,7 @@ impl Profiler {
126128
Profiler::PerfRecord => "perf-record",
127129
Profiler::Cachegrind => "cachegrind",
128130
Profiler::Callgrind => "callgrind",
131+
Profiler::ExpDHAT => "exp-dhat",
129132
Profiler::DHAT => "dhat",
130133
Profiler::Massif => "massif",
131134
Profiler::Eprintln => "eprintln",
@@ -517,16 +520,26 @@ impl<'a> Processor for ProfileProcessor<'a> {
517520
f.flush()?;
518521
}
519522

520-
// DHAT writes its output to stderr. We copy that output into a
523+
// ExpDHAT writes its output to stderr. We copy that output into a
521524
// file in the output dir.
522-
Profiler::DHAT => {
523-
let dhat_file = filepath(self.output_dir, &out_file("dhat"));
525+
Profiler::ExpDHAT => {
526+
let exp_dhat_file = filepath(self.output_dir, &out_file("exp-dhat"));
524527

525-
let mut f = File::create(dhat_file)?;
528+
let mut f = File::create(exp_dhat_file)?;
526529
f.write_all(&output.stderr)?;
527530
f.flush()?;
528531
}
529532

533+
// DHAT produces (via rustc-fake) a data file called 'dhout'. We
534+
// copy it from the temp dir to the output dir, giving it a new
535+
// name in the process.
536+
Profiler::DHAT => {
537+
let tmp_dhout_file = filepath(data.cwd.as_ref(), "dhout");
538+
let dhout_file = filepath(self.output_dir, &out_file("dhout"));
539+
540+
fs::copy(&tmp_dhout_file, &dhout_file)?;
541+
}
542+
530543
// Massif produces (via rustc-fake) a data file called 'msout'. We
531544
// copy it from the temp dir to the output dir, giving it a new
532545
// name in the process.

collector/src/bin/rustc-perf-collector/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ fn main_result() -> Result<i32, Error> {
387387
'BaseIncr', 'CleanIncr', 'PatchedIncrs', 'All'")
388388
(@arg PROFILER: +required +takes_value
389389
"One of: 'time-passes', 'perf-record', 'cachegrind',\n\
390-
'callgrind', 'dhat', 'massif', 'eprintln'")
390+
'callgrind', 'exp-dhat', 'dhat', 'massif', 'eprintln'")
391391
(@arg ID: +required +takes_value "Identifier to associate benchmark results with")
392392
)
393393
(@subcommand remove_benchmark =>

0 commit comments

Comments
 (0)