Skip to content

Commit 74069a0

Browse files
Sberm authored and acmel committed
perf record --off-cpu: Dump the remaining PERF_SAMPLE_ in sample_type from BPF's stack trace map
Dump the remaining PERF_SAMPLE_ data, as if it is dumping a direct sample.

Put the stack trace, tid, off-cpu time and cgroup id into the raw_data section, just like a direct off-cpu sample coming from BPF's bpf_perf_event_output().

This ensures that evsel__parse_sample() correctly parses both direct samples and accumulated samples.

Suggested-by: Namhyung Kim <[email protected]>
Reviewed-by: Ian Rogers <[email protected]>
Signed-off-by: Howard Chu <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Tested-by: Gautam Menghani <[email protected]>
Tested-by: Ian Rogers <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: James Clark <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Kan Liang <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 8ae7a57 commit 74069a0

File tree

1 file changed

+35
-24
lines changed

1 file changed

+35
-24
lines changed

tools/perf/util/bpf_off_cpu.c

Lines changed: 35 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,8 @@ union off_cpu_data {
3737
u64 array[1024 / sizeof(u64)];
3838
};
3939

40+
u64 off_cpu_raw[MAX_STACKS + 5];
41+
4042
static int off_cpu_config(struct evlist *evlist)
4143
{
4244
char off_cpu_event[64];
@@ -313,6 +315,7 @@ int off_cpu_write(struct perf_session *session)
313315
{
314316
int bytes = 0, size;
315317
int fd, stack;
318+
u32 raw_size;
316319
u64 sample_type, val, sid = 0;
317320
struct evsel *evsel;
318321
struct perf_data_file *file = &session->data->file;
@@ -352,46 +355,54 @@ int off_cpu_write(struct perf_session *session)
352355

353356
while (!bpf_map_get_next_key(fd, &prev, &key)) {
354357
int n = 1; /* start from perf_event_header */
355-
int ip_pos = -1;
356358

357359
bpf_map_lookup_elem(fd, &key, &val);
358360

361+
/* zero-fill some of the fields, will be overwritten by raw_data when parsing */
359362
if (sample_type & PERF_SAMPLE_IDENTIFIER)
360363
data.array[n++] = sid;
361-
if (sample_type & PERF_SAMPLE_IP) {
362-
ip_pos = n;
364+
if (sample_type & PERF_SAMPLE_IP)
363365
data.array[n++] = 0; /* will be updated */
364-
}
365366
if (sample_type & PERF_SAMPLE_TID)
366-
data.array[n++] = (u64)key.pid << 32 | key.tgid;
367+
data.array[n++] = 0;
367368
if (sample_type & PERF_SAMPLE_TIME)
368369
data.array[n++] = tstamp;
369-
if (sample_type & PERF_SAMPLE_ID)
370-
data.array[n++] = sid;
371370
if (sample_type & PERF_SAMPLE_CPU)
372371
data.array[n++] = 0;
373372
if (sample_type & PERF_SAMPLE_PERIOD)
374-
data.array[n++] = val;
375-
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
376-
int len = 0;
377-
378-
/* data.array[n] is callchain->nr (updated later) */
379-
data.array[n + 1] = PERF_CONTEXT_USER;
380-
data.array[n + 2] = 0;
381-
382-
bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
383-
while (data.array[n + 2 + len])
373+
data.array[n++] = 0;
374+
if (sample_type & PERF_SAMPLE_RAW) {
375+
/*
376+
* [ size ][ data ]
377+
* [ data ]
378+
* [ data ]
379+
* [ data ]
380+
* [ data ][ empty]
381+
*/
382+
int len = 0, i = 0;
383+
void *raw_data = (void *)data.array + n * sizeof(u64);
384+
385+
off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid;
386+
off_cpu_raw[i++] = val;
387+
388+
/* off_cpu_raw[i] is callchain->nr (updated later) */
389+
off_cpu_raw[i + 1] = PERF_CONTEXT_USER;
390+
off_cpu_raw[i + 2] = 0;
391+
392+
bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]);
393+
while (off_cpu_raw[i + 2 + len])
384394
len++;
385395

386-
/* update length of callchain */
387-
data.array[n] = len + 1;
396+
off_cpu_raw[i] = len + 1;
397+
i += len + 2;
398+
399+
off_cpu_raw[i++] = key.cgroup_id;
388400

389-
/* update sample ip with the first callchain entry */
390-
if (ip_pos >= 0)
391-
data.array[ip_pos] = data.array[n + 2];
401+
raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */
402+
memcpy(raw_data, &raw_size, sizeof(raw_size));
403+
memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64));
392404

393-
/* calculate sample callchain data array length */
394-
n += len + 2;
405+
n += i + 1;
395406
}
396407
if (sample_type & PERF_SAMPLE_CGROUP)
397408
data.array[n++] = key.cgroup_id;

0 commit comments

Comments
 (0)