Skip to content

Commit 16c66bc

Browse files
olsajiri authored and acmel committed
perf top: Add processing thread
Add a new thread that takes care of the hist creating to alleviate the main reader thread, so it can keep perf mmaps served in time and we reduce the possibility of losing events.

The 'perf top' command now spawns 2 extra threads. The data processing is the following:

1) The main thread reads the data from mmaps and queues them to the ordered events object;

2) The processing thread takes the data from the ordered events object and creates the initial histogram;

3) The GUI thread periodically sorts the initial histogram and presents it.

Passing the data between threads 1 and 2 is done by having 2 ordered events queues: one is always being stored by thread 1 while the other is flushed out in thread 2.

Passing the data between threads 2 and 3 stays the same as it was initially for threads 1 and 3.

Signed-off-by: Jiri Olsa <[email protected]>
Acked-by: David S. Miller <[email protected]>
Acked-by: Namhyung Kim <[email protected]>
Tested-by: Arnaldo Carvalho de Melo <[email protected]>
Cc: Alexander Shishkin <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Link: http://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 254de74 commit 16c66bc

File tree

4 files changed

+151
-62
lines changed

4 files changed

+151
-62
lines changed

tools/perf/builtin-top.c

Lines changed: 141 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
#include "arch/common.h"
4747

4848
#include "util/debug.h"
49+
#include "util/ordered-events.h"
4950

5051
#include <assert.h>
5152
#include <elf.h>
@@ -830,78 +831,28 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
830831
{
831832
struct record_opts *opts = &top->record_opts;
832833
struct perf_evlist *evlist = top->evlist;
833-
struct perf_sample sample;
834-
struct perf_evsel *evsel;
835834
struct perf_mmap *md;
836-
struct perf_session *session = top->session;
837835
union perf_event *event;
838-
struct machine *machine;
839-
int ret;
840836

841837
md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
842838
if (perf_mmap__read_init(md) < 0)
843839
return;
844840

845841
while ((event = perf_mmap__read_event(md)) != NULL) {
846-
ret = perf_evlist__parse_sample(evlist, event, &sample);
847-
if (ret) {
848-
pr_err("Can't parse sample, err = %d\n", ret);
849-
goto next_event;
850-
}
851-
852-
evsel = perf_evlist__id2evsel(session->evlist, sample.id);
853-
assert(evsel != NULL);
842+
u64 timestamp = -1ULL;
843+
int ret;
854844

855-
if (event->header.type == PERF_RECORD_SAMPLE)
856-
++top->samples;
857-
858-
switch (sample.cpumode) {
859-
case PERF_RECORD_MISC_USER:
860-
++top->us_samples;
861-
if (top->hide_user_symbols)
862-
goto next_event;
863-
machine = &session->machines.host;
864-
break;
865-
case PERF_RECORD_MISC_KERNEL:
866-
++top->kernel_samples;
867-
if (top->hide_kernel_symbols)
868-
goto next_event;
869-
machine = &session->machines.host;
845+
ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
846+
if (ret && ret != -1)
870847
break;
871-
case PERF_RECORD_MISC_GUEST_KERNEL:
872-
++top->guest_kernel_samples;
873-
machine = perf_session__find_machine(session,
874-
sample.pid);
875-
break;
876-
case PERF_RECORD_MISC_GUEST_USER:
877-
++top->guest_us_samples;
878-
/*
879-
* TODO: we don't process guest user from host side
880-
* except simple counting.
881-
*/
882-
goto next_event;
883-
default:
884-
if (event->header.type == PERF_RECORD_SAMPLE)
885-
goto next_event;
886-
machine = &session->machines.host;
887-
break;
888-
}
889848

849+
pthread_mutex_lock(&top->qe.lock);
850+
ret = ordered_events__queue(top->qe.in, event, timestamp, 0);
851+
pthread_mutex_unlock(&top->qe.lock);
890852

891-
if (event->header.type == PERF_RECORD_SAMPLE) {
892-
perf_event__process_sample(&top->tool, event, evsel,
893-
&sample, machine);
894-
} else if (event->header.type == PERF_RECORD_LOST) {
895-
perf_top__process_lost(top, event, evsel);
896-
} else if (event->header.type == PERF_RECORD_LOST_SAMPLES) {
897-
perf_top__process_lost_samples(top, event, evsel);
898-
} else if (event->header.type < PERF_RECORD_MAX) {
899-
hists__inc_nr_events(evsel__hists(evsel), event->header.type);
900-
machine__process_event(machine, event, &sample);
901-
} else
902-
++session->evlist->stats.nr_unknown_events;
903-
next_event:
904853
perf_mmap__consume(md);
854+
if (ret)
855+
break;
905856
}
906857

907858
perf_mmap__read_done(md);
@@ -1084,14 +1035,133 @@ static int callchain_param__setup_sample_type(struct callchain_param *callchain)
10841035
return 0;
10851036
}
10861037

1038+
static struct ordered_events *rotate_queues(struct perf_top *top)
1039+
{
1040+
struct ordered_events *in = top->qe.in;
1041+
1042+
if (top->qe.in == &top->qe.data[1])
1043+
top->qe.in = &top->qe.data[0];
1044+
else
1045+
top->qe.in = &top->qe.data[1];
1046+
1047+
return in;
1048+
}
1049+
1050+
static void *process_thread(void *arg)
1051+
{
1052+
struct perf_top *top = arg;
1053+
1054+
while (!done) {
1055+
struct ordered_events *out, *in = top->qe.in;
1056+
1057+
if (!in->nr_events) {
1058+
usleep(100);
1059+
continue;
1060+
}
1061+
1062+
pthread_mutex_lock(&top->qe.lock);
1063+
out = rotate_queues(top);
1064+
pthread_mutex_unlock(&top->qe.lock);
1065+
1066+
if (ordered_events__flush(out, OE_FLUSH__TOP))
1067+
pr_err("failed to process events\n");
1068+
}
1069+
1070+
return NULL;
1071+
}
1072+
1073+
static int deliver_event(struct ordered_events *qe,
1074+
struct ordered_event *qevent)
1075+
{
1076+
struct perf_top *top = qe->data;
1077+
struct perf_evlist *evlist = top->evlist;
1078+
struct perf_session *session = top->session;
1079+
union perf_event *event = qevent->event;
1080+
struct perf_sample sample;
1081+
struct perf_evsel *evsel;
1082+
struct machine *machine;
1083+
int ret = -1;
1084+
1085+
ret = perf_evlist__parse_sample(evlist, event, &sample);
1086+
if (ret) {
1087+
pr_err("Can't parse sample, err = %d\n", ret);
1088+
goto next_event;
1089+
}
1090+
1091+
evsel = perf_evlist__id2evsel(session->evlist, sample.id);
1092+
assert(evsel != NULL);
1093+
1094+
if (event->header.type == PERF_RECORD_SAMPLE)
1095+
++top->samples;
1096+
1097+
switch (sample.cpumode) {
1098+
case PERF_RECORD_MISC_USER:
1099+
++top->us_samples;
1100+
if (top->hide_user_symbols)
1101+
goto next_event;
1102+
machine = &session->machines.host;
1103+
break;
1104+
case PERF_RECORD_MISC_KERNEL:
1105+
++top->kernel_samples;
1106+
if (top->hide_kernel_symbols)
1107+
goto next_event;
1108+
machine = &session->machines.host;
1109+
break;
1110+
case PERF_RECORD_MISC_GUEST_KERNEL:
1111+
++top->guest_kernel_samples;
1112+
machine = perf_session__find_machine(session,
1113+
sample.pid);
1114+
break;
1115+
case PERF_RECORD_MISC_GUEST_USER:
1116+
++top->guest_us_samples;
1117+
/*
1118+
* TODO: we don't process guest user from host side
1119+
* except simple counting.
1120+
*/
1121+
goto next_event;
1122+
default:
1123+
if (event->header.type == PERF_RECORD_SAMPLE)
1124+
goto next_event;
1125+
machine = &session->machines.host;
1126+
break;
1127+
}
1128+
1129+
if (event->header.type == PERF_RECORD_SAMPLE) {
1130+
perf_event__process_sample(&top->tool, event, evsel,
1131+
&sample, machine);
1132+
} else if (event->header.type == PERF_RECORD_LOST) {
1133+
perf_top__process_lost(top, event, evsel);
1134+
} else if (event->header.type == PERF_RECORD_LOST_SAMPLES) {
1135+
perf_top__process_lost_samples(top, event, evsel);
1136+
} else if (event->header.type < PERF_RECORD_MAX) {
1137+
hists__inc_nr_events(evsel__hists(evsel), event->header.type);
1138+
machine__process_event(machine, event, &sample);
1139+
} else
1140+
++session->evlist->stats.nr_unknown_events;
1141+
1142+
ret = 0;
1143+
next_event:
1144+
return ret;
1145+
}
1146+
1147+
static void init_process_thread(struct perf_top *top)
1148+
{
1149+
ordered_events__init(&top->qe.data[0], deliver_event, top);
1150+
ordered_events__init(&top->qe.data[1], deliver_event, top);
1151+
ordered_events__set_copy_on_queue(&top->qe.data[0], true);
1152+
ordered_events__set_copy_on_queue(&top->qe.data[1], true);
1153+
top->qe.in = &top->qe.data[0];
1154+
pthread_mutex_init(&top->qe.lock, NULL);
1155+
}
1156+
10871157
static int __cmd_top(struct perf_top *top)
10881158
{
10891159
char msg[512];
10901160
struct perf_evsel *pos;
10911161
struct perf_evsel_config_term *err_term;
10921162
struct perf_evlist *evlist = top->evlist;
10931163
struct record_opts *opts = &top->record_opts;
1094-
pthread_t thread;
1164+
pthread_t thread, thread_process;
10951165
int ret;
10961166

10971167
top->session = perf_session__new(NULL, false, NULL);
@@ -1115,6 +1185,8 @@ static int __cmd_top(struct perf_top *top)
11151185
if (top->nr_threads_synthesize > 1)
11161186
perf_set_multithreaded();
11171187

1188+
init_process_thread(top);
1189+
11181190
machine__synthesize_threads(&top->session->machines.host, &opts->target,
11191191
top->evlist->threads, false,
11201192
top->nr_threads_synthesize);
@@ -1155,10 +1227,15 @@ static int __cmd_top(struct perf_top *top)
11551227
perf_evlist__enable(top->evlist);
11561228

11571229
ret = -1;
1230+
if (pthread_create(&thread_process, NULL, process_thread, top)) {
1231+
ui__error("Could not create process thread.\n");
1232+
goto out_delete;
1233+
}
1234+
11581235
if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
11591236
display_thread), top)) {
11601237
ui__error("Could not create display thread.\n");
1161-
goto out_delete;
1238+
goto out_join_thread;
11621239
}
11631240

11641241
if (top->realtime_prio) {
@@ -1193,6 +1270,8 @@ static int __cmd_top(struct perf_top *top)
11931270
ret = 0;
11941271
out_join:
11951272
pthread_join(thread, NULL);
1273+
out_join_thread:
1274+
pthread_join(thread_process, NULL);
11961275
out_delete:
11971276
perf_session__delete(top->session);
11981277
top->session = NULL;
@@ -1284,6 +1363,7 @@ int cmd_top(int argc, const char **argv)
12841363
* stays in overwrite mode. -acme
12851364
* */
12861365
.overwrite = 0,
1366+
.sample_time = true,
12871367
},
12881368
.max_stack = sysctl__max_stack(),
12891369
.annotation_opts = annotation__default_options,

tools/perf/util/ordered-events.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,8 +279,10 @@ int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
279279

280280
switch (how) {
281281
case OE_FLUSH__FINAL:
282-
oe->next_flush = ULLONG_MAX;
283282
show_progress = true;
283+
__fallthrough;
284+
case OE_FLUSH__TOP:
285+
oe->next_flush = ULLONG_MAX;
284286
break;
285287

286288
case OE_FLUSH__HALF:

tools/perf/util/ordered-events.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ enum oe_flush {
1818
OE_FLUSH__FINAL,
1919
OE_FLUSH__ROUND,
2020
OE_FLUSH__HALF,
21+
OE_FLUSH__TOP,
2122
};
2223

2324
struct ordered_events;

tools/perf/util/top.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,12 @@ struct perf_top {
4040
const char *sym_filter;
4141
float min_percent;
4242
unsigned int nr_threads_synthesize;
43+
44+
struct {
45+
struct ordered_events *in;
46+
struct ordered_events data[2];
47+
pthread_mutex_t lock;
48+
} qe;
4349
};
4450

4551
#define CONSOLE_CLEAR ""

0 commit comments

Comments (0)