Skip to content

Commit 12e89e6

Browse files
Kan Liang authored and acmel committed
perf hist: Add fast path for duplicate entries check
Perf checks the duplicate entries in a callchain before adding an entry. However, the check is very slow, especially with deeper call stacks. Almost 50% of the elapsed time of perf report is spent on the check when the call stack always has a depth of 32.

The hist_entry__cmp() is used to compare the new entry with the old entries. It will go through all the available sorts in the sort_list, and call the specific cmp of each sort, which is very slow.

Actually, for most cases, there are no duplicate entries in a callchain. The symbols are usually different. It's much faster to do a quick check for symbols first. Only do the full cmp when the symbols are exactly the same.

The quick check is only to check symbols, not dso.

Export _sort__sym_cmp.

  $ perf record --call-graph lbr ./tchain_edit_64

Without the patch:

  $ time perf report --stdio
  real 0m21.142s
  user 0m21.110s
  sys 0m0.033s

With the patch:

  $ time perf report --stdio
  real 0m10.977s
  user 0m10.948s
  sys 0m0.027s

Signed-off-by: Kan Liang <[email protected]>
Acked-by: Jiri Olsa <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Alexey Budankov <[email protected]>
Cc: Andi Kleen <[email protected]>
Cc: Mathieu Poirier <[email protected]>
Cc: Michael Ellerman <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Pavel Gerasimov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Ravi Bangoria <[email protected]>
Cc: Stephane Eranian <[email protected]>
Cc: Vitaly Slobodskoy <[email protected]>
Link: http://lore.kernel.org/lkml/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent d80da76 commit 12e89e6

File tree

3 files changed

+26
-1
lines changed

3 files changed

+26
-1
lines changed

tools/perf/util/hist.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
10701070
return fill_callchain_info(al, node, iter->hide_unresolved);
10711071
}
10721072

1073+
/*
 * Cheap pre-filter for the duplicate-entry search done while adding
 * cumulative callchain entries: reports whether two hist entries can be
 * told apart by symbol alone, without running the full sort-key
 * comparison.
 *
 * Returns true when the entries differ, i.e. _sort__sym_cmp() on their
 * ms.sym pointers is non-zero or, when neither entry has a symbol, their
 * ip values differ.  Returns false when this check cannot distinguish
 * them; the caller then falls back to hist_entry__cmp().
 *
 * Only the symbol is examined, not the dso, so a false result does not
 * by itself prove that the two entries are duplicates.
 */
static bool
1074+
hist_entry__fast__sym_diff(struct hist_entry *left,
1075+
struct hist_entry *right)
1076+
{
1077+
struct symbol *sym_l = left->ms.sym;
1078+
struct symbol *sym_r = right->ms.sym;
1079+
1080+
/* Neither entry has a symbol: compare the ip values instead. */
if (!sym_l && !sym_r)
1081+
return left->ip != right->ip;
1082+
1083+
/* Collapse the int64_t comparison result to a bool: non-zero means different. */
return !!_sort__sym_cmp(sym_l, sym_r);
1084+
}
1085+
1086+
10731087
static int
10741088
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
10751089
struct addr_location *al)
@@ -1096,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
10961110
};
10971111
int i;
10981112
struct callchain_cursor cursor;
1113+
bool fast = hists__has(he_tmp.hists, sym);
10991114

11001115
callchain_cursor_snapshot(&cursor, &callchain_cursor);
11011116

@@ -1106,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
11061121
* It's possible that it has cycles or recursive calls.
11071122
*/
11081123
for (i = 0; i < iter->curr; i++) {
1124+
/*
1125+
* For most cases, there are no duplicate entries in callchain.
1126+
* The symbols are usually different. Do a quick check for
1127+
* symbols first.
1128+
*/
1129+
if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
1130+
continue;
1131+
11091132
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
11101133
/* to avoid calling callback function */
11111134
iter->he = NULL;

tools/perf/util/sort.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
237237
return (int64_t)(right_ip - left_ip);
238238
}
239239

240-
static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
240+
int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
241241
{
242242
if (!sym_l || !sym_r)
243243
return cmp_null(sym_l, sym_r);

tools/perf/util/sort.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,5 +311,7 @@ int64_t
311311
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
312312
int64_t
313313
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
314+
int64_t
315+
_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
314316
char *hist_entry__srcline(struct hist_entry *he);
315317
#endif /* __PERF_SORT_H */

0 commit comments

Comments
 (0)