Skip to content

Commit 510457e

Browse files
author
Ingo Molnar
committed
Merge tag 'perf-core-for-mingo-4.13-20170718' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: User visible changes: - Initial support for namespaces, using setns to access files in namespaces, grabbing their build-ids, etc. We still need to work more to deal with namespaces that vanish before we can get the needed data to do analysis, but this should be as good as what is in bcc now (Krister Johansen) - Add header record types to pipe-mode, now this command: $ perf record -o - -e cycles sleep 1 | perf report --stdio --header Will show the same as in non-pipe mode, i.e. involving a perf.data file (David Carrillo-Cisneros) - Implement a visual marker for fused x86 instructions in the annotate TUI browser, available now in 'perf report', more work needed to have it available as well in 'perf top' (Jin Yao) Further explanation from one of Jin's patches: │ ┌──cmpl $0x0,argp_program_version_hook 81.93 │ ├──je 20 │ │ lock cmpxchg %esi,0x38a9a4(%rip) │ │↓ jne 29 │ │↓ jmp 43 11.47 │20:└─→cmpxch %esi,0x38a999(%rip) That means the cmpl+je is a fused instruction pair and they should be considered together. - Record the branch type and then show statistics and info about in callchain entries (Jin Yao) Example from one of Jin's patches: # perf record -g -j any,save_type # perf report --branch-history --stdio --no-children 38.50% div.c:45 [.] main div | ---main div.c:42 (RET CROSS_2M cycles:2) compute_flag div.c:28 (cycles:2) compute_flag div.c:27 (RET CROSS_2M cycles:1) rand rand.c:28 (cycles:1) rand rand.c:28 (RET CROSS_2M cycles:1) __random random.c:298 (cycles:1) __random random.c:297 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (COND_BWD CROSS_2M cycles:1) __random random.c:295 (cycles:1) __random random.c:295 (RET CROSS_2M cycles:9) - Beautify the fcntl syscall, which is an interesting one in the sense that infrastructure had to be put in place to change the formatters of some arguments according to the value in a previous one, i.e. 
cmd dictates how arg and the syscall return will be formatted. (Arnaldo Carvalho de Melo) Infrastructure changes: - 'perf test attr' fixes (Jiri Olsa) Vendor events changes: - Add POWER9 PMU events (Sukadev Bhattiprolu) - Support additional POWER8+ PVR in PMU mapfile (Shriya) Signed-off-by: Arnaldo Carvalho de Melo <[email protected]> Signed-off-by: Ingo Molnar <[email protected]>
2 parents 3bda69c + b851dd4 commit 510457e

File tree

126 files changed

+6339
-1031
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

126 files changed

+6339
-1031
lines changed

arch/x86/events/intel/lbr.c

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,9 @@ enum {
109109
X86_BR_ZERO_CALL = 1 << 15,/* zero length call */
110110
X86_BR_CALL_STACK = 1 << 16,/* call stack */
111111
X86_BR_IND_JMP = 1 << 17,/* indirect jump */
112+
113+
X86_BR_TYPE_SAVE = 1 << 18,/* indicate to save branch type */
114+
112115
};
113116

114117
#define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -510,6 +513,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
510513
cpuc->lbr_entries[i].in_tx = 0;
511514
cpuc->lbr_entries[i].abort = 0;
512515
cpuc->lbr_entries[i].cycles = 0;
516+
cpuc->lbr_entries[i].type = 0;
513517
cpuc->lbr_entries[i].reserved = 0;
514518
}
515519
cpuc->lbr_stack.nr = i;
@@ -596,6 +600,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
596600
cpuc->lbr_entries[out].in_tx = in_tx;
597601
cpuc->lbr_entries[out].abort = abort;
598602
cpuc->lbr_entries[out].cycles = cycles;
603+
cpuc->lbr_entries[out].type = 0;
599604
cpuc->lbr_entries[out].reserved = 0;
600605
out++;
601606
}
@@ -673,6 +678,10 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
673678

674679
if (br_type & PERF_SAMPLE_BRANCH_CALL)
675680
mask |= X86_BR_CALL | X86_BR_ZERO_CALL;
681+
682+
if (br_type & PERF_SAMPLE_BRANCH_TYPE_SAVE)
683+
mask |= X86_BR_TYPE_SAVE;
684+
676685
/*
677686
* stash actual user request into reg, it may
678687
* be used by fixup code for some CPU
@@ -926,6 +935,43 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
926935
return ret;
927936
}
928937

938+
#define X86_BR_TYPE_MAP_MAX 16
939+
940+
static int branch_map[X86_BR_TYPE_MAP_MAX] = {
941+
PERF_BR_CALL, /* X86_BR_CALL */
942+
PERF_BR_RET, /* X86_BR_RET */
943+
PERF_BR_SYSCALL, /* X86_BR_SYSCALL */
944+
PERF_BR_SYSRET, /* X86_BR_SYSRET */
945+
PERF_BR_UNKNOWN, /* X86_BR_INT */
946+
PERF_BR_UNKNOWN, /* X86_BR_IRET */
947+
PERF_BR_COND, /* X86_BR_JCC */
948+
PERF_BR_UNCOND, /* X86_BR_JMP */
949+
PERF_BR_UNKNOWN, /* X86_BR_IRQ */
950+
PERF_BR_IND_CALL, /* X86_BR_IND_CALL */
951+
PERF_BR_UNKNOWN, /* X86_BR_ABORT */
952+
PERF_BR_UNKNOWN, /* X86_BR_IN_TX */
953+
PERF_BR_UNKNOWN, /* X86_BR_NO_TX */
954+
PERF_BR_CALL, /* X86_BR_ZERO_CALL */
955+
PERF_BR_UNKNOWN, /* X86_BR_CALL_STACK */
956+
PERF_BR_IND, /* X86_BR_IND_JMP */
957+
};
958+
959+
static int
960+
common_branch_type(int type)
961+
{
962+
int i;
963+
964+
type >>= 2; /* skip X86_BR_USER and X86_BR_KERNEL */
965+
966+
if (type) {
967+
i = __ffs(type);
968+
if (i < X86_BR_TYPE_MAP_MAX)
969+
return branch_map[i];
970+
}
971+
972+
return PERF_BR_UNKNOWN;
973+
}
974+
929975
/*
930976
* implement actual branch filter based on user demand.
931977
* Hardware may not exactly satisfy that request, thus
@@ -942,7 +988,8 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
942988
bool compress = false;
943989

944990
/* if sampling all branches, then nothing to filter */
945-
if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
991+
if (((br_sel & X86_BR_ALL) == X86_BR_ALL) &&
992+
((br_sel & X86_BR_TYPE_SAVE) != X86_BR_TYPE_SAVE))
946993
return;
947994

948995
for (i = 0; i < cpuc->lbr_stack.nr; i++) {
@@ -963,6 +1010,9 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
9631010
cpuc->lbr_entries[i].from = 0;
9641011
compress = true;
9651012
}
1013+
1014+
if ((br_sel & X86_BR_TYPE_SAVE) == X86_BR_TYPE_SAVE)
1015+
cpuc->lbr_entries[i].type = common_branch_type(type);
9661016
}
9671017

9681018
if (!compress)

include/uapi/linux/perf_event.h

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ enum perf_branch_sample_type_shift {
174174
PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
175175
PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
176176

177+
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
178+
177179
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
178180
};
179181

@@ -198,9 +200,30 @@ enum perf_branch_sample_type {
198200
PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
199201
PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
200202

203+
PERF_SAMPLE_BRANCH_TYPE_SAVE =
204+
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
205+
201206
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
202207
};
203208

209+
/*
210+
* Common flow change classification
*
* Branch type values reported in the 'type' field of
* struct perf_branch_entry. That field is a 4-bit bitfield, so every
* value used as a branch type has to stay below 16.
211+
*/
212+
enum {
213+
PERF_BR_UNKNOWN = 0, /* unknown (also used when no common type applies) */
214+
PERF_BR_COND = 1, /* conditional branch */
215+
PERF_BR_UNCOND = 2, /* unconditional branch */
216+
PERF_BR_IND = 3, /* indirect branch */
217+
PERF_BR_CALL = 4, /* function call */
218+
PERF_BR_IND_CALL = 5, /* indirect function call */
219+
PERF_BR_RET = 6, /* function return */
220+
PERF_BR_SYSCALL = 7, /* syscall */
221+
PERF_BR_SYSRET = 8, /* syscall return */
222+
PERF_BR_COND_CALL = 9, /* conditional function call */
223+
PERF_BR_COND_RET = 10, /* conditional function return */
224+
PERF_BR_MAX, /* one more than the last defined type (11) */
225+
};
226+
204227
#define PERF_SAMPLE_BRANCH_PLM_ALL \
205228
(PERF_SAMPLE_BRANCH_USER|\
206229
PERF_SAMPLE_BRANCH_KERNEL|\
@@ -1015,6 +1038,7 @@ union perf_mem_data_src {
10151038
* in_tx: running in a hardware transaction
10161039
* abort: aborting a hardware transaction
10171040
* cycles: cycles from last branch (or 0 if not supported)
1041+
* type: branch type
10181042
*/
10191043
struct perf_branch_entry {
10201044
__u64 from;
@@ -1024,7 +1048,8 @@ struct perf_branch_entry {
10241048
in_tx:1, /* in transaction */
10251049
abort:1, /* transaction abort */
10261050
cycles:16, /* cycle count to last branch */
1027-
reserved:44;
1051+
type:4, /* branch type */
1052+
reserved:40;
10281053
};
10291054

10301055
#endif /* _UAPI_LINUX_PERF_EVENT_H */

tools/arch/x86/include/asm/unistd_32.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@
1010
#ifndef __NR_getcpu
1111
# define __NR_getcpu 318
1212
#endif
13+
#ifndef __NR_setns
14+
# define __NR_setns 346
15+
#endif

tools/arch/x86/include/asm/unistd_64.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@
1010
#ifndef __NR_getcpu
1111
# define __NR_getcpu 309
1212
#endif
13+
#ifndef __NR_setns
14+
#define __NR_setns 308
15+
#endif
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#ifndef _UAPI_ASM_X86_UNISTD_H
2+
#define _UAPI_ASM_X86_UNISTD_H
3+
4+
/* x32 syscall flag bit */
5+
#define __X32_SYSCALL_BIT 0x40000000
6+
7+
#ifndef __KERNEL__
8+
# ifdef __i386__
9+
# include <asm/unistd_32.h>
10+
# elif defined(__ILP32__)
11+
# include <asm/unistd_x32.h>
12+
# else
13+
# include <asm/unistd_64.h>
14+
# endif
15+
#endif
16+
17+
#endif /* _UAPI_ASM_X86_UNISTD_H */

tools/build/Makefile.feature

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ FEATURE_TESTS_BASIC := \
6464
get_cpuid \
6565
bpf \
6666
sched_getcpu \
67-
sdt
67+
sdt \
68+
setns
6869

6970
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
7071
# of all feature tests

tools/build/feature/Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@ FILES= \
4949
test-sdt.bin \
5050
test-cxx.bin \
5151
test-jvmti.bin \
52-
test-sched_getcpu.bin
52+
test-sched_getcpu.bin \
53+
test-setns.bin
5354

5455
FILES := $(addprefix $(OUTPUT),$(FILES))
5556

@@ -95,6 +96,9 @@ $(OUTPUT)test-glibc.bin:
9596
$(OUTPUT)test-sched_getcpu.bin:
9697
$(BUILD)
9798

99+
$(OUTPUT)test-setns.bin:
100+
$(BUILD)
101+
98102
DWARFLIBS := -ldw
99103
ifeq ($(findstring -static,${LDFLAGS}),-static)
100104
DWARFLIBS += -lelf -lebl -lz -llzma -lbz2

tools/build/feature/test-all.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,10 @@
153153
# include "test-sdt.c"
154154
#undef main
155155

156+
#define main main_test_setns
157+
# include "test-setns.c"
158+
#undef main
159+
156160
int main(int argc, char *argv[])
157161
{
158162
main_test_libpython();
@@ -188,6 +192,7 @@ int main(int argc, char *argv[])
188192
main_test_libcrypto();
189193
main_test_sched_getcpu();
190194
main_test_sdt();
195+
main_test_setns();
191196

192197
return 0;
193198
}

tools/build/feature/test-setns.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
/*
 * Build-time feature probe for setns().
 *
 * This program only has to compile and link; it is listed as the "setns"
 * feature test and is also pulled into test-all.c, where its return value
 * is ignored.
 */
#define _GNU_SOURCE
#include <sched.h>

int main(void)
{
	int ret = setns(0, 0);

	return ret;
}
}

0 commit comments

Comments
 (0)