Skip to content

Commit be2d3ec

Browse files
committed
Merge tag 'perf-tools-for-v5.18-2022-04-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Avoid SEGV if core.cpus isn't set in 'perf stat'.
 - Stop depending on .git files for building PERF-VERSION-FILE, used in 'perf --version', fixing some perf tools build scenarios.
 - Convert tracepoint.py example to python3.
 - Update UAPI header copies from the kernel sources: socket, mman-common, msr-index, KVM, i915 and cpufeatures.
 - Update copy of libbpf's hashmap.c.
 - Directly return instead of using local ret variable in evlist__create_syswide_maps(), found by coccinelle.

* tag 'perf-tools-for-v5.18-2022-04-02' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf python: Convert tracepoint.py example to python3
  perf evlist: Directly return instead of using local ret variable
  perf cpumap: More cpu map reuse by merge.
  perf cpumap: Add is_subset function
  perf evlist: Rename cpus to user_requested_cpus
  perf tools: Stop depending on .git files for building PERF-VERSION-FILE
  tools headers cpufeatures: Sync with the kernel sources
  tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools kvm headers arm64: Update KVM headers from the kernel sources
  tools arch x86: Sync the msr-index.h copy with the kernel sources
  tools headers UAPI: Sync asm-generic/mman-common.h with the kernel
  perf beauty: Update copy of linux/socket.h with the kernel sources
  perf tools: Update copy of libbpf's hashmap.c
  perf stat: Avoid SEGV if core.cpus isn't set
2 parents d897b68 + 7e2022a commit be2d3ec

File tree

31 files changed

+180
-88
lines changed

31 files changed

+180
-88
lines changed

tools/arch/arm64/include/uapi/asm/kvm.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,16 @@ struct kvm_arm_copy_mte_tags {
419419
#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
420420
#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
421421

422+
/* arm64-specific kvm_run::system_event flags */
423+
/*
424+
* Reset caused by a PSCI v1.1 SYSTEM_RESET2 call.
425+
* Valid only when the system event has a type of KVM_SYSTEM_EVENT_RESET.
426+
*/
427+
#define KVM_SYSTEM_EVENT_RESET_FLAG_PSCI_RESET2 (1ULL << 0)
428+
429+
/* run->fail_entry.hardware_entry_failure_reason codes. */
430+
#define KVM_EXIT_FAIL_ENTRY_CPU_UNSUPPORTED (1ULL << 0)
431+
422432
#endif
423433

424434
#endif /* __ARM_KVM_H__ */

tools/arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,7 @@
388388
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
389389
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
390390
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
391+
#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
391392
#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
392393
#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
393394
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */

tools/arch/x86/include/asm/msr-index.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,8 @@
205205
#define RTIT_CTL_DISRETC BIT(11)
206206
#define RTIT_CTL_PTW_EN BIT(12)
207207
#define RTIT_CTL_BRANCH_EN BIT(13)
208+
#define RTIT_CTL_EVENT_EN BIT(31)
209+
#define RTIT_CTL_NOTNT BIT_ULL(55)
208210
#define RTIT_CTL_MTC_RANGE_OFFSET 14
209211
#define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET)
210212
#define RTIT_CTL_CYC_THRESH_OFFSET 19
@@ -360,11 +362,29 @@
360362
#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
361363
#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
362364

363-
364365
#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
365366
#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
366367
#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
367368

369+
/* Control-flow Enforcement Technology MSRs */
370+
#define MSR_IA32_U_CET 0x000006a0 /* user mode cet */
371+
#define MSR_IA32_S_CET 0x000006a2 /* kernel mode cet */
372+
#define CET_SHSTK_EN BIT_ULL(0)
373+
#define CET_WRSS_EN BIT_ULL(1)
374+
#define CET_ENDBR_EN BIT_ULL(2)
375+
#define CET_LEG_IW_EN BIT_ULL(3)
376+
#define CET_NO_TRACK_EN BIT_ULL(4)
377+
#define CET_SUPPRESS_DISABLE BIT_ULL(5)
378+
#define CET_RESERVED (BIT_ULL(6) | BIT_ULL(7) | BIT_ULL(8) | BIT_ULL(9))
379+
#define CET_SUPPRESS BIT_ULL(10)
380+
#define CET_WAIT_ENDBR BIT_ULL(11)
381+
382+
#define MSR_IA32_PL0_SSP 0x000006a4 /* ring-0 shadow stack pointer */
383+
#define MSR_IA32_PL1_SSP 0x000006a5 /* ring-1 shadow stack pointer */
384+
#define MSR_IA32_PL2_SSP 0x000006a6 /* ring-2 shadow stack pointer */
385+
#define MSR_IA32_PL3_SSP 0x000006a7 /* ring-3 shadow stack pointer */
386+
#define MSR_IA32_INT_SSP_TAB 0x000006a8 /* exception shadow stack table */
387+
368388
/* Hardware P state interface */
369389
#define MSR_PPERF 0x0000064e
370390
#define MSR_PERF_LIMIT_REASONS 0x0000064f

tools/include/uapi/asm-generic/mman-common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
7676
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
7777

78+
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
79+
7880
/* compatibility flags */
7981
#define MAP_FILE 0
8082

tools/include/uapi/drm/i915_drm.h

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,10 +1118,16 @@ struct drm_i915_gem_exec_object2 {
11181118
/**
11191119
* When the EXEC_OBJECT_PINNED flag is specified this is populated by
11201120
* the user with the GTT offset at which this object will be pinned.
1121+
*
11211122
* When the I915_EXEC_NO_RELOC flag is specified this must contain the
11221123
* presumed_offset of the object.
1124+
*
11231125
* During execbuffer2 the kernel populates it with the value of the
11241126
* current GTT offset of the object, for future presumed_offset writes.
1127+
*
1128+
* See struct drm_i915_gem_create_ext for the rules when dealing with
1129+
* alignment restrictions with I915_MEMORY_CLASS_DEVICE, on devices with
1130+
* minimum page sizes, like DG2.
11251131
*/
11261132
__u64 offset;
11271133

@@ -3144,11 +3150,40 @@ struct drm_i915_gem_create_ext {
31443150
*
31453151
* The (page-aligned) allocated size for the object will be returned.
31463152
*
3147-
* Note that for some devices we have might have further minimum
3148-
* page-size restrictions(larger than 4K), like for device local-memory.
3149-
* However in general the final size here should always reflect any
3150-
* rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
3151-
* extension to place the object in device local-memory.
3153+
*
3154+
* DG2 64K min page size implications:
3155+
*
3156+
* On discrete platforms, starting from DG2, we have to contend with GTT
3157+
* page size restrictions when dealing with I915_MEMORY_CLASS_DEVICE
3158+
* objects. Specifically the hardware only supports 64K or larger GTT
3159+
* page sizes for such memory. The kernel will already ensure that all
3160+
* I915_MEMORY_CLASS_DEVICE memory is allocated using 64K or larger page
3161+
* sizes underneath.
3162+
*
3163+
* Note that the returned size here will always reflect any required
3164+
* rounding up done by the kernel, i.e 4K will now become 64K on devices
3165+
* such as DG2.
3166+
*
3167+
* Special DG2 GTT address alignment requirement:
3168+
*
3169+
* The GTT alignment will also need to be at least 2M for such objects.
3170+
*
3171+
* Note that due to how the hardware implements 64K GTT page support, we
3172+
* have some further complications:
3173+
*
3174+
* 1) The entire PDE (which covers a 2MB virtual address range), must
3175+
* contain only 64K PTEs, i.e mixing 4K and 64K PTEs in the same
3176+
* PDE is forbidden by the hardware.
3177+
*
3178+
* 2) We still need to support 4K PTEs for I915_MEMORY_CLASS_SYSTEM
3179+
* objects.
3180+
*
3181+
* To keep things simple for userland, we mandate that any GTT mappings
3182+
* must be aligned to and rounded up to 2MB. The kernel will internally
3183+
* pad them out to the next 2MB boundary. As this only wastes virtual
3184+
* address space and avoids userland having to copy any needlessly
3185+
* complicated PDE sharing scheme (coloring) and only affects DG2, this
3186+
* is deemed to be a good compromise.
31523187
*/
31533188
__u64 size;
31543189
/**

tools/include/uapi/linux/kvm.h

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -562,19 +562,25 @@ struct kvm_s390_mem_op {
562562
__u32 op; /* type of operation */
563563
__u64 buf; /* buffer in userspace */
564564
union {
565-
__u8 ar; /* the access register number */
565+
struct {
566+
__u8 ar; /* the access register number */
567+
__u8 key; /* access key, ignored if flag unset */
568+
};
566569
__u32 sida_offset; /* offset into the sida */
567-
__u8 reserved[32]; /* should be set to 0 */
570+
__u8 reserved[32]; /* ignored */
568571
};
569572
};
570573
/* types for kvm_s390_mem_op->op */
571574
#define KVM_S390_MEMOP_LOGICAL_READ 0
572575
#define KVM_S390_MEMOP_LOGICAL_WRITE 1
573576
#define KVM_S390_MEMOP_SIDA_READ 2
574577
#define KVM_S390_MEMOP_SIDA_WRITE 3
578+
#define KVM_S390_MEMOP_ABSOLUTE_READ 4
579+
#define KVM_S390_MEMOP_ABSOLUTE_WRITE 5
575580
/* flags for kvm_s390_mem_op->flags */
576581
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
577582
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
583+
#define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2)
578584

579585
/* for KVM_INTERRUPT */
580586
struct kvm_interrupt {
@@ -1137,6 +1143,7 @@ struct kvm_ppc_resize_hpt {
11371143
#define KVM_CAP_PPC_AIL_MODE_3 210
11381144
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
11391145
#define KVM_CAP_PMU_CAPABILITY 212
1146+
#define KVM_CAP_DISABLE_QUIRKS2 213
11401147

11411148
#ifdef KVM_CAP_IRQ_ROUTING
11421149

tools/lib/perf/cpumap.c

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,26 @@ struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map)
319319
return map->nr > 0 ? map->map[map->nr - 1] : result;
320320
}
321321

322+
/** Is 'b' a subset of 'a'? Returns true when every cpu entry of 'b' is matched by an entry of 'a'; a NULL 'b' or a == b counts as a subset, a NULL 'a' (with non-NULL 'b') does not. NOTE(review): the single forward pass looks like it assumes both maps are sorted ascending by cpu - confirm against how maps are built. */
323+
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b)
324+
{
325+
if (a == b || !b) /* identical pointers (including both NULL) or no 'b' at all */
326+
return true;
327+
if (!a || b->nr > a->nr) /* nothing to contain 'b', or 'b' has more entries than 'a' */
328+
return false;
329+
330+
for (int i = 0, j = 0; i < a->nr; i++) { /* i walks 'a'; j is the next entry of 'b' still unmatched */
331+
if (a->map[i].cpu > b->map[j].cpu) /* 'a' is already past b[j] without having matched it */
332+
return false;
333+
if (a->map[i].cpu == b->map[j].cpu) {
334+
j++;
335+
if (j == b->nr) /* every entry of 'b' was matched */
336+
return true;
337+
}
338+
}
339+
return false; /* ran out of 'a' with entries of 'b' still unmatched */
340+
}
341+
322342
/*
323343
* Merge two cpumaps
324344
*
@@ -335,17 +355,12 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
335355
int i, j, k;
336356
struct perf_cpu_map *merged;
337357

338-
if (!orig && !other)
339-
return NULL;
340-
if (!orig) {
341-
perf_cpu_map__get(other);
342-
return other;
343-
}
344-
if (!other)
345-
return orig;
346-
if (orig->nr == other->nr &&
347-
!memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))
358+
if (perf_cpu_map__is_subset(orig, other))
348359
return orig;
360+
if (perf_cpu_map__is_subset(other, orig)) {
361+
perf_cpu_map__put(orig);
362+
return perf_cpu_map__get(other);
363+
}
349364

350365
tmp_len = orig->nr + other->nr;
351366
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));

tools/lib/perf/evlist.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
4141
*/
4242
if (!evsel->own_cpus || evlist->has_user_cpus) {
4343
perf_cpu_map__put(evsel->cpus);
44-
evsel->cpus = perf_cpu_map__get(evlist->cpus);
45-
} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->cpus)) {
44+
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
45+
} else if (!evsel->system_wide && perf_cpu_map__empty(evlist->user_requested_cpus)) {
4646
perf_cpu_map__put(evsel->cpus);
47-
evsel->cpus = perf_cpu_map__get(evlist->cpus);
47+
evsel->cpus = perf_cpu_map__get(evlist->user_requested_cpus);
4848
} else if (evsel->cpus != evsel->own_cpus) {
4949
perf_cpu_map__put(evsel->cpus);
5050
evsel->cpus = perf_cpu_map__get(evsel->own_cpus);
@@ -123,10 +123,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
123123

124124
void perf_evlist__exit(struct perf_evlist *evlist)
125125
{
126-
perf_cpu_map__put(evlist->cpus);
126+
perf_cpu_map__put(evlist->user_requested_cpus);
127127
perf_cpu_map__put(evlist->all_cpus);
128128
perf_thread_map__put(evlist->threads);
129-
evlist->cpus = NULL;
129+
evlist->user_requested_cpus = NULL;
130130
evlist->all_cpus = NULL;
131131
evlist->threads = NULL;
132132
fdarray__exit(&evlist->pollfd);
@@ -155,9 +155,9 @@ void perf_evlist__set_maps(struct perf_evlist *evlist,
155155
* original reference count of 1. If that is not the case it is up to
156156
* the caller to increase the reference count.
157157
*/
158-
if (cpus != evlist->cpus) {
159-
perf_cpu_map__put(evlist->cpus);
160-
evlist->cpus = perf_cpu_map__get(cpus);
158+
if (cpus != evlist->user_requested_cpus) {
159+
perf_cpu_map__put(evlist->user_requested_cpus);
160+
evlist->user_requested_cpus = perf_cpu_map__get(cpus);
161161
}
162162

163163
if (threads != evlist->threads) {
@@ -294,7 +294,7 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
294294

295295
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
296296
{
297-
int nr_cpus = perf_cpu_map__nr(evlist->cpus);
297+
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
298298
int nr_threads = perf_thread_map__nr(evlist->threads);
299299
int nfds = 0;
300300
struct perf_evsel *evsel;
@@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
426426
int idx, struct perf_mmap_param *mp, int cpu_idx,
427427
int thread, int *_output, int *_output_overwrite)
428428
{
429-
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);
429+
struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->user_requested_cpus, cpu_idx);
430430
struct perf_evsel *evsel;
431431
int revent;
432432

@@ -536,7 +536,7 @@ mmap_per_cpu(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
536536
struct perf_mmap_param *mp)
537537
{
538538
int nr_threads = perf_thread_map__nr(evlist->threads);
539-
int nr_cpus = perf_cpu_map__nr(evlist->cpus);
539+
int nr_cpus = perf_cpu_map__nr(evlist->user_requested_cpus);
540540
int cpu, thread;
541541

542542
for (cpu = 0; cpu < nr_cpus; cpu++) {
@@ -564,8 +564,8 @@ static int perf_evlist__nr_mmaps(struct perf_evlist *evlist)
564564
{
565565
int nr_mmaps;
566566

567-
nr_mmaps = perf_cpu_map__nr(evlist->cpus);
568-
if (perf_cpu_map__empty(evlist->cpus))
567+
nr_mmaps = perf_cpu_map__nr(evlist->user_requested_cpus);
568+
if (perf_cpu_map__empty(evlist->user_requested_cpus))
569569
nr_mmaps = perf_thread_map__nr(evlist->threads);
570570

571571
return nr_mmaps;
@@ -576,7 +576,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
576576
struct perf_mmap_param *mp)
577577
{
578578
struct perf_evsel *evsel;
579-
const struct perf_cpu_map *cpus = evlist->cpus;
579+
const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
580580
const struct perf_thread_map *threads = evlist->threads;
581581

582582
if (!ops || !ops->get || !ops->mmap)

tools/lib/perf/include/internal/cpumap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ struct perf_cpu_map {
2525
#endif
2626

2727
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
28+
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
2829

2930
#endif /* __LIBPERF_INTERNAL_CPUMAP_H */

tools/lib/perf/include/internal/evlist.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ struct perf_evlist {
1919
int nr_entries;
2020
int nr_groups;
2121
bool has_user_cpus;
22-
struct perf_cpu_map *cpus;
22+
/**
23+
* The cpus passed from the command line or all online CPUs by
24+
* default.
25+
*/
26+
struct perf_cpu_map *user_requested_cpus;
27+
/** The union of all evsel cpu maps. */
2328
struct perf_cpu_map *all_cpus;
2429
struct perf_thread_map *threads;
2530
int nr_mmaps;

tools/perf/Makefile.perf

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -691,9 +691,8 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
691691
$(SCRIPTS) : % : %.sh
692692
$(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
693693

694-
$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD ../../.git/ORIG_HEAD
694+
$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
695695
$(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
696-
$(Q)touch $(OUTPUT)PERF-VERSION-FILE
697696

698697
# These can record PERF_VERSION
699698
perf.spec $(SCRIPTS) \
@@ -1139,21 +1138,12 @@ else
11391138
@echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
11401139
endif
11411140

1142-
#
1143-
# Trick: if ../../.git does not exist - we are building out of tree for example,
1144-
# then force version regeneration:
1145-
#
1146-
ifeq ($(wildcard ../../.git/HEAD),)
1147-
GIT-HEAD-PHONY = ../../.git/HEAD ../../.git/ORIG_HEAD
1148-
else
1149-
GIT-HEAD-PHONY =
1150-
endif
11511141

11521142
FORCE:
11531143

11541144
.PHONY: all install clean config-clean strip install-gtk
11551145
.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
1156-
.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
1146+
.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope FORCE prepare
11571147
.PHONY: libtraceevent_plugins archheaders
11581148

11591149
endif # force_fixdep

0 commit comments

Comments
 (0)