Skip to content

Commit 0cdf6d4

Browse files
committed
Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner:

"A set of fixes for perf:

Kernel side:

- Fix the hardcoded index of extra PCI devices on Broadwell which caused a resource conflict and triggered warnings on CPU hotplug.

Tooling:

- Update the tools copy of several files, including perf_event.h, powerpc's asm/unistd.h (new io_pgetevents syscall), bpf.h and x86's memcpy_64.S (used in 'perf bench mem'), silencing the respective warnings during the perf tools build.

- Fix the build on the alpine:edge distro"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel/uncore: Fix hardcoded index of Broadwell extra PCI devices
  perf tools: Fix the build on the alpine:edge distro
  tools arch: Update arch/x86/lib/memcpy_64.S copy used in 'perf bench mem memcpy'
  tools headers uapi: Refresh linux/bpf.h copy
  tools headers powerpc: Update asm/unistd.h copy to pick new
  tools headers uapi: Update tools's copy of linux/perf_event.h
2 parents b9fb1fc + ce03b6d commit 0cdf6d4

File tree

15 files changed

+132
-67
lines changed

15 files changed

+132
-67
lines changed

arch/x86/events/intel/uncore.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
#define UNCORE_PCI_DEV_TYPE(data) ((data >> 8) & 0xff)
2929
#define UNCORE_PCI_DEV_IDX(data) (data & 0xff)
3030
#define UNCORE_EXTRA_PCI_DEV 0xff
31-
#define UNCORE_EXTRA_PCI_DEV_MAX 3
31+
#define UNCORE_EXTRA_PCI_DEV_MAX 4
3232

3333
#define UNCORE_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, 0xff)
3434

arch/x86/events/intel/uncore_snbep.c

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,7 @@ void snbep_uncore_cpu_init(void)
10291029
enum {
10301030
SNBEP_PCI_QPI_PORT0_FILTER,
10311031
SNBEP_PCI_QPI_PORT1_FILTER,
1032+
BDX_PCI_QPI_PORT2_FILTER,
10321033
HSWEP_PCI_PCU_3,
10331034
};
10341035

@@ -3286,15 +3287,18 @@ static const struct pci_device_id bdx_uncore_pci_ids[] = {
32863287
},
32873288
{ /* QPI Port 0 filter */
32883289
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f86),
3289-
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 0),
3290+
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
3291+
SNBEP_PCI_QPI_PORT0_FILTER),
32903292
},
32913293
{ /* QPI Port 1 filter */
32923294
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f96),
3293-
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 1),
3295+
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
3296+
SNBEP_PCI_QPI_PORT1_FILTER),
32943297
},
32953298
{ /* QPI Port 2 filter */
32963299
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6f46),
3297-
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV, 2),
3300+
.driver_data = UNCORE_PCI_DEV_DATA(UNCORE_EXTRA_PCI_DEV,
3301+
BDX_PCI_QPI_PORT2_FILTER),
32983302
},
32993303
{ /* PCU.3 (for Capability registers) */
33003304
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x6fc0),

tools/arch/powerpc/include/uapi/asm/unistd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,5 +399,6 @@
399399
#define __NR_pkey_free 385
400400
#define __NR_pkey_mprotect 386
401401
#define __NR_rseq 387
402+
#define __NR_io_pgetevents 388
402403

403404
#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
tools/arch/x86/include/asm/mcsafe_test.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#ifndef _MCSAFE_TEST_H_
3+
#define _MCSAFE_TEST_H_
4+
5+
.macro MCSAFE_TEST_CTL
6+
.endm
7+
8+
.macro MCSAFE_TEST_SRC reg count target
9+
.endm
10+
11+
.macro MCSAFE_TEST_DST reg count target
12+
.endm
13+
#endif /* _MCSAFE_TEST_H_ */

tools/arch/x86/lib/memcpy_64.S

Lines changed: 54 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <linux/linkage.h>
44
#include <asm/errno.h>
55
#include <asm/cpufeatures.h>
6+
#include <asm/mcsafe_test.h>
67
#include <asm/alternative-asm.h>
78
#include <asm/export.h>
89

@@ -183,12 +184,15 @@ ENTRY(memcpy_orig)
183184
ENDPROC(memcpy_orig)
184185

185186
#ifndef CONFIG_UML
187+
188+
MCSAFE_TEST_CTL
189+
186190
/*
187-
* memcpy_mcsafe_unrolled - memory copy with machine check exception handling
191+
* __memcpy_mcsafe - memory copy with machine check exception handling
188192
* Note that we only catch machine checks when reading the source addresses.
189193
* Writes to target are posted and don't generate machine checks.
190194
*/
191-
ENTRY(memcpy_mcsafe_unrolled)
195+
ENTRY(__memcpy_mcsafe)
192196
cmpl $8, %edx
193197
/* Less than 8 bytes? Go to byte copy loop */
194198
jb .L_no_whole_words
@@ -204,58 +208,33 @@ ENTRY(memcpy_mcsafe_unrolled)
204208
subl $8, %ecx
205209
negl %ecx
206210
subl %ecx, %edx
207-
.L_copy_leading_bytes:
211+
.L_read_leading_bytes:
208212
movb (%rsi), %al
213+
MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
214+
MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
215+
.L_write_leading_bytes:
209216
movb %al, (%rdi)
210217
incq %rsi
211218
incq %rdi
212219
decl %ecx
213-
jnz .L_copy_leading_bytes
220+
jnz .L_read_leading_bytes
214221

215222
.L_8byte_aligned:
216-
/* Figure out how many whole cache lines (64-bytes) to copy */
217-
movl %edx, %ecx
218-
andl $63, %edx
219-
shrl $6, %ecx
220-
jz .L_no_whole_cache_lines
221-
222-
/* Loop copying whole cache lines */
223-
.L_cache_w0: movq (%rsi), %r8
224-
.L_cache_w1: movq 1*8(%rsi), %r9
225-
.L_cache_w2: movq 2*8(%rsi), %r10
226-
.L_cache_w3: movq 3*8(%rsi), %r11
227-
movq %r8, (%rdi)
228-
movq %r9, 1*8(%rdi)
229-
movq %r10, 2*8(%rdi)
230-
movq %r11, 3*8(%rdi)
231-
.L_cache_w4: movq 4*8(%rsi), %r8
232-
.L_cache_w5: movq 5*8(%rsi), %r9
233-
.L_cache_w6: movq 6*8(%rsi), %r10
234-
.L_cache_w7: movq 7*8(%rsi), %r11
235-
movq %r8, 4*8(%rdi)
236-
movq %r9, 5*8(%rdi)
237-
movq %r10, 6*8(%rdi)
238-
movq %r11, 7*8(%rdi)
239-
leaq 64(%rsi), %rsi
240-
leaq 64(%rdi), %rdi
241-
decl %ecx
242-
jnz .L_cache_w0
243-
244-
/* Are there any trailing 8-byte words? */
245-
.L_no_whole_cache_lines:
246223
movl %edx, %ecx
247224
andl $7, %edx
248225
shrl $3, %ecx
249226
jz .L_no_whole_words
250227

251-
/* Copy trailing words */
252-
.L_copy_trailing_words:
228+
.L_read_words:
253229
movq (%rsi), %r8
254-
mov %r8, (%rdi)
255-
leaq 8(%rsi), %rsi
256-
leaq 8(%rdi), %rdi
230+
MCSAFE_TEST_SRC %rsi 8 .E_read_words
231+
MCSAFE_TEST_DST %rdi 8 .E_write_words
232+
.L_write_words:
233+
movq %r8, (%rdi)
234+
addq $8, %rsi
235+
addq $8, %rdi
257236
decl %ecx
258-
jnz .L_copy_trailing_words
237+
jnz .L_read_words
259238

260239
/* Any trailing bytes? */
261240
.L_no_whole_words:
@@ -264,38 +243,55 @@ ENTRY(memcpy_mcsafe_unrolled)
264243

265244
/* Copy trailing bytes */
266245
movl %edx, %ecx
267-
.L_copy_trailing_bytes:
246+
.L_read_trailing_bytes:
268247
movb (%rsi), %al
248+
MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
249+
MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
250+
.L_write_trailing_bytes:
269251
movb %al, (%rdi)
270252
incq %rsi
271253
incq %rdi
272254
decl %ecx
273-
jnz .L_copy_trailing_bytes
255+
jnz .L_read_trailing_bytes
274256

275257
/* Copy successful. Return zero */
276258
.L_done_memcpy_trap:
277259
xorq %rax, %rax
278260
ret
279-
ENDPROC(memcpy_mcsafe_unrolled)
280-
EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
261+
ENDPROC(__memcpy_mcsafe)
262+
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
281263

282264
.section .fixup, "ax"
283-
/* Return -EFAULT for any failure */
284-
.L_memcpy_mcsafe_fail:
285-
mov $-EFAULT, %rax
265+
/*
266+
* Return number of bytes not copied for any failure. Note that
267+
* there is no "tail" handling since the source buffer is 8-byte
268+
* aligned and poison is cacheline aligned.
269+
*/
270+
.E_read_words:
271+
shll $3, %ecx
272+
.E_leading_bytes:
273+
addl %edx, %ecx
274+
.E_trailing_bytes:
275+
mov %ecx, %eax
286276
ret
287277

278+
/*
279+
* For write fault handling, given the destination is unaligned,
280+
* we handle faults on multi-byte writes with a byte-by-byte
281+
* copy up to the write-protected page.
282+
*/
283+
.E_write_words:
284+
shll $3, %ecx
285+
addl %edx, %ecx
286+
movl %ecx, %edx
287+
jmp mcsafe_handle_tail
288+
288289
.previous
289290

290-
_ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
291-
_ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
292-
_ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
293-
_ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
294-
_ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
295-
_ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
296-
_ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
297-
_ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
298-
_ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
299-
_ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
300-
_ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
291+
_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
292+
_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
293+
_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
294+
_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
295+
_ASM_EXTABLE(.L_write_words, .E_write_words)
296+
_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
301297
#endif

tools/include/uapi/linux/bpf.h

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1857,7 +1857,8 @@ union bpf_attr {
18571857
* is resolved), the nexthop address is returned in ipv4_dst
18581858
* or ipv6_dst based on family, smac is set to mac address of
18591859
* egress device, dmac is set to nexthop mac address, rt_metric
1860-
* is set to metric from route (IPv4/IPv6 only).
1860+
* is set to metric from route (IPv4/IPv6 only), and ifindex
1861+
* is set to the device index of the nexthop from the FIB lookup.
18611862
*
18621863
* *plen* argument is the size of the passed in struct.
18631864
* *flags* argument can be a combination of one or more of the
@@ -1873,9 +1874,10 @@ union bpf_attr {
18731874
* *ctx* is either **struct xdp_md** for XDP programs or
18741875
* **struct sk_buff** tc cls_act programs.
18751876
* Return
1876-
* Egress device index on success, 0 if packet needs to continue
1877-
* up the stack for further processing or a negative error in case
1878-
* of failure.
1877+
* * < 0 if any input argument is invalid
1878+
* * 0 on success (packet is forwarded, nexthop neighbor exists)
1879+
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
1880+
*   packet is not forwarded or needs assist from full stack
18791881
*
18801882
* int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
18811883
* Description
@@ -2612,6 +2614,18 @@ struct bpf_raw_tracepoint_args {
26122614
#define BPF_FIB_LOOKUP_DIRECT BIT(0)
26132615
#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
26142616

2617+
enum {
2618+
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
2619+
BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
2620+
BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
2621+
BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
2622+
BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
2623+
BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
2624+
BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
2625+
BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
2626+
BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
2627+
};
2628+
26152629
struct bpf_fib_lookup {
26162630
/* input: network family for lookup (AF_INET, AF_INET6)
26172631
* output: network family of egress nexthop
@@ -2625,7 +2639,11 @@ struct bpf_fib_lookup {
26252639

26262640
/* total length of packet from network header - used for MTU check */
26272641
__u16 tot_len;
2628-
__u32 ifindex; /* L3 device index for lookup */
2642+
2643+
/* input: L3 device index for lookup
2644+
* output: device index from FIB lookup
2645+
*/
2646+
__u32 ifindex;
26292647

26302648
union {
26312649
/* inputs to lookup */

tools/include/uapi/linux/perf_event.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,8 @@ enum perf_event_sample_format {
143143
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
144144

145145
PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
146+
147+
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63,
146148
};
147149

148150
/*

tools/perf/arch/x86/util/pmu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0
22
#include <string.h>
33

4+
#include <linux/stddef.h>
45
#include <linux/perf_event.h>
56

67
#include "../../util/intel-pt.h"

tools/perf/arch/x86/util/tsc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <stdbool.h>
33
#include <errno.h>
44

5+
#include <linux/stddef.h>
56
#include <linux/perf_event.h>
67

78
#include "../../perf.h"

tools/perf/bench/Build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ perf-y += futex-wake-parallel.o
77
perf-y += futex-requeue.o
88
perf-y += futex-lock-pi.o
99

10+
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
1011
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
1112
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
1213

tools/perf/bench/mem-memcpy-x86-64-asm.S

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#define altinstr_replacement text
77
#define globl p2align 4; .globl
88
#define _ASM_EXTABLE_FAULT(x, y)
9+
#define _ASM_EXTABLE(x, y)
910

1011
#include "../../arch/x86/lib/memcpy_64.S"
1112
/*
tools/perf/bench/mem-memcpy-x86-64-lib.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
/*
2+
* From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
3+
* of the kernel's arch/x86/lib/memcpy_64.S used in 'perf bench mem memcpy'
4+
* happy.
5+
*/
6+
#include <linux/types.h>
7+
8+
unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
9+
unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
10+
11+
unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
12+
{
13+
for (; len; --len, to++, from++) {
14+
/*
15+
* Call the assembly routine back directly since
16+
* memcpy_mcsafe() may silently fallback to memcpy.
17+
*/
18+
unsigned long rem = __memcpy_mcsafe(to, from, 1);
19+
20+
if (rem)
21+
break;
22+
}
23+
return len;
24+
}

tools/perf/perf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
#include <time.h>
66
#include <stdbool.h>
77
#include <linux/types.h>
8+
#include <linux/stddef.h>
89
#include <linux/perf_event.h>
910

1011
extern bool test_attr__enabled;

tools/perf/util/header.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#ifndef __PERF_HEADER_H
33
#define __PERF_HEADER_H
44

5+
#include <linux/stddef.h>
56
#include <linux/perf_event.h>
67
#include <sys/types.h>
78
#include <stdbool.h>

tools/perf/util/namespaces.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define __PERF_NAMESPACES_H
1111

1212
#include <sys/types.h>
13+
#include <linux/stddef.h>
1314
#include <linux/perf_event.h>
1415
#include <linux/refcount.h>
1516
#include <linux/types.h>

0 commit comments

Comments
 (0)