Commit 1f27a05

tools arch: Update arch/x86/lib/memcpy_64.S copy used in 'perf bench mem memcpy'

To cope with the changes in:

  12c8913 ("x86/asm/memcpy_mcsafe: Add write-protection-fault handling")
  60622d6 ("x86/asm/memcpy_mcsafe: Return bytes remaining")
  bd13154 ("x86/asm/memcpy_mcsafe: Add labels for __memcpy_mcsafe() write fault handling")
  da7bc9c ("x86/asm/memcpy_mcsafe: Remove loop unrolling")

This needed introducing a file with a copy of the mcsafe_handle_tail()
function, that is used in the new memcpy_64.S file, as well as a dummy
mcsafe_test.h header.

Testing it:

  $ nm ~/bin/perf | grep mcsafe
  0000000000484130 T mcsafe_handle_tail
  0000000000484300 T __memcpy_mcsafe
  $
  $ perf bench mem memcpy
  # Running 'mem/memcpy' benchmark:
  # function 'default' (Default memcpy() provided by glibc)
  # Copying 1MB bytes ...
       44.389205 GB/sec
  # function 'x86-64-unrolled' (unrolled memcpy() in arch/x86/lib/memcpy_64.S)
  # Copying 1MB bytes ...
       22.710756 GB/sec
  # function 'x86-64-movsq' (movsq-based memcpy() in arch/x86/lib/memcpy_64.S)
  # Copying 1MB bytes ...
       42.459239 GB/sec
  # function 'x86-64-movsb' (movsb-based memcpy() in arch/x86/lib/memcpy_64.S)
  # Copying 1MB bytes ...
       42.459239 GB/sec
  $

This silences this perf tools build warning:

  Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'

Cc: Adrian Hunter <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: David Ahern <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Mika Penttilä <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Tony Luck <[email protected]>
Cc: Wang Nan <[email protected]>
Link: https://lkml.kernel.org/n/[email protected]
Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent fc73bfd commit 1f27a05
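The key behavioral change pulled in from 60622d6 is that __memcpy_mcsafe() now returns the number of bytes left uncopied instead of -EFAULT. The following is a minimal caller sketch, not part of this commit (the check_copy() helper and the stderr reporting are illustrative assumptions); it only uses the prototype that the new mem-memcpy-x86-64-lib.c file below declares and assumes linking against the perf benchmark objects:

#include <stddef.h>
#include <stdio.h>

/* Prototype as declared in the new mem-memcpy-x86-64-lib.c below */
unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);

/* Hypothetical helper: returns 0 on a full copy, -1 if the copy stopped early */
static int check_copy(void *dst, const void *src, size_t cnt)
{
        unsigned long rem = __memcpy_mcsafe(dst, src, cnt);

        if (rem) {
                /* rem trailing bytes were left uncopied after a fault */
                fprintf(stderr, "copy stopped with %lu of %zu bytes left\n",
                        rem, cnt);
                return -1;
        }
        return 0;
}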

File tree: 5 files changed, +93 −58 lines

tools/arch/x86/include/asm/mcsafe_test.h

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MCSAFE_TEST_H_
+#define _MCSAFE_TEST_H_
+
+.macro MCSAFE_TEST_CTL
+.endm
+
+.macro MCSAFE_TEST_SRC reg count target
+.endm
+
+.macro MCSAFE_TEST_DST reg count target
+.endm
+#endif /* _MCSAFE_TEST_H_ */

tools/arch/x86/lib/memcpy_64.S

Lines changed: 54 additions & 58 deletions
@@ -3,6 +3,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
+#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -183,12 +184,15 @@ ENTRY(memcpy_orig)
 ENDPROC(memcpy_orig)
 
 #ifndef CONFIG_UML
+
+MCSAFE_TEST_CTL
+
 /*
- * memcpy_mcsafe_unrolled - memory copy with machine check exception handling
+ * __memcpy_mcsafe - memory copy with machine check exception handling
  * Note that we only catch machine checks when reading the source addresses.
  * Writes to target are posted and don't generate machine checks.
  */
-ENTRY(memcpy_mcsafe_unrolled)
+ENTRY(__memcpy_mcsafe)
         cmpl $8, %edx
         /* Less than 8 bytes? Go to byte copy loop */
         jb .L_no_whole_words
@@ -204,58 +208,33 @@ ENTRY(memcpy_mcsafe_unrolled)
         subl $8, %ecx
         negl %ecx
         subl %ecx, %edx
-.L_copy_leading_bytes:
+.L_read_leading_bytes:
         movb (%rsi), %al
+        MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+        MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
         movb %al, (%rdi)
         incq %rsi
         incq %rdi
         decl %ecx
-        jnz .L_copy_leading_bytes
+        jnz .L_read_leading_bytes
 
 .L_8byte_aligned:
-        /* Figure out how many whole cache lines (64-bytes) to copy */
-        movl %edx, %ecx
-        andl $63, %edx
-        shrl $6, %ecx
-        jz .L_no_whole_cache_lines
-
-        /* Loop copying whole cache lines */
-.L_cache_w0: movq (%rsi), %r8
-.L_cache_w1: movq 1*8(%rsi), %r9
-.L_cache_w2: movq 2*8(%rsi), %r10
-.L_cache_w3: movq 3*8(%rsi), %r11
-        movq %r8, (%rdi)
-        movq %r9, 1*8(%rdi)
-        movq %r10, 2*8(%rdi)
-        movq %r11, 3*8(%rdi)
-.L_cache_w4: movq 4*8(%rsi), %r8
-.L_cache_w5: movq 5*8(%rsi), %r9
-.L_cache_w6: movq 6*8(%rsi), %r10
-.L_cache_w7: movq 7*8(%rsi), %r11
-        movq %r8, 4*8(%rdi)
-        movq %r9, 5*8(%rdi)
-        movq %r10, 6*8(%rdi)
-        movq %r11, 7*8(%rdi)
-        leaq 64(%rsi), %rsi
-        leaq 64(%rdi), %rdi
-        decl %ecx
-        jnz .L_cache_w0
-
-        /* Are there any trailing 8-byte words? */
-.L_no_whole_cache_lines:
         movl %edx, %ecx
         andl $7, %edx
         shrl $3, %ecx
         jz .L_no_whole_words
 
-        /* Copy trailing words */
-.L_copy_trailing_words:
+.L_read_words:
         movq (%rsi), %r8
-        mov %r8, (%rdi)
-        leaq 8(%rsi), %rsi
-        leaq 8(%rdi), %rdi
+        MCSAFE_TEST_SRC %rsi 8 .E_read_words
+        MCSAFE_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+        movq %r8, (%rdi)
+        addq $8, %rsi
+        addq $8, %rdi
         decl %ecx
-        jnz .L_copy_trailing_words
+        jnz .L_read_words
 
         /* Any trailing bytes? */
 .L_no_whole_words:
@@ -264,38 +243,55 @@ ENTRY(memcpy_mcsafe_unrolled)
 
         /* Copy trailing bytes */
         movl %edx, %ecx
-.L_copy_trailing_bytes:
+.L_read_trailing_bytes:
         movb (%rsi), %al
+        MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+        MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
         movb %al, (%rdi)
         incq %rsi
         incq %rdi
         decl %ecx
-        jnz .L_copy_trailing_bytes
+        jnz .L_read_trailing_bytes
 
         /* Copy successful. Return zero */
 .L_done_memcpy_trap:
         xorq %rax, %rax
         ret
-ENDPROC(memcpy_mcsafe_unrolled)
-EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled)
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
 
         .section .fixup, "ax"
-        /* Return -EFAULT for any failure */
-.L_memcpy_mcsafe_fail:
-        mov $-EFAULT, %rax
+        /*
+         * Return number of bytes not copied for any failure. Note that
+         * there is no "tail" handling since the source buffer is 8-byte
+         * aligned and poison is cacheline aligned.
+         */
+.E_read_words:
+        shll $3, %ecx
+.E_leading_bytes:
+        addl %edx, %ecx
+.E_trailing_bytes:
+        mov %ecx, %eax
         ret
 
+        /*
+         * For write fault handling, given the destination is unaligned,
+         * we handle faults on multi-byte writes with a byte-by-byte
+         * copy up to the write-protected page.
+         */
+.E_write_words:
+        shll $3, %ecx
+        addl %edx, %ecx
+        movl %ecx, %edx
+        jmp mcsafe_handle_tail
+
         .previous
 
-        _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail)
-        _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail)
+        _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+        _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+        _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+        _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+        _ASM_EXTABLE(.L_write_words, .E_write_words)
+        _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
 #endif
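
The arithmetic performed by the new read-fault fixup labels can be written out in C for clarity. This is an illustrative sketch only (the helper name and standalone form are not in the commit): on a fault in the 8-byte word loop, %ecx still holds the number of words not yet copied and %edx the trailing byte count, and the fixup folds both into the "bytes not copied" return value.

/*
 * Illustrative C equivalent of the read-fault fixup path in
 * __memcpy_mcsafe (not part of the commit): words_left mirrors %ecx,
 * tail_bytes mirrors %edx, and the result is what ends up in %eax.
 */
static unsigned long mcsafe_read_fault_remainder(unsigned long words_left,
                                                 unsigned long tail_bytes)
{
        /* .E_read_words:     shll $3, %ecx   - convert words to bytes  */
        /* .E_leading_bytes:  addl %edx, %ecx - add the pending bytes   */
        /* .E_trailing_bytes: mov %ecx, %eax  - return the byte count   */
        return (words_left << 3) + tail_bytes;
}

The .E_write_words path computes the same sum but hands it to mcsafe_handle_tail(), which retries the copy byte by byte up to the write-protected page, as the comment added in the fixup section describes.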

tools/perf/bench/Build

Lines changed: 1 addition & 0 deletions
@@ -7,6 +7,7 @@ perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 perf-y += futex-lock-pi.o
 
+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
 

tools/perf/bench/mem-memcpy-x86-64-asm.S

Lines changed: 1 addition & 0 deletions
@@ -6,6 +6,7 @@
 #define altinstr_replacement text
 #define globl p2align 4; .globl
 #define _ASM_EXTABLE_FAULT(x, y)
+#define _ASM_EXTABLE(x, y)
 
 #include "../../arch/x86/lib/memcpy_64.S"
 /*

tools/perf/bench/mem-memcpy-x86-64-lib.c

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
+/*
+ * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
+ * of the kernel's arch/x86/lib/memcpy_64.S used in 'perf bench mem memcpy'
+ * happy.
+ */
+#include <linux/types.h>
+
+unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
+
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+        for (; len; --len, to++, from++) {
+                /*
+                 * Call the assembly routine back directly since
+                 * memcpy_mcsafe() may silently fallback to memcpy.
+                 */
+                unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+                if (rem)
+                        break;
+        }
+        return len;
+}
