Commit 7f02ce6

tools headers: Update the copy of x86's mem{cpy,set}_64.S used in 'perf bench'
This is to get the changes from:

  68674f9 ("x86: don't use REP_GOOD or ERMS for small memory copies")
  20f3337 ("x86: don't use REP_GOOD or ERMS for small memory clearing")

This also makes the 'perf bench mem' files stop referring to the erms
versions that went away with the above patches.

That addresses these perf tools build warnings:

  Warning: Kernel ABI header at 'tools/arch/x86/lib/memcpy_64.S' differs from latest version at 'arch/x86/lib/memcpy_64.S'
    diff -u tools/arch/x86/lib/memcpy_64.S arch/x86/lib/memcpy_64.S
  Warning: Kernel ABI header at 'tools/arch/x86/lib/memset_64.S' differs from latest version at 'arch/x86/lib/memset_64.S'
    diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S

Signed-off-by: Arnaldo Carvalho de Melo <[email protected]>
1 parent 9bc83d6 commit 7f02ce6

File tree

7 files changed, +24 -72 lines changed


tools/arch/x86/lib/memcpy_64.S

Lines changed: 10 additions & 24 deletions
@@ -9,13 +9,6 @@
 
 .section .noinstr.text, "ax"
 
-/*
- * We build a jump to memcpy_orig by default which gets NOPped out on
- * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
- * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
- * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
- */
-
 /*
  * memcpy - Copy a memory block.
  *
@@ -26,17 +19,21 @@
  *
  * Output:
  * rax original destination
+ *
+ * The FSRM alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep movsb' itself is small enough to replace the call, but the
+ * two register moves blow up the code. And one of them is "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
  */
 SYM_TYPED_FUNC_START(__memcpy)
-        ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
-                      "jmp memcpy_erms", X86_FEATURE_ERMS
+        ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
 
         movq %rdi, %rax
         movq %rdx, %rcx
-        shrq $3, %rcx
-        andl $7, %edx
-        rep movsq
-        movl %edx, %ecx
         rep movsb
         RET
 SYM_FUNC_END(__memcpy)
@@ -45,17 +42,6 @@ EXPORT_SYMBOL(__memcpy)
 SYM_FUNC_ALIAS(memcpy, __memcpy)
 EXPORT_SYMBOL(memcpy)
 
-/*
- * memcpy_erms() - enhanced fast string memcpy. This is faster and
- * simpler than memcpy. Use memcpy_erms when possible.
- */
-SYM_FUNC_START_LOCAL(memcpy_erms)
-        movq %rdi, %rax
-        movq %rdx, %rcx
-        rep movsb
-        RET
-SYM_FUNC_END(memcpy_erms)
-
 SYM_FUNC_START_LOCAL(memcpy_orig)
         movq %rdi, %rax
 
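To make the new comment's reasoning concrete: on a CPU with X86_FEATURE_FSRM the 'jmp memcpy_orig' is patched out, and __memcpy is nothing more than 'rep movsb' plus preserving the destination as the return value. A hypothetical user-space equivalent in C (illustration only; fsrm_style_memcpy is not part of this patch or of 'perf bench'):

#include <stddef.h>

/* Roughly what the patched __memcpy does on an FSRM CPU: dst, src and
 * len land in %rdi/%rsi/%rcx, 'rep movsb' does the copy, and the only
 * extra work is returning the original destination (the %rax move). */
static void *fsrm_style_memcpy(void *dst, const void *src, size_t len)
{
        void *ret = dst;                /* movq %rdi, %rax */

        asm volatile("rep movsb"
                     : "+D" (dst), "+S" (src), "+c" (len)
                     :
                     : "memory");
        return ret;
}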

tools/arch/x86/lib/memset_64.S

Lines changed: 11 additions & 36 deletions
@@ -18,27 +18,22 @@
  * rdx count (bytes)
  *
  * rax original destination
+ *
+ * The FSRS alternative should be done inline (avoiding the call and
+ * the disgusting return handling), but that would require some help
+ * from the compiler for better calling conventions.
+ *
+ * The 'rep stosb' itself is small enough to replace the call, but all
+ * the register moves blow up the code. And two of them are "needed"
+ * only for the return value that is the same as the source input,
+ * which the compiler could/should do much better anyway.
  */
 SYM_FUNC_START(__memset)
-        /*
-         * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
-         * to use it when possible. If not available, use fast string instructions.
-         *
-         * Otherwise, use original memset function.
-         */
-        ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
-                      "jmp memset_erms", X86_FEATURE_ERMS
+        ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
 
         movq %rdi,%r9
+        movb %sil,%al
         movq %rdx,%rcx
-        andl $7,%edx
-        shrq $3,%rcx
-        /* expand byte value  */
-        movzbl %sil,%esi
-        movabs $0x0101010101010101,%rax
-        imulq %rsi,%rax
-        rep stosq
-        movl %edx,%ecx
         rep stosb
         movq %r9,%rax
         RET
@@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
 SYM_FUNC_ALIAS(memset, __memset)
 EXPORT_SYMBOL(memset)
 
-/*
- * ISO C memset - set a memory block to a byte value. This function uses
- * enhanced rep stosb to override the fast string function.
- * The code is simpler and shorter than the fast string function as well.
- *
- * rdi destination
- * rsi value (char)
- * rdx count (bytes)
- *
- * rax original destination
- */
-SYM_FUNC_START_LOCAL(memset_erms)
-        movq %rdi,%r9
-        movb %sil,%al
-        movq %rdx,%rcx
-        rep stosb
-        movq %r9,%rax
-        RET
-SYM_FUNC_END(memset_erms)
-
 SYM_FUNC_START_LOCAL(memset_orig)
         movq %rdi,%r10
 
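The same reasoning applies to memset with X86_FEATURE_FSRS and 'rep stosb': the fill byte goes in %al, the count in %rcx, and the original destination is kept only for the return value. A hypothetical user-space equivalent in C (illustration only; fsrs_style_memset is not part of this patch):

#include <stddef.h>

/* Roughly what the patched __memset does on an FSRS CPU: the fill byte
 * is placed in %al, the count in %rcx, 'rep stosb' fills the buffer,
 * and the saved destination pointer is returned. */
static void *fsrs_style_memset(void *dst, int c, size_t len)
{
        void *ret = dst;                /* movq %rdi, %r9 ... movq %r9, %rax */

        asm volatile("rep stosb"
                     : "+D" (dst), "+c" (len)
                     : "a" (c)
                     : "memory");
        return ret;
}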

tools/include/asm/alternative.h

Lines changed: 1 addition & 2 deletions
@@ -4,7 +4,6 @@
 
 /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
 
-#define altinstruction_entry #
-#define ALTERNATIVE_2 #
+#define ALTERNATIVE #
 
 #endif
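This one-line stub is enough because '#' starts a line comment in x86 GAS: once the C preprocessor substitutes the macro, the whole ALTERNATIVE invocation turns into a comment, so the user-space 'perf bench' build always runs the fall-through 'rep movsb'/'rep stosb' bodies of __memcpy/__memset, while memcpy_orig/memset_orig are benchmarked as separate entry points. A sketch of the expansion (illustrative only, not part of the patch):

        /* as written in tools/arch/x86/lib/memcpy_64.S: */
        ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM

        /* what the assembler sees in the perf build after '#define ALTERNATIVE #': */
        # "jmp memcpy_orig", "", X86_FEATURE_FSRM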

tools/perf/bench/mem-memcpy-x86-64-asm-def.h

Lines changed: 0 additions & 4 deletions
@@ -7,7 +7,3 @@ MEMCPY_FN(memcpy_orig,
 MEMCPY_FN(__memcpy,
         "x86-64-movsq",
         "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
-
-MEMCPY_FN(memcpy_erms,
-        "x86-64-movsb",
-        "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")

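For context, these *-def.h files are X-macro tables: 'perf bench mem' includes them with its own definition of MEMCPY_FN()/MEMSET_FN() to build the list of routines it can benchmark, which is why dropping the memcpy_erms entry is all that is needed once the symbol is gone from the .S file. A rough, hypothetical sketch of the pattern in C (simplified; the real table construction in tools/perf/bench differs in detail):

#include <stddef.h>

typedef void *(*memcpy_t)(void *dst, const void *src, size_t len);

struct routine {
        const char *name;       /* e.g. "x86-64-movsq" */
        const char *desc;
        memcpy_t    fn;
};

extern void *__memcpy(void *dst, const void *src, size_t len);

/* Each MEMCPY_FN(fn, name, desc) line in the def header expands to one
 * table entry; an entry naming a removed symbol would fail to link. */
#define MEMCPY_FN(fn, name, desc) { name, desc, fn },

static const struct routine memcpy_routines[] = {
        MEMCPY_FN(__memcpy, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
        { NULL, NULL, NULL }
};

#undef MEMCPY_FN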
tools/perf/bench/mem-memcpy-x86-64-asm.S

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@
 
 /* Various wrappers to make the kernel .S file build in user-space: */
 
-// memcpy_orig and memcpy_erms are being defined as SYM_L_LOCAL but we need it
+// memcpy_orig is being defined as SYM_L_LOCAL but we need it
 #define SYM_FUNC_START_LOCAL(name) \
         SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #define memcpy MEMCPY /* don't hide glibc's memcpy() */

tools/perf/bench/mem-memset-x86-64-asm-def.h

Lines changed: 0 additions & 4 deletions
@@ -7,7 +7,3 @@ MEMSET_FN(memset_orig,
 MEMSET_FN(__memset,
         "x86-64-stosq",
         "movsq-based memset() in arch/x86/lib/memset_64.S")
-
-MEMSET_FN(memset_erms,
-        "x86-64-stosb",
-        "movsb-based memset() in arch/x86/lib/memset_64.S")

tools/perf/bench/mem-memset-x86-64-asm.S

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// memset_orig and memset_erms are being defined as SYM_L_LOCAL but we need it
+// memset_orig is being defined as SYM_L_LOCAL but we need it
 #define SYM_FUNC_START_LOCAL(name) \
         SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
 #define memset MEMSET /* don't hide glibc's memset() */
