Skip to content

Commit b915399

Browse files
vleonen authored and gopherbot committed
internal/bytealg: optimize Equal for arm64 target
Remove redundant intermediate jump in runtime.memequal
Remove redundant a.ptr==b.ptr check in runtime.memequal_varlen
Add 16-bytes alignment before some labels in runtime.memequal

goos: linux
goarch: arm64
pkg: bytes
                                │ ./master.log │              ./opt.log              │
                                │    sec/op    │    sec/op     vs base               │
Equal/0-4                         0.8342n ± 0%   0.5254n ± 3%  -37.01% (p=0.000 n=8)
Equal/same/1-4                     2.720n ± 0%    2.720n ± 2%        ~ (p=0.779 n=8)
Equal/same/6-4                     2.720n ± 5%    2.720n ± 2%        ~ (p=0.908 n=8)
Equal/same/9-4                     2.722n ± 2%    2.721n ± 2%        ~ (p=0.779 n=8)
Equal/same/15-4                    2.719n ± 0%    2.719n ± 0%        ~ (p=0.641 n=8)
Equal/same/16-4                    2.721n ± 2%    2.719n ± 0%   -0.07% (p=0.014 n=8)
Equal/same/20-4                    2.720n ± 0%    2.721n ± 2%        ~ (p=0.236 n=8)
Equal/same/32-4                    2.720n ± 1%    2.720n ± 0%        ~ (p=0.396 n=8)
Equal/same/4K-4                    2.719n ± 0%    2.720n ± 0%        ~ (p=0.663 n=8)
Equal/same/4M-4                    2.721n ± 0%    2.720n ± 0%        ~ (p=0.075 n=8)
Equal/same/64M-4                   2.720n ± 0%    2.720n ± 2%        ~ (p=0.806 n=8)
Equal/1-4                          6.671n ± 0%    5.449n ± 0%  -18.33% (p=0.000 n=8)
Equal/6-4                          8.761n ± 2%    7.508n ± 0%  -14.30% (p=0.000 n=8)
Equal/9-4                          8.343n ± 0%    7.091n ± 0%  -15.01% (p=0.000 n=8)
Equal/15-4                         8.339n ± 2%    7.090n ± 0%  -14.98% (p=0.000 n=8)
Equal/16-4                         9.173n ± 0%    7.925n ± 2%  -13.61% (p=0.000 n=8)
Equal/20-4                         11.26n ± 0%    10.01n ± 0%  -11.10% (p=0.000 n=8)
Equal/32-4                        10.425n ± 0%    9.176n ± 0%  -11.98% (p=0.000 n=8)
Equal/4K-4                         192.9n ± 0%    192.7n ± 0%   -0.10% (p=0.044 n=8)
Equal/4M-4                         191.3µ ± 0%    191.3µ ± 0%        ~ (p=0.798 n=8)
Equal/64M-4                        3.066m ± 2%    3.065m ± 0%        ~ (p=0.083 n=8)
EqualBothUnaligned/64_0-4          7.506n ± 2%    7.090n ± 2%   -5.55% (p=0.000 n=8)
EqualBothUnaligned/64_1-4          7.850n ± 1%    7.423n ± 0%   -5.43% (p=0.000 n=8)
EqualBothUnaligned/64_4-4          7.505n ± 0%    7.088n ± 0%   -5.56% (p=0.000 n=8)
EqualBothUnaligned/64_7-4          7.840n ± 0%    7.413n ± 0%   -5.44% (p=0.000 n=8)
EqualBothUnaligned/4096_0-4        193.0n ± 4%    190.9n ± 0%   -1.09% (p=0.004 n=8)
EqualBothUnaligned/4096_1-4        223.9n ± 0%    223.1n ± 0%   -0.36% (p=0.000 n=8)
EqualBothUnaligned/4096_4-4        191.9n ± 2%    191.5n ± 0%   -0.21% (p=0.004 n=8)
EqualBothUnaligned/4096_7-4        223.8n ± 0%    223.1n ± 1%        ~ (p=0.098 n=8)
EqualBothUnaligned/4194304_0-4     191.8µ ± 0%    191.8µ ± 0%        ~ (p=0.504 n=8)
EqualBothUnaligned/4194304_1-4     225.4µ ± 2%    225.5µ ± 0%        ~ (p=0.065 n=8)
EqualBothUnaligned/4194304_4-4     192.6µ ± 0%    192.7µ ± 2%   +0.06% (p=0.041 n=8)
EqualBothUnaligned/4194304_7-4     225.4µ ± 0%    225.5µ ± 0%   +0.05% (p=0.050 n=8)
EqualBothUnaligned/67108864_0-4    3.069m ± 0%    3.069m ± 0%        ~ (p=0.314 n=8)
EqualBothUnaligned/67108864_1-4    3.589m ± 0%    3.588m ± 0%        ~ (p=0.959 n=8)
EqualBothUnaligned/67108864_4-4    3.083m ± 0%    3.083m ± 2%        ~ (p=0.505 n=8)
EqualBothUnaligned/67108864_7-4    3.588m ± 0%    3.588m ± 0%        ~ (p=1.000 n=8)
geomean                            199.9n         190.5n        -4.70%

Change-Id: Ib8d0d4006dd39162a600ac98a5f44a0f05136ed3
Reviewed-on: https://go-review.googlesource.com/c/go/+/601135
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Auto-Submit: Keith Randall <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Auto-Submit: Keith Randall <[email protected]>
1 parent 1f0c044 commit b915399

File tree

1 file changed

+10
-16
lines changed

1 file changed

+10
-16
lines changed

src/internal/bytealg/equal_arm64.s

Lines changed: 10 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -5,25 +5,11 @@
55
#include "go_asm.h"
66
#include "textflag.h"
77

8-
// memequal(a, b unsafe.Pointer, size uintptr) bool
9-
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
10-
// short path to handle 0-byte case
11-
CBZ R2, equal
12-
// short path to handle equal pointers
13-
CMP R0, R1
14-
BEQ equal
15-
B memeqbody<>(SB)
16-
equal:
17-
MOVD $1, R0
18-
RET
19-
208
// memequal_varlen(a, b unsafe.Pointer) bool
219
TEXT runtime·memequal_varlen<ABIInternal>(SB),NOSPLIT,$0-17
22-
CMP R0, R1
23-
BEQ eq
2410
MOVD 8(R26), R2 // compiler stores size at offset 8 in the closure
2511
CBZ R2, eq
26-
B memeqbody<>(SB)
12+
B runtime·memequal<ABIInternal>(SB)
2713
eq:
2814
MOVD $1, R0
2915
RET
@@ -33,7 +19,13 @@ eq:
3319
// R1: pointer b
3420
// R2: data len
3521
// at return: result in R0
36-
TEXT memeqbody<>(SB),NOSPLIT,$0
22+
// memequal(a, b unsafe.Pointer, size uintptr) bool
23+
TEXT runtime·memequal<ABIInternal>(SB),NOSPLIT|NOFRAME,$0-25
24+
// short path to handle 0-byte case
25+
CBZ R2, equal
26+
// short path to handle equal pointers
27+
CMP R0, R1
28+
BEQ equal
3729
CMP $1, R2
3830
// handle 1-byte special case for better performance
3931
BEQ one
@@ -91,6 +83,7 @@ tail:
9183
EOR R4, R5
9284
CBNZ R5, not_equal
9385
B equal
86+
PCALIGN $16
9487
lt_8:
9588
TBZ $2, R2, lt_4
9689
MOVWU (R0), R4
@@ -103,6 +96,7 @@ lt_8:
10396
EOR R4, R5
10497
CBNZ R5, not_equal
10598
B equal
99+
PCALIGN $16
106100
lt_4:
107101
TBZ $1, R2, lt_2
108102
MOVHU.P 2(R0), R4

0 commit comments

Comments
 (0)