Skip to content

Commit b3b5413

Browse files
committed
[libc][NFC] Separate avx/no-avx x86 memcpy implementations
Reviewed By: courbet

Differential Revision: https://reviews.llvm.org/D153958
1 parent 7653011 commit b3b5413

File tree

1 file changed

+39
-4
lines changed

1 file changed

+39
-4
lines changed

libc/src/string/memory_utils/x86_64/memcpy_implementations.h

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
namespace __llvm_libc {
2020

2121
[[maybe_unused]] LIBC_INLINE void
22-
inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) {
22+
inline_memcpy_x86_avx(Ptr __restrict dst, CPtr __restrict src, size_t count) {
2323
if (count == 0)
2424
return;
2525
if (count == 1)
@@ -40,12 +40,47 @@ inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) {
4040
return builtin::Memcpy<32>::head_tail(dst, src, count);
4141
if (count < 128)
4242
return builtin::Memcpy<64>::head_tail(dst, src, count);
43-
if (x86::kAvx && count < 256)
43+
if (count < 256)
4444
return builtin::Memcpy<128>::head_tail(dst, src, count);
4545
builtin::Memcpy<32>::block(dst, src);
4646
align_to_next_boundary<32, Arg::Dst>(dst, src, count);
47-
static constexpr size_t kBlockSize = x86::kAvx ? 64 : 32;
48-
return builtin::Memcpy<kBlockSize>::loop_and_tail(dst, src, count);
47+
return builtin::Memcpy<64>::loop_and_tail(dst, src, count);
48+
}
49+
50+
// x86 memcpy strategy for builds where `inline_memcpy_x86` does not select the
// AVX variant (i.e. `x86::kAvx` is false).
//
// The copy is dispatched on `count`:
//   - 0 bytes: nothing to do.
//   - 1..4 bytes: a single fixed-size block copy of exactly that size.
//   - 5..127 bytes: `head_tail` with a block size B such that B <= count < 2*B
//     (for 5..7 bytes, B == 4). Presumably this copies the first and last B
//     bytes, which may overlap - confirm against `builtin::Memcpy`.
//   - 128 bytes and above: one 32-byte block, then `align_to_next_boundary` on
//     the destination, then a 32-byte `loop_and_tail`.
//
// The order of the size checks is kept exactly as written: small sizes are
// tested first.
[[maybe_unused]] LIBC_INLINE void inline_memcpy_x86_no_avx(Ptr __restrict dst,
                                                           CPtr __restrict src,
                                                           size_t count) {
  // Exact-size copies for the tiniest requests.
  if (count == 0) {
    return;
  }
  if (count == 1) {
    builtin::Memcpy<1>::block(dst, src);
    return;
  }
  if (count == 2) {
    builtin::Memcpy<2>::block(dst, src);
    return;
  }
  if (count == 3) {
    builtin::Memcpy<3>::block(dst, src);
    return;
  }
  if (count == 4) {
    builtin::Memcpy<4>::block(dst, src);
    return;
  }

  // Small and medium sizes: each range [B, 2*B) is served by Memcpy<B>.
  if (count < 8) {
    builtin::Memcpy<4>::head_tail(dst, src, count);
    return;
  }
  if (count < 16) {
    builtin::Memcpy<8>::head_tail(dst, src, count);
    return;
  }
  if (count < 32) {
    builtin::Memcpy<16>::head_tail(dst, src, count);
    return;
  }
  if (count < 64) {
    builtin::Memcpy<32>::head_tail(dst, src, count);
    return;
  }
  if (count < 128) {
    builtin::Memcpy<64>::head_tail(dst, src, count);
    return;
  }

  // Large sizes (count >= 128): copy the first 32 bytes unconditionally, then
  // let `align_to_next_boundary` adjust dst/src/count relative to the next
  // 32-byte boundary of the destination (NOTE(review): presumably it advances
  // both pointers and shrinks `count` - confirm in its definition), and
  // finish with a 32-byte loop plus tail.
  builtin::Memcpy<32>::block(dst, src);
  align_to_next_boundary<32, Arg::Dst>(dst, src, count);
  builtin::Memcpy<32>::loop_and_tail(dst, src, count);
}
77+
78+
// Entry point for the x86 memcpy strategy.
//
// Selects, at compile time via `x86::kAvx`, between the AVX and the non-AVX
// implementation and forwards `dst`, `src` and `count` to it unchanged.
[[maybe_unused]] LIBC_INLINE void
inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) {
  if constexpr (x86::kAvx) {
    inline_memcpy_x86_avx(dst, src, count);
  } else {
    inline_memcpy_x86_no_avx(dst, src, count);
  }
}
5085

5186
[[maybe_unused]] LIBC_INLINE void

0 commit comments

Comments
 (0)