Skip to content

Commit 3ab8d2a

Browse files
Dinar Temirbulatov and david-arm authored
[AArch64][compiler-rt] Add memcpy, memset, memmove, memchr builtins. (#77496)
Add naive implementation of memcpy, memset, memmove, memchr for SME targets. Co-authored-by: David Sherwood <[email protected]>
1 parent ff96273 commit 3ab8d2a

File tree

7 files changed

+220
-5
lines changed

7 files changed

+220
-5
lines changed

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,10 +35,12 @@ asm(\".arch armv8-a+lse\");
3535
asm(\"cas w0, w1, [x2]\");
3636
")
3737

38-
builtin_check_c_compiler_source(COMPILER_RT_HAS_ASM_SME
38+
builtin_check_c_compiler_source(COMPILER_RT_HAS_AARCH64_SME
3939
"
40-
asm(\".arch armv9-a+sme\");
41-
asm(\"smstart\");
40+
void foo(void) __arm_streaming_compatible {
41+
asm(\".arch armv9-a+sme\");
42+
asm(\"smstart\");
43+
}
4244
")
4345

4446
if(ANDROID)

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,9 +560,10 @@ set(aarch64_SOURCES
560560
aarch64/fp_mode.c
561561
)
562562

563-
if(COMPILER_RT_HAS_ASM_SME AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
564-
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c)
563+
if(COMPILER_RT_HAS_AARCH64_SME AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND (COMPILER_RT_HAS_AUXV OR COMPILER_RT_BAREMETAL_BUILD))
564+
list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-abi-init.c aarch64/sme-libc-routines.c)
565565
message(STATUS "AArch64 SME ABI routines enabled")
566+
set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
566567
else()
567568
message(STATUS "AArch64 SME ABI routines disabled")
568569
endif()
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
#include <stdint.h>
#include <stdlib.h>
2+
3+
// WARNING: When building the scalar versions of these functions you need to
4+
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
5+
// from recognising a loop idiom and planting calls to memcpy!
6+
7+
// Copies n bytes from src to dest one byte at a time, walking from the lowest
// address towards the highest, and returns dest.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;
  const unsigned char *const end = in + n;

  while (in != end)
    *out++ = *in++;

  return dest;
}
16+
17+
// memcpy has undefined behaviour when the two regions overlap, so both
// pointers may carry the restrict qualifier; this also mirrors the libc memcpy
// prototype shown in the man page. The copy itself is delegated to the
// forward byte-wise helper, and dest is returned.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  void *const copied_to = __arm_sc_memcpy_fwd(dest, src, n);
  return copied_to;
}
24+
25+
// Stores the value of c, converted to unsigned char, into each of the first n
// bytes at dest and returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *cursor = (unsigned char *)dest;

  for (size_t remaining = n; remaining != 0; --remaining)
    *cursor++ = fill;

  return dest;
}
33+
34+
// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and finishing at the lowest, and returns dest.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;

  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  for (size_t i = n; i-- > 0;)
    out[i] = in[i];

  return dest;
}
46+
47+
// Copies n bytes from src to dest and returns dest. The regions may overlap;
// the result is the same as if the following had been done:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
//
// Only one layout is unsafe for a forward (low-to-high) copy: dest starting
// inside [src, src + n).
//   src:  Low |   ->   | High
//   dest:      Low |   ->   | High
// There every forward store would overwrite source bytes that have not been
// read yet, so the copy must start at the highest address instead. In every
// other layout (dest below src, or the two regions disjoint) each source byte
// is read before anything can overwrite it, so the forward copy is used.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  // Compare integer addresses rather than pointers: a relational comparison of
  // pointers into different objects, and forming dest + n or src + n beyond
  // the end of an object, are both undefined in C, whereas unsigned integer
  // arithmetic is fully defined.
  const uintptr_t dest_addr = (uintptr_t)dest;
  const uintptr_t src_addr = (uintptr_t)src;

  // Nothing to do for an empty move or for moving a region onto itself.
  if (n == 0 || dest_addr == src_addr)
    return dest;

  // When dest is below src the unsigned difference wraps around to a value of
  // at least n, and when dest is at or above src + n the difference is at
  // least n as well. The difference is smaller than n only when dest lies
  // inside [src, src + n).
  if (dest_addr - src_addr >= n)
    return __arm_sc_memcpy_fwd(dest, src, n);

  return __arm_sc_memcpy_rev(dest, src, n);
}
77+
78+
// Scans the first n bytes at src for the value of c converted to unsigned
// char. Returns a pointer to the first matching byte, or NULL when none of the
// n bytes match.
const void *__arm_sc_memchr(const void *src, int c,
                            size_t n) __arm_streaming_compatible {
  const unsigned char wanted = (unsigned char)c;
  const unsigned char *cursor = (const unsigned char *)src;

  for (size_t remaining = n; remaining != 0; --remaining, ++cursor) {
    if (*cursor == wanted)
      return cursor;
  }

  return NULL;
}
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
// REQUIRES: aarch64-target-arch, aarch64-sme-available
2+
// RUN: %clangxx_builtins %s %librt -o %t && %run %t
3+
4+
#include <cassert>
5+
#include <initializer_list>
6+
#include <stdint.h>
7+
#include <stdlib.h>
8+
#include <string.h>
9+
10+
extern "C" {
11+
void *__arm_sc_memcpy(void *, const void *, size_t);
12+
void *__arm_sc_memset(void *, int, size_t);
13+
void *__arm_sc_memmove(void *, const void *, size_t);
14+
void *__arm_sc_memchr(const void *, int, size_t);
15+
}
16+
17+
// Fixed-size byte buffer used as a source/destination fixture by the tests.
//
// N is the number of bytes held in ptr; size mirrors N. The buffer always
// starts out zero-filled so that no check can ever read indeterminate bytes.
// A non-zero stride additionally fills it with the ramp
// ptr[i] = (uint8_t)(i * stride).
template <unsigned N> class Memory {
public:
  uint8_t ptr[N];
  unsigned size;

  Memory(unsigned stride = 0) : ptr{}, size(N) {
    if (stride == 0)
      return;
    for (unsigned i = 0; i < N; i++)
      ptr[i] = static_cast<uint8_t>(i * stride); // wraps modulo 256 by design
  }

  // Asserts that both buffers hold identical bytes.
  void assert_equal(const Memory &other) const {
    assert(N == other.size);
    assert(memcmp(ptr, other.ptr, N) == 0);
  }

  // Asserts that the buffer holds exactly the listed bytes, in order.
  void assert_equal(std::initializer_list<uint8_t> s) const {
    assert(N == s.size());
    auto it = s.begin();
    for (unsigned i = 0; i < N; ++i)
      assert(ptr[i] == *it++);
  }

  // Asserts that the byte at index I equals elem.
  void assert_elemt_equal_at(unsigned I, uint8_t elem) const {
    assert(ptr[I] == elem);
  }
};
46+
47+
int main() {
48+
49+
// Testing memcpy from src to dst.
50+
{
51+
Memory<8> src(1);
52+
Memory<8> dst;
53+
if (!__arm_sc_memcpy(dst.ptr, src.ptr, 8))
54+
abort();
55+
dst.assert_equal(src);
56+
dst.assert_equal({0, 1, 2, 3, 4, 5, 6, 7});
57+
}
58+
59+
// Testing memcpy from src to dst with pointer offset.
60+
{
61+
Memory<8> src(1);
62+
Memory<8> dst(1);
63+
if (!__arm_sc_memcpy(dst.ptr + 1, src.ptr, 6))
64+
abort();
65+
dst.assert_equal({0, 0, 1, 2, 3, 4, 5, 7});
66+
}
67+
68+
// Testing memchr.
69+
{
70+
Memory<8> src(4);
71+
for (unsigned i = 0; i < 8; ++i) {
72+
uint8_t e = src.ptr[i];
73+
uint8_t *elem = (uint8_t *)memchr(src.ptr, e, 8);
74+
if (!elem)
75+
abort();
76+
src.assert_elemt_equal_at(elem - src.ptr, *elem);
77+
for (unsigned i = 0; i < 8; ++i)
78+
assert(__arm_sc_memchr(src.ptr, src.ptr[i], 8) ==
79+
memchr(src.ptr, src.ptr[i], 8));
80+
}
81+
}
82+
83+
// Testing memset.
84+
{
85+
Memory<8> array;
86+
if (!__arm_sc_memset(array.ptr, 2, 8))
87+
abort();
88+
array.assert_equal({2, 2, 2, 2, 2, 2, 2, 2});
89+
}
90+
91+
// Testing memset with pointer offset.
92+
{
93+
Memory<8> array(1);
94+
if (!__arm_sc_memset(array.ptr + 1, 2, 6))
95+
abort();
96+
array.assert_equal({0, 2, 2, 2, 2, 2, 2, 7});
97+
}
98+
99+
// Testing memmove with a simple non-overlap case.
100+
{
101+
Memory<8> src(1);
102+
Memory<8> dst(1);
103+
if (!__arm_sc_memmove(dst.ptr + 1, src.ptr, 6))
104+
abort();
105+
dst.assert_equal({0, 0, 1, 2, 3, 4, 5, 7});
106+
}
107+
108+
// Testing memove with overlap pointers dst > src, dst < src.
109+
{
110+
Memory<8> srcdst(1);
111+
if (!__arm_sc_memmove(srcdst.ptr + 1, srcdst.ptr, 6))
112+
abort();
113+
srcdst.assert_equal({0, 0, 1, 2, 3, 4, 5, 7});
114+
if (!__arm_sc_memmove(srcdst.ptr, srcdst.ptr + 1, 6))
115+
abort();
116+
srcdst.assert_equal({0, 1, 2, 3, 4, 5, 5, 7});
117+
}
118+
119+
return 0;
120+
}

compiler-rt/test/lit.common.cfg.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -454,6 +454,9 @@ def get_ios_commands_dir():
454454
if config.has_lld:
455455
config.available_features.add("lld-available")
456456

457+
if config.aarch64_sme:
458+
config.available_features.add("aarch64-sme-available")
459+
457460
if config.use_lld:
458461
config.available_features.add("lld")
459462

compiler-rt/test/lit.common.configured.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ set_default("gwp_asan", @COMPILER_RT_HAS_GWP_ASAN_PYBOOL@)
5050
set_default("expensive_checks", @LLVM_ENABLE_EXPENSIVE_CHECKS_PYBOOL@)
5151
set_default("test_standalone_build_libs", @COMPILER_RT_TEST_STANDALONE_BUILD_LIBS_PYBOOL@)
5252
set_default("has_compiler_rt_libatomic", @COMPILER_RT_BUILD_STANDALONE_LIBATOMIC_PYBOOL@)
53+
set_default("aarch64_sme", @COMPILER_RT_HAS_AARCH64_SME@)
5354
# True iff the test suite supports ignoring the test compiler's runtime library path
5455
# and using `config.compiler_rt_libdir` instead. This only matters when the runtime
5556
# library paths differ.

compiler-rt/unittests/lit.common.unit.configured.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ config.llvm_obj_root = "@LLVM_BINARY_DIR@"
77
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
88
config.compiler_rt_src_root = "@COMPILER_RT_SOURCE_DIR@"
99
config.compiler_rt_libdir = lit_config.substitute("@COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR@")
10+
config.aarch64_sme = "@COMPILER_RT_HAS_AARCH64_SME@"
1011
config.enable_per_target_runtime_dir = @LLVM_ENABLE_PER_TARGET_RUNTIME_DIR_PYBOOL@
1112
config.llvm_build_mode = lit_config.substitute("@LLVM_BUILD_MODE@")
1213
config.host_arch = "@HOST_ARCH@"

0 commit comments

Comments
 (0)