Skip to content

Commit cd24ec5

Browse files
committed
[compiler-rt][AArch64] Provide basic implementations of SME memcpy/memmove in case of strictly aligned memory access
The existing implementations, written in assembly, make use of unaligned accesses for performance reasons. They are not compatible with strictly aligned configurations, i.e. with `-mno-unaligned-access`. If the functions are used in this scenario, an exception is raised due to unaligned memory accesses. This patch reintroduces vanilla implementations of these functions for use in strictly aligned configurations. The actual code is largely based on the code from #77496.
1 parent 8b56fb7 commit cd24ec5

File tree

2 files changed

+51
-3
lines changed

2 files changed

+51
-3
lines changed

compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
#include "../assembly.h"
88

9+
#ifdef __ARM_FEATURE_UNALIGNED
10+
911
//
1012
// __arm_sc_memcpy / __arm_sc_memmove
1113
//
@@ -346,4 +348,6 @@ DEFINE_COMPILERRT_FUNCTION(__arm_sc_memset)
346348
ret
347349
END_COMPILERRT_FUNCTION(__arm_sc_memset)
348350

349-
#endif // __aarch64__
351+
#endif /* defined(__aarch64__) && __ARM_FP != 0 */
352+
353+
#endif /* __ARM_FEATURE_UNALIGNED */

compiler-rt/lib/builtins/aarch64/sme-libc-routines.c

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
#include <stddef.h>
22

3-
/* The asm version uses FP registers. Use this on targets without them */
4-
#if __ARM_FP == 0
3+
// The asm version uses FP registers and unaligned memory accesses. Use this on
4+
// targets without them.
5+
#if __ARM_FP == 0 || !defined(__ARM_FEATURE_UNALIGNED)
56
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
67
unsigned char *destp = (unsigned char *)dest;
78
unsigned char c8 = (unsigned char)c;
@@ -22,3 +23,46 @@ const void *__arm_sc_memchr(const void *src, int c,
2223

2324
return NULL;
2425
}
26+
27+
#ifndef __ARM_FEATURE_UNALIGNED
28+
29+
// Copies n bytes from src to dest one byte at a time, starting at the lowest
// address and moving upward. All accesses go through unsigned char pointers.
// Because each source byte is read before a later store could reach it, the
// result is also correct for overlapping ranges where dest lies below src.
// Returns dest.
static void *memcpy_fwd(void *dest, const void *src,
                        size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;
  const unsigned char *const in_end = in + n;

  while (in != in_end)
    *out++ = *in++;
  return dest;
}
38+
39+
// Copies n bytes from src to dest one byte at a time, starting at the highest
// address and moving downward. All accesses go through unsigned char pointers.
// Because each source byte is read before an earlier store could reach it,
// the result is also correct for overlapping ranges where dest lies above
// src. Returns dest.
static void *memcpy_rev(void *dest, const void *src,
                        size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;

  // `remaining` counts the bytes still to copy; the byte handled in each
  // iteration sits at index remaining - 1.
  for (size_t remaining = n; remaining != 0; --remaining)
    out[remaining - 1] = in[remaining - 1];
  return dest;
}
50+
51+
// Streaming-compatible memcpy used when unaligned accesses are unavailable.
// The __restrict qualifiers state that dest and src do not alias, so the copy
// is delegated to the ascending byte-wise helper. Returns the helper's result.
void *__arm_sc_memcpy(void *__restrict dest, const void *__restrict src,
                      size_t n) __arm_streaming_compatible {
  void *const copied = memcpy_fwd(dest, src, n);
  return copied;
}
55+
56+
// Streaming-compatible memmove used when unaligned accesses are unavailable.
// Copies n bytes from src to dest, choosing a copy direction that gives the
// correct result when the two ranges overlap, and returns the value returned
// by the routine it dispatches to.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  const unsigned char *const to = (const unsigned char *)dest;
  const unsigned char *const from = (const unsigned char *)src;

  // Neither range starts strictly beyond the end of the other, so they may
  // overlap (or touch): pick the direction from the relative order of the
  // pointers. src above dest -> ascending copy; otherwise descending copy.
  if (!(from > (to + n)) && !(to > (from + n)))
    return (from > to) ? memcpy_fwd(dest, src, n) : memcpy_rev(dest, src, n);

  // The ranges are separated by at least one byte, so the plain copy routine
  // can be used.
  return __arm_sc_memcpy(dest, src, n);
}
67+
68+
#endif /* !defined(__ARM_FEATURE_UNALIGNED) */

0 commit comments

Comments
 (0)