Commit 3b20924
1 parent f947062

Changes:
- Split the functions into separate files.
- Select which implementation to use based on target features; the selection is now done in CMake.

7 files changed: +395 −189 lines

compiler-rt/cmake/builtin-config-ix.cmake

Lines changed: 18 additions & 0 deletions
@@ -50,6 +50,24 @@ void foo(void) __arm_streaming_compatible {
 }
 ")

+builtin_check_c_compiler_source(COMPILER_RT_HAS_ARM_UNALIGNED
+"
+void foo() {
+#ifndef __ARM_FEATURE_UNALIGNED
+#error \"Unaligned accesses unsupported\"
+#endif
+}
+")
+
+builtin_check_c_compiler_source(COMPILER_RT_HAS_ARM_FP
+"
+void foo() {
+#ifndef __ARM_FP
+#error \"No floating-point support\"
+#endif
+}
+")
+
 check_include_files("sys/auxv.h" COMPILER_RT_HAS_AUXV)

 if(ANDROID)
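Each builtin_check_c_compiler_source probe above compiles a tiny translation unit for the builtins target and sets the COMPILER_RT_HAS_* variable only if compilation succeeds, i.e. only if the corresponding ACLE macro is predefined for the target. As a hedged aside (not part of the commit), this standalone C program shows what the two probed macros convey on AArch64; per ACLE, __ARM_FP is a bit mask of supported floating-point precisions (0x2 = half, 0x4 = single, 0x8 = double):

#include <stdio.h>

int main(void) {
#ifdef __ARM_FEATURE_UNALIGNED
  /* Defined when the target permits unaligned memory accesses. */
  printf("unaligned accesses: supported\n");
#else
  printf("unaligned accesses: not supported\n");
#endif
#ifdef __ARM_FP
  printf("FP precision mask: 0x%x\n", __ARM_FP);
#else
  printf("no hardware floating point\n");
#endif
  return 0;
}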

compiler-rt/lib/builtins/CMakeLists.txt

Lines changed: 10 additions & 2 deletions
@@ -600,9 +600,17 @@ if (COMPILER_RT_HAS_AARCH64_SME)
   set_source_files_properties(aarch64/arm_apple_sme_abi.s PROPERTIES COMPILE_FLAGS -march=armv8a+sme)
   message(STATUS "AArch64 Apple SME ABI routines enabled")
 elseif (NOT COMPILER_RT_DISABLE_AARCH64_FMV AND COMPILER_RT_HAS_FNO_BUILTIN_FLAG AND COMPILER_RT_AARCH64_FMV_USES_GLOBAL_CONSTRUCTOR)
-  list(APPEND aarch64_SOURCES aarch64/sme-abi.S aarch64/sme-libc-mem-routines.S aarch64/sme-abi-assert.c aarch64/sme-libc-routines.c)
+  if(COMPILER_RT_HAS_ARM_UNALIGNED AND COMPILER_RT_HAS_ARM_FP)
+    list(APPEND aarch64_SOURCES aarch64/sme-libc-opt-memset-memchr.S aarch64/sme-libc-opt-memcpy-memmove.S)
+  elseif(COMPILER_RT_HAS_ARM_UNALIGNED)
+    list(APPEND aarch64_SOURCES aarch64/sme-libc-memset-memchr.c aarch64/sme-libc-opt-memcpy-memmove.S)
+    message(WARNING "AArch64 SME ABI assembly-optimized memset/memchr disabled: target does not have hardware floating-point support.")
+  else()
+    list(APPEND aarch64_SOURCES aarch64/sme-libc-memset-memchr.c aarch64/sme-libc-memcpy-memmove.c)
+    message(WARNING "AArch64 SME ABI assembly-optimized routines disabled: target does not support unaligned accesses.")
+  endif()
   message(STATUS "AArch64 SME ABI routines enabled")
-  set_source_files_properties(aarch64/sme-libc-routines.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
+  set_source_files_properties(aarch64/sme-libc-memset-memchr.c aarch64/sme-libc-memcpy-memmove.c PROPERTIES COMPILE_FLAGS "-fno-builtin")
 else()
   if(COMPILER_RT_DISABLE_AARCH64_FMV)
     message(WARNING "AArch64 SME ABI routines require function multiversioning support.")
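The net effect of the branches above is this selection matrix (file names as listed in the commit):

  COMPILER_RT_HAS_ARM_UNALIGNED  COMPILER_RT_HAS_ARM_FP  memcpy/memmove                 memset/memchr
  yes                            yes                     sme-libc-opt-memcpy-memmove.S  sme-libc-opt-memset-memchr.S
  yes                            no                      sme-libc-opt-memcpy-memmove.S  sme-libc-memset-memchr.c
  no                             any                     sme-libc-memcpy-memmove.c      sme-libc-memset-memchr.c

Only the C fallbacks are built with -fno-builtin, which keeps the compiler from recognizing their byte loops as memcpy/memset idioms and turning them back into calls to the (non-streaming-compatible) libc routines.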
compiler-rt/lib/builtins/aarch64/sme-libc-memcpy-memmove.c

Lines changed: 55 additions & 0 deletions

@@ -0,0 +1,55 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains basic implementations of Scalable Matrix Extension (SME)
+/// compatible memcpy and memmove functions to be used when their assembly-
+/// optimized counterparts can't be.
+///
+//===----------------------------------------------------------------------===//
+
+#include <stddef.h>
+
+static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
+                                 size_t n) __arm_streaming_compatible {
+  unsigned char *destp = (unsigned char *)dest;
+  const unsigned char *srcp = (const unsigned char *)src;
+
+  for (size_t i = 0; i < n; ++i)
+    destp[i] = srcp[i];
+  return dest;
+}
+
+static void *__arm_sc_memcpy_rev(void *dest, const void *src,
+                                 size_t n) __arm_streaming_compatible {
+  unsigned char *destp = (unsigned char *)dest;
+  const unsigned char *srcp = (const unsigned char *)src;
+
+  while (n > 0) {
+    --n;
+    destp[n] = srcp[n];
+  }
+  return dest;
+}
+
+extern void *__arm_sc_memcpy(void *__restrict dest, const void *__restrict src,
+                             size_t n) __arm_streaming_compatible {
+  return __arm_sc_memcpy_fwd(dest, src, n);
+}
+
+extern void *__arm_sc_memmove(void *dest, const void *src,
+                              size_t n) __arm_streaming_compatible {
+  unsigned char *destp = (unsigned char *)dest;
+  const unsigned char *srcp = (const unsigned char *)src;
+
+  if ((srcp > (destp + n)) || (destp > (srcp + n)))
+    return __arm_sc_memcpy(dest, src, n);
+  if (srcp > destp)
+    return __arm_sc_memcpy_fwd(dest, src, n);
+  return __arm_sc_memcpy_rev(dest, src, n);
+}
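Because these routines must stay streaming-compatible, they cannot simply defer to libc; __arm_sc_memmove therefore re-derives the classic direction check itself: non-overlapping buffers go through memcpy, a source above the destination is copied forward, and a source below it is copied backward. A minimal standard-C illustration (not from the commit) of why the direction matters, using libc's memmove:

#include <stdio.h>
#include <string.h>

int main(void) {
  char buf[8] = "abcdef";
  /* dest (buf + 2) overlaps src (buf): a naive forward byte copy would
     overwrite buf[2] and buf[3] before reading them, so the copy has to
     run backward. memmove detects this; the result is "ababcd". */
  memmove(buf + 2, buf, 4);
  printf("%s\n", buf); /* prints "ababcd" */
  return 0;
}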
compiler-rt/lib/builtins/aarch64/sme-libc-memset-memchr.c

Lines changed: 37 additions & 0 deletions

@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains basic implementations of Scalable Matrix Extension (SME)
+/// compatible memset and memchr functions to be used when their assembly-
+/// optimized counterparts can't be.
+///
+//===----------------------------------------------------------------------===//
+
+#include <stddef.h>
+
+extern void *__arm_sc_memset(void *dest, int c,
+                             size_t n) __arm_streaming_compatible {
+  unsigned char *destp = (unsigned char *)dest;
+  unsigned char c8 = (unsigned char)c;
+  for (size_t i = 0; i < n; ++i)
+    destp[i] = c8;
+
+  return dest;
+}
+
+extern const void *__arm_sc_memchr(const void *src, int c,
+                                   size_t n) __arm_streaming_compatible {
+  const unsigned char *srcp = (const unsigned char *)src;
+  unsigned char c8 = (unsigned char)c;
+  for (size_t i = 0; i < n; ++i)
+    if (srcp[i] == c8)
+      return &srcp[i];
+
+  return NULL;
+}
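Note the (unsigned char)c conversion in both routines: as with the C standard's memset and memchr, the int argument is reduced to unsigned char before use. A small standalone check (not from the commit) of the equivalent libc behaviour:

#include <assert.h>
#include <string.h>

int main(void) {
  const char s[] = "hello";
  /* The search value is reduced to unsigned char, so 'l' + 256
     finds the same byte as 'l'. */
  assert(memchr(s, 'l', 5) == s + 2);
  assert(memchr(s, 'l' + 256, 5) == s + 2);
  assert(memchr(s, 'z', 5) == NULL);
  return 0;
}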

compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S renamed to compiler-rt/lib/builtins/aarch64/sme-libc-opt-memcpy-memmove.S

Lines changed: 14 additions & 119 deletions
@@ -1,13 +1,23 @@
+//===----------------------------------------------------------------------===//
+//
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-// Routines taken from libc/AOR_v20.02/string/aarch64
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains assembly-optimized implementations of Scalable Matrix
+/// Extension (SME) compatible memcpy and memmove functions.
+///
+/// These implementations depend on unaligned access support.
+///
+/// Routines taken from libc/AOR_v20.02/string/aarch64.
+///
+//===----------------------------------------------------------------------===//

 #include "../assembly.h"

-#ifdef __ARM_FEATURE_UNALIGNED
-
 //
 // __arm_sc_memcpy / __arm_sc_memmove
 //
@@ -236,118 +246,3 @@ END_COMPILERRT_FUNCTION(__arm_sc_memcpy)

 DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)

-// This version uses FP registers. Use this only on targets with them
-#if (defined(__aarch64__) && __ARM_FP != 0) || defined(__arm64ec__)
-//
-// __arm_sc_memset
-//
-
-#define dstin   x0
-#define val     x1
-#define valw    w1
-#define count   x2
-#define dst     x3
-#define dstend2 x4
-#define zva_val x5
-
-DEFINE_COMPILERRT_FUNCTION(__arm_sc_memset)
-#ifdef __ARM_FEATURE_SVE
-  mov z0.b, valw
-#else
-  bfi valw, valw, #8, #8
-  bfi valw, valw, #16, #16
-  bfi val, val, #32, #32
-  fmov d0, val
-  fmov v0.d[1], val
-#endif
-  add dstend2, dstin, count
-
-  cmp count, 96
-  b.hi 7f // set_long
-  cmp count, 16
-  b.hs 4f // set_medium
-  mov val, v0.D[0]
-
-  /* Set 0..15 bytes. */
-  tbz count, 3, 1f
-  str val, [dstin]
-  str val, [dstend2, -8]
-  ret
-  nop
-1: tbz count, 2, 2f
-  str valw, [dstin]
-  str valw, [dstend2, -4]
-  ret
-2: cbz count, 3f
-  strb valw, [dstin]
-  tbz count, 1, 3f
-  strh valw, [dstend2, -2]
-3: ret
-
-  /* Set 17..96 bytes. */
-4: // set_medium
-  str q0, [dstin]
-  tbnz count, 6, 6f // set96
-  str q0, [dstend2, -16]
-  tbz count, 5, 5f
-  str q0, [dstin, 16]
-  str q0, [dstend2, -32]
-5: ret
-
-  .p2align 4
-  /* Set 64..96 bytes. Write 64 bytes from the start and
-     32 bytes from the end. */
-6: // set96
-  str q0, [dstin, 16]
-  stp q0, q0, [dstin, 32]
-  stp q0, q0, [dstend2, -32]
-  ret
-
-  .p2align 4
-7: // set_long
-  and valw, valw, 255
-  bic dst, dstin, 15
-  str q0, [dstin]
-  cmp count, 160
-  ccmp valw, 0, 0, hs
-  b.ne 9f // no_zva
-
-#ifndef SKIP_ZVA_CHECK
-  mrs zva_val, dczid_el0
-  and zva_val, zva_val, 31
-  cmp zva_val, 4 /* ZVA size is 64 bytes. */
-  b.ne 9f // no_zva
-#endif
-  str q0, [dst, 16]
-  stp q0, q0, [dst, 32]
-  bic dst, dst, 63
-  sub count, dstend2, dst /* Count is now 64 too large. */
-  sub count, count, 128   /* Adjust count and bias for loop. */
-
-  .p2align 4
-8: // zva_loop
-  add dst, dst, 64
-  dc zva, dst
-  subs count, count, 64
-  b.hi 8b // zva_loop
-  stp q0, q0, [dstend2, -64]
-  stp q0, q0, [dstend2, -32]
-  ret
-
-9: // no_zva
-  sub count, dstend2, dst   /* Count is 16 too large. */
-  sub dst, dst, 16          /* Dst is biased by -32. */
-  sub count, count, 64 + 16 /* Adjust count and bias for loop. */
-10: // no_zva_loop
-  stp q0, q0, [dst, 32]
-  stp q0, q0, [dst, 64]!
-  subs count, count, 64
-  b.hi 10b // no_zva_loop
-  stp q0, q0, [dstend2, -64]
-  stp q0, q0, [dstend2, -32]
-  ret
-END_COMPILERRT_FUNCTION(__arm_sc_memset)
-
-#endif /* defined(__aarch64__) && __ARM_FP != 0 */
-
-#endif /* __ARM_FEATURE_UNALIGNED */
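The memset body removed here (it moves to the new sme-libc-opt-memset-memchr.S) zeroes long runs with the DC ZVA data-cache instruction, but only after checking DCZID_EL0 for a 64-byte zeroing block with ZVA permitted ("and zva_val, zva_val, 31; cmp zva_val, 4"). A hedged C sketch of the same register query, assuming an AArch64 compiler with GNU inline asm (illustrative only, not part of the commit):

#include <stdint.h>

/* Returns the DC ZVA block size in bytes, or 0 if DC ZVA is prohibited.
   DCZID_EL0 bits [3:0] (BS) give log2 of the block size in 4-byte words;
   bit 4 (DZP) set means the instruction must not be used. The assembly's
   "cmp zva_val, 4" accepts exactly BS == 4 with DZP == 0, i.e. 64 bytes. */
static uint64_t dc_zva_block_size(void) {
  uint64_t dczid;
  __asm__ volatile("mrs %0, dczid_el0" : "=r"(dczid));
  if (dczid & 0x10) /* DZP: DC ZVA prohibited */
    return 0;
  return (uint64_t)4 << (dczid & 0xf); /* 4 * 2^BS bytes */
}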
