Skip to content

Commit da30225

Browse files
authored
[libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD (#137592)
Add conditional compilation to add support for AArch64 without vector registers and/or hardware FPUs by using the generic implementation. **Context:** A few functions were hard-coded to use vector registers/hardware FPUs. This meant that libc would not compile on architectures that did not support these features. This fix falls back on the generic implementation if a feature is not supported.
1 parent 72f5ac4 commit da30225

File tree

11 files changed

+151
-48
lines changed

11 files changed

+151
-48
lines changed

libc/src/__support/FPUtil/FEnvImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#include "src/__support/macros/properties/architectures.h"
1818
#include "src/errno/libc_errno.h"
1919

20-
#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
20+
#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
2121
#if defined(__APPLE__)
2222
#include "aarch64/fenv_darwin_impl.h"
2323
#else

libc/src/__support/FPUtil/nearest_integer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
1818
#include "x86_64/nearest_integer.h"
19-
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
2020
#include "aarch64/nearest_integer.h"
2121
#elif defined(LIBC_TARGET_ARCH_IS_GPU)
2222

libc/src/__support/FPUtil/sqrt.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
4242
// Use inline assembly when __builtin_elementwise_sqrt is not available.
4343
#if defined(LIBC_TARGET_CPU_HAS_SSE2)
4444
#include "x86_64/sqrt.h"
45-
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
45+
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
4646
#include "aarch64/sqrt.h"
4747
#elif defined(LIBC_TARGET_ARCH_IS_ARM)
4848
#include "arm/sqrt.h"

libc/src/string/memory_utils/aarch64/inline_bcmp.h

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,43 @@
1919

2020
namespace LIBC_NAMESPACE_DECL {
2121

22-
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
23-
CPtr p2,
24-
size_t count) {
22+
[[maybe_unused]] LIBC_INLINE BcmpReturnType
23+
inline_bcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
24+
if (LIBC_LIKELY(count < 16)) {
25+
switch (count) {
26+
case 0:
27+
return BcmpReturnType::zero();
28+
case 1:
29+
return generic::Bcmp<uint8_t>::block(p1, p2);
30+
case 2:
31+
return generic::Bcmp<uint16_t>::block(p1, p2);
32+
case 3:
33+
return generic::Bcmp<uint16_t>::head_tail(p1, p2, count);
34+
case 4:
35+
return generic::Bcmp<uint32_t>::block(p1, p2);
36+
case 5:
37+
case 6:
38+
case 7:
39+
return generic::Bcmp<uint32_t>::head_tail(p1, p2, count);
40+
case 8:
41+
return generic::Bcmp<uint64_t>::block(p1, p2);
42+
case 9:
43+
case 10:
44+
case 11:
45+
case 12:
46+
case 13:
47+
case 14:
48+
case 15:
49+
return generic::Bcmp<uint64_t>::head_tail(p1, p2, count);
50+
}
51+
}
52+
53+
return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
54+
}
55+
56+
#ifdef __ARM_NEON
57+
[[maybe_unused]] LIBC_INLINE BcmpReturnType
58+
inline_bcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
2559
if (LIBC_LIKELY(count <= 32)) {
2660
if (LIBC_UNLIKELY(count >= 16)) {
2761
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
@@ -65,6 +99,16 @@ namespace LIBC_NAMESPACE_DECL {
6599
}
66100
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
67101
}
102+
#endif
103+
104+
[[gnu::flatten]] LIBC_INLINE BcmpReturnType
105+
inline_bcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
106+
#if defined(__ARM_NEON)
107+
return inline_bcmp_aarch64_with_fp(p1, p2, count);
108+
#else
109+
return inline_bcmp_aarch64_no_fp(p1, p2, count);
110+
#endif
111+
}
68112

69113
} // namespace LIBC_NAMESPACE_DECL
70114

libc/src/string/memory_utils/aarch64/inline_memcmp.h

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,40 @@
1717
namespace LIBC_NAMESPACE_DECL {
1818

1919
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
20-
inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
21-
if (LIBC_UNLIKELY(count >= 384)) {
22-
if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
23-
return value;
24-
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
25-
}
26-
return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
20+
inline_memcmp_aarch64_no_fp(CPtr p1, CPtr p2, size_t count) {
21+
if (count == 0)
22+
return MemcmpReturnType::zero();
23+
if (count == 1)
24+
return generic::Memcmp<uint8_t>::block(p1, p2);
25+
if (count == 2)
26+
return generic::Memcmp<uint16_t>::block(p1, p2);
27+
if (count == 3)
28+
return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
29+
if (count <= 8)
30+
return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
31+
if (count <= 16)
32+
return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
33+
34+
return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
35+
count);
2736
}
2837

38+
#if defined(__ARM_NEON)
2939
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
30-
inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
40+
inline_memcmp_aarch64_with_fp(CPtr p1, CPtr p2, size_t count) {
41+
if (count == 0)
42+
return MemcmpReturnType::zero();
43+
if (count == 1)
44+
return generic::Memcmp<uint8_t>::block(p1, p2);
45+
if (count == 2)
46+
return generic::Memcmp<uint16_t>::block(p1, p2);
47+
if (count == 3)
48+
return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
49+
if (count <= 8)
50+
return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
51+
if (count <= 16)
52+
return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
53+
3154
if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
3255
if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
3356
return value;
@@ -46,25 +69,15 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
4669
return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
4770
count - 32);
4871
}
72+
#endif
4973

50-
LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
51-
size_t count) {
52-
if (count == 0)
53-
return MemcmpReturnType::zero();
54-
if (count == 1)
55-
return generic::Memcmp<uint8_t>::block(p1, p2);
56-
if (count == 2)
57-
return generic::Memcmp<uint16_t>::block(p1, p2);
58-
if (count == 3)
59-
return generic::MemcmpSequence<uint16_t, uint8_t>::block(p1, p2);
60-
if (count <= 8)
61-
return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
62-
if (count <= 16)
63-
return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
64-
if constexpr (aarch64::kNeon)
65-
return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
66-
else
67-
return inline_memcmp_generic_gt16(p1, p2, count);
74+
[[gnu::flatten]] LIBC_INLINE MemcmpReturnType
75+
inline_memcmp_aarch64_dispatch(CPtr p1, CPtr p2, size_t count) {
76+
#if defined(__ARM_NEON)
77+
return inline_memcmp_aarch64_with_fp(p1, p2, count);
78+
#else
79+
return inline_memcmp_aarch64_no_fp(p1, p2, count);
80+
#endif
6881
}
6982
} // namespace LIBC_NAMESPACE_DECL
7083

libc/src/string/memory_utils/aarch64/inline_memmove.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
99
#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
1010

11-
#include "src/__support/macros/attributes.h" // LIBC_INLINE
12-
#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
11+
#include "src/__support/macros/attributes.h" // LIBC_INLINE
1312
#include "src/string/memory_utils/op_builtin.h"
1413
#include "src/string/memory_utils/op_generic.h"
1514
#include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
1918
namespace LIBC_NAMESPACE_DECL {
2019

2120
LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
22-
static_assert(aarch64::kNeon, "aarch64 supports vector types");
2321
using uint128_t = generic_v128;
2422
using uint256_t = generic_v256;
2523
using uint512_t = generic_v512;

libc/src/string/memory_utils/aarch64/inline_memset.h

Lines changed: 51 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,12 @@
1818

1919
namespace LIBC_NAMESPACE_DECL {
2020

21+
using uint128_t = generic_v128;
22+
using uint256_t = generic_v256;
23+
using uint512_t = generic_v512;
24+
2125
[[maybe_unused]] LIBC_INLINE static void
22-
inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
23-
static_assert(aarch64::kNeon, "aarch64 supports vector types");
24-
using uint128_t = generic_v128;
25-
using uint256_t = generic_v256;
26-
using uint512_t = generic_v512;
26+
inline_memset_aarch64_no_fp(Ptr dst, uint8_t value, size_t count) {
2727
if (count == 0)
2828
return;
2929
if (count <= 3) {
@@ -46,15 +46,57 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
4646
generic::Memset<uint256_t>::tail(dst, value, count);
4747
return;
4848
}
49+
50+
generic::Memset<uint128_t>::block(dst, value);
51+
align_to_next_boundary<16>(dst, count);
52+
return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
53+
}
54+
55+
#if defined(__ARM_NEON)
56+
[[maybe_unused]] LIBC_INLINE static void
57+
inline_memset_aarch64_with_fp(Ptr dst, uint8_t value, size_t count) {
58+
if (count == 0)
59+
return;
60+
if (count <= 3) {
61+
generic::Memset<uint8_t>::block(dst, value);
62+
if (count > 1)
63+
generic::Memset<uint16_t>::tail(dst, value, count);
64+
return;
65+
}
66+
if (count <= 8)
67+
return generic::Memset<uint32_t>::head_tail(dst, value, count);
68+
if (count <= 16)
69+
return generic::Memset<uint64_t>::head_tail(dst, value, count);
70+
if (count <= 32)
71+
return generic::Memset<uint128_t>::head_tail(dst, value, count);
72+
if (count <= (32 + 64)) {
73+
generic::Memset<uint256_t>::block(dst, value);
74+
if (count <= 64)
75+
return generic::Memset<uint256_t>::tail(dst, value, count);
76+
generic::Memset<uint256_t>::block(dst + 32, value);
77+
generic::Memset<uint256_t>::tail(dst, value, count);
78+
return;
79+
}
80+
4981
if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
5082
generic::Memset<uint512_t>::block(dst, 0);
5183
align_to_next_boundary<64>(dst, count);
5284
return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
53-
} else {
54-
generic::Memset<uint128_t>::block(dst, value);
55-
align_to_next_boundary<16>(dst, count);
56-
return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
5785
}
86+
87+
generic::Memset<uint128_t>::block(dst, value);
88+
align_to_next_boundary<16>(dst, count);
89+
return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
90+
}
91+
#endif
92+
93+
[[gnu::flatten]] [[maybe_unused]] LIBC_INLINE static void
94+
inline_memset_aarch64_dispatch(Ptr dst, uint8_t value, size_t count) {
95+
#if defined(__ARM_NEON)
96+
return inline_memset_aarch64_with_fp(dst, value, count);
97+
#else
98+
return inline_memset_aarch64_no_fp(dst, value, count);
99+
#endif
58100
}
59101

60102
} // namespace LIBC_NAMESPACE_DECL

libc/src/string/memory_utils/inline_bcmp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_x86
2222
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
2323
#include "src/string/memory_utils/aarch64/inline_bcmp.h"
24-
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64
24+
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_aarch64_dispatch
2525
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
2626
#include "src/string/memory_utils/riscv/inline_bcmp.h"
2727
#define LIBC_SRC_STRING_MEMORY_UTILS_BCMP inline_bcmp_riscv

libc/src/string/memory_utils/inline_memcmp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_x86
2121
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
2222
#include "src/string/memory_utils/aarch64/inline_memcmp.h"
23-
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64
23+
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_aarch64_dispatch
2424
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
2525
#include "src/string/memory_utils/riscv/inline_memcmp.h"
2626
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP inline_memcmp_riscv

libc/src/string/memory_utils/inline_memset.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_x86
2121
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
2222
#include "src/string/memory_utils/aarch64/inline_memset.h"
23-
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64
23+
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_aarch64_dispatch
2424
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
2525
#include "src/string/memory_utils/riscv/inline_memset.h"
2626
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET inline_memset_riscv

libc/src/string/memory_utils/op_aarch64.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
#ifdef __ARM_NEON
2727
#include <arm_neon.h>
28-
#endif //__ARM_NEON
2928

3029
namespace LIBC_NAMESPACE_DECL {
3130
namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
176175
} // namespace aarch64
177176
} // namespace LIBC_NAMESPACE_DECL
178177

178+
#endif //__ARM_NEON
179+
179180
namespace LIBC_NAMESPACE_DECL {
180181
namespace generic {
181182

@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
225226
return MemcmpReturnType::zero();
226227
}
227228

229+
#if defined(__ARM_NEON)
230+
228231
///////////////////////////////////////////////////////////////////////////////
229232
// Specializations for uint8x16_t
230233
template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
269272
}
270273
return MemcmpReturnType::zero();
271274
}
275+
276+
#endif // __ARM_NEON
277+
272278
} // namespace generic
273279
} // namespace LIBC_NAMESPACE_DECL
274280

0 commit comments

Comments
 (0)