Skip to content

Commit f447844

Browse files
committed
[libc] Add support for string/memory_utils functions for AArch64 without HW FP/SIMD
Add conditional compilation to add support for AArch64 without vector registers and/or hardware FPUs by using the generic implementation
1 parent bc2a5b5 commit f447844

File tree

8 files changed

+89
-48
lines changed

8 files changed

+89
-48
lines changed

libc/src/__support/FPUtil/FEnvImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
#include "src/__support/macros/properties/architectures.h"
1818
#include "src/errno/libc_errno.h"
1919

20-
#if defined(LIBC_TARGET_ARCH_IS_AARCH64)
20+
#if defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
2121
#if defined(__APPLE__)
2222
#include "aarch64/fenv_darwin_impl.h"
2323
#else

libc/src/__support/FPUtil/nearest_integer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
#if (defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE4_2))
1818
#include "x86_64/nearest_integer.h"
19-
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
19+
#elif (defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP))
2020
#include "aarch64/nearest_integer.h"
2121
#elif defined(LIBC_TARGET_ARCH_IS_GPU)
2222

libc/src/__support/FPUtil/sqrt.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ template <> LIBC_INLINE double sqrt<double>(double x) {
4242
// Use inline assembly when __builtin_elementwise_sqrt is not available.
4343
#if defined(LIBC_TARGET_CPU_HAS_SSE2)
4444
#include "x86_64/sqrt.h"
45-
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
45+
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_FP)
4646
#include "aarch64/sqrt.h"
4747
#elif defined(LIBC_TARGET_ARCH_IS_ARM)
4848
#include "arm/sqrt.h"

libc/src/string/memory_utils/aarch64/inline_bcmp.h

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,36 @@
1919

2020
namespace LIBC_NAMESPACE_DECL {
2121

22+
#if defined(__ARM_NEON)
23+
[[maybe_unused]] LIBC_INLINE BcmpReturnType
24+
inline_bcmp_aarch64_neon(CPtr p1, CPtr p2, size_t count) {
25+
if (count <= 32) {
26+
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
27+
}
28+
29+
if (count <= 64) {
30+
return aarch64::Bcmp<32>::head_tail(p1, p2, count);
31+
}
32+
33+
if (LIBC_UNLIKELY(count > 256)) {
34+
if (auto value = aarch64::Bcmp<32>::block(p1, p2))
35+
return value;
36+
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
37+
}
38+
39+
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
40+
}
41+
#else
42+
[[maybe_unused]] LIBC_INLINE BcmpReturnType
43+
inline_bcmp_aarch64_no_neon(CPtr p1, CPtr p2, size_t count) {
44+
return generic::Bcmp<uint64_t>::loop_and_tail_align_above(256, p1, p2, count);
45+
}
46+
#endif // __ARM_NEON
47+
2248
[[maybe_unused]] LIBC_INLINE BcmpReturnType inline_bcmp_aarch64(CPtr p1,
2349
CPtr p2,
2450
size_t count) {
25-
if (LIBC_LIKELY(count <= 32)) {
26-
if (LIBC_UNLIKELY(count >= 16)) {
27-
return aarch64::Bcmp<16>::head_tail(p1, p2, count);
28-
}
51+
if (LIBC_LIKELY(count <= 16)) {
2952
switch (count) {
3053
case 0:
3154
return BcmpReturnType::zero();
@@ -54,16 +77,11 @@ namespace LIBC_NAMESPACE_DECL {
5477
}
5578
}
5679

57-
if (count <= 64)
58-
return aarch64::Bcmp<32>::head_tail(p1, p2, count);
59-
60-
// Aligned loop if > 256, otherwise normal loop
61-
if (LIBC_UNLIKELY(count > 256)) {
62-
if (auto value = aarch64::Bcmp<32>::block(p1, p2))
63-
return value;
64-
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
65-
}
66-
return aarch64::Bcmp<32>::loop_and_tail(p1, p2, count);
80+
#if defined(__ARM_NEON)
81+
return inline_bcmp_aarch64_neon(p1, p2, count);
82+
#else
83+
return inline_bcmp_aarch64_no_neon(p1, p2, count);
84+
#endif
6785
}
6886

6987
} // namespace LIBC_NAMESPACE_DECL

libc/src/string/memory_utils/aarch64/inline_memcmp.h

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,7 @@
1616

1717
namespace LIBC_NAMESPACE_DECL {
1818

19-
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
20-
inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
21-
if (LIBC_UNLIKELY(count >= 384)) {
22-
if (auto value = generic::Memcmp<uint8x16_t>::block(p1, p2))
23-
return value;
24-
align_to_next_boundary<16, Arg::P1>(p1, p2, count);
25-
}
26-
return generic::Memcmp<uint8x16_t>::loop_and_tail(p1, p2, count);
27-
}
28-
19+
#if defined(__ARM_NEON)
2920
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
3021
inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
3122
if (LIBC_UNLIKELY(count >= 128)) { // [128, ∞]
@@ -46,6 +37,13 @@ inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
4637
return generic::Memcmp<uint8x16_t>::loop_and_tail(p1 + 32, p2 + 32,
4738
count - 32);
4839
}
40+
#else
41+
[[maybe_unused]] LIBC_INLINE MemcmpReturnType
42+
inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
43+
return generic::Memcmp<uint64_t>::loop_and_tail_align_above(384, p1, p2,
44+
count);
45+
}
46+
#endif
4947

5048
LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
5149
size_t count) {
@@ -61,10 +59,12 @@ LIBC_INLINE MemcmpReturnType inline_memcmp_aarch64(CPtr p1, CPtr p2,
6159
return generic::Memcmp<uint32_t>::head_tail(p1, p2, count);
6260
if (count <= 16)
6361
return generic::Memcmp<uint64_t>::head_tail(p1, p2, count);
64-
if constexpr (aarch64::kNeon)
65-
return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
66-
else
67-
return inline_memcmp_generic_gt16(p1, p2, count);
62+
63+
#if defined(__ARM_NEON)
64+
return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
65+
#else
66+
return inline_memcmp_generic_gt16(p1, p2, count);
67+
#endif
6868
}
6969
} // namespace LIBC_NAMESPACE_DECL
7070

libc/src/string/memory_utils/aarch64/inline_memmove.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
99
#define LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_MEMMOVE_H
1010

11-
#include "src/__support/macros/attributes.h" // LIBC_INLINE
12-
#include "src/string/memory_utils/op_aarch64.h" // aarch64::kNeon
11+
#include "src/__support/macros/attributes.h" // LIBC_INLINE
1312
#include "src/string/memory_utils/op_builtin.h"
1413
#include "src/string/memory_utils/op_generic.h"
1514
#include "src/string/memory_utils/utils.h"
@@ -19,7 +18,6 @@
1918
namespace LIBC_NAMESPACE_DECL {
2019

2120
LIBC_INLINE void inline_memmove_aarch64(Ptr dst, CPtr src, size_t count) {
22-
static_assert(aarch64::kNeon, "aarch64 supports vector types");
2321
using uint128_t = generic_v128;
2422
using uint256_t = generic_v256;
2523
using uint512_t = generic_v512;

libc/src/string/memory_utils/aarch64/inline_memset.h

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,34 @@
1818

1919
namespace LIBC_NAMESPACE_DECL {
2020

21+
using uint128_t = generic_v128;
22+
using uint256_t = generic_v256;
23+
using uint512_t = generic_v512;
24+
25+
#if defined(__ARM_NEON)
26+
[[maybe_unused]] LIBC_INLINE static void
27+
inline_memset_aarch64_neon(Ptr dst, uint8_t value, size_t count) {
28+
if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
29+
generic::Memset<uint512_t>::block(dst, 0);
30+
align_to_next_boundary<64>(dst, count);
31+
return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
32+
}
33+
34+
generic::Memset<uint128_t>::block(dst, value);
35+
align_to_next_boundary<16>(dst, count);
36+
return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
37+
}
38+
#else
39+
[[maybe_unused]] LIBC_INLINE static void
40+
inline_memset_aarch64_no_neon(Ptr dst, uint8_t value, size_t count) {
41+
generic::Memset<uint128_t>::block(dst, value);
42+
align_to_next_boundary<16>(dst, count);
43+
return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
44+
}
45+
#endif // __ARM_NEON
46+
2147
[[maybe_unused]] LIBC_INLINE static void
2248
inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
23-
static_assert(aarch64::kNeon, "aarch64 supports vector types");
24-
using uint128_t = generic_v128;
25-
using uint256_t = generic_v256;
26-
using uint512_t = generic_v512;
2749
if (count == 0)
2850
return;
2951
if (count <= 3) {
@@ -46,15 +68,12 @@ inline_memset_aarch64(Ptr dst, uint8_t value, size_t count) {
4668
generic::Memset<uint256_t>::tail(dst, value, count);
4769
return;
4870
}
49-
if (count >= 448 && value == 0 && aarch64::neon::hasZva()) {
50-
generic::Memset<uint512_t>::block(dst, 0);
51-
align_to_next_boundary<64>(dst, count);
52-
return aarch64::neon::BzeroCacheLine::loop_and_tail(dst, 0, count);
53-
} else {
54-
generic::Memset<uint128_t>::block(dst, value);
55-
align_to_next_boundary<16>(dst, count);
56-
return generic::Memset<uint512_t>::loop_and_tail(dst, value, count);
57-
}
71+
72+
#if defined(__ARM_NEON)
73+
return inline_memset_aarch64_neon(dst, value, count);
74+
#else
75+
return inline_memset_aarch64_no_neon(dst, value, count);
76+
#endif
5877
}
5978

6079
} // namespace LIBC_NAMESPACE_DECL

libc/src/string/memory_utils/op_aarch64.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525

2626
#ifdef __ARM_NEON
2727
#include <arm_neon.h>
28-
#endif //__ARM_NEON
2928

3029
namespace LIBC_NAMESPACE_DECL {
3130
namespace aarch64 {
@@ -176,6 +175,8 @@ template <size_t Size> struct Bcmp {
176175
} // namespace aarch64
177176
} // namespace LIBC_NAMESPACE_DECL
178177

178+
#endif //__ARM_NEON
179+
179180
namespace LIBC_NAMESPACE_DECL {
180181
namespace generic {
181182

@@ -225,6 +226,8 @@ LIBC_INLINE MemcmpReturnType cmp<uint64_t>(CPtr p1, CPtr p2, size_t offset) {
225226
return MemcmpReturnType::zero();
226227
}
227228

229+
#if defined(__ARM_NEON)
230+
228231
///////////////////////////////////////////////////////////////////////////////
229232
// Specializations for uint8x16_t
230233
template <> struct is_vector<uint8x16_t> : cpp::true_type {};
@@ -269,6 +272,9 @@ LIBC_INLINE MemcmpReturnType cmp<uint8x16x2_t>(CPtr p1, CPtr p2,
269272
}
270273
return MemcmpReturnType::zero();
271274
}
275+
276+
#endif // __ARM_NEON
277+
272278
} // namespace generic
273279
} // namespace LIBC_NAMESPACE_DECL
274280

0 commit comments

Comments
 (0)