Skip to content

Commit 177be88

Browse files
committed
[libc] Extend fputil::sqrt to use floating-point instructions on arm32 CPUs when an FPU is available.
1 parent c07ab9e commit 177be88

File tree

3 files changed

+24
-7
lines changed

3 files changed

+24
-7
lines changed

libc/src/__support/FPUtil/aarch64/sqrt.h

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,9 @@
1212
#include "src/__support/common.h"
1313
#include "src/__support/macros/config.h"
1414
#include "src/__support/macros/properties/architectures.h"
15+
#include "src/__support/macros/properties/cpu_features.h"
1516

16-
#if !defined(LIBC_TARGET_ARCH_IS_AARCH64)
17+
#if !defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
1718
#error "Invalid include"
1819
#endif
1920

@@ -22,17 +23,21 @@
2223
namespace LIBC_NAMESPACE_DECL {
2324
namespace fputil {
2425

26+
#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
2527
template <> LIBC_INLINE float sqrt<float>(float x) {
2628
float y;
27-
__asm__ __volatile__("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
29+
asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
2830
return y;
2931
}
32+
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
3033

34+
#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
3135
template <> LIBC_INLINE double sqrt<double>(double x) {
3236
double y;
33-
__asm__ __volatile__("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
37+
asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
3438
return y;
3539
}
40+
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
3641

3742
} // namespace fputil
3843
} // namespace LIBC_NAMESPACE_DECL

libc/src/__support/FPUtil/sqrt.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2)
1616
#include "x86_64/sqrt.h"
17-
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
17+
#elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
1818
#include "aarch64/sqrt.h"
1919
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
2020
#include "riscv/sqrt.h"

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,18 +42,30 @@
4242
#define LIBC_TARGET_CPU_HAS_AVX512BW
4343
#endif
4444

45+
#if defined(__ARM_FP)
46+
#if (__ARM_FP & 0x2)
47+
#define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
48+
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
49+
#if (__ARM_FP & 0x4)
50+
#define LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
51+
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
52+
#if (__ARM_FP & 0x8)
53+
#define LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
54+
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
55+
#endif // __ARM_FP
56+
4557
#if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \
4658
defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
4759
#define LIBC_TARGET_CPU_HAS_FMA
4860
// Provide a more fine-grained control of FMA instruction for ARM targets.
4961
#if defined(__ARM_FP)
50-
#if (__ARM_FP & 0x2)
62+
#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_HALF)
5163
#define LIBC_TARGET_CPU_HAS_FMA_HALF
5264
#endif // LIBC_TARGET_CPU_HAS_FMA_HALF
53-
#if (__ARM_FP & 0x4)
65+
#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT)
5466
#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
5567
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
56-
#if (__ARM_FP & 0x8)
68+
#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE)
5769
#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
5870
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
5971
#else

0 commit comments

Comments
 (0)