[libc] Extend fputil::sqrt to use floating point instruction for arm32 CPUs if FPUs are available.

lntue · lntue · commit 177be88a687f · 2025-04-05T12:22:08.000-04:00
diff --git a/libc/src/__support/FPUtil/aarch64/sqrt.h b/libc/src/__support/FPUtil/aarch64/sqrt.h
@@ -12,8 +12,9 @@
 #include "src/__support/common.h"
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/properties/architectures.h"
+#include "src/__support/macros/properties/cpu_features.h"
 
-#if !defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#if !defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
 #error "Invalid include"
 #endif
 
@@ -22,17 +23,21 @@
 namespace LIBC_NAMESPACE_DECL {
 namespace fputil {
 
+#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
 template <> LIBC_INLINE float sqrt<float>(float x) {
   float y;
-  __asm__ __volatile__("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
+  asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
   return y;
 }
+#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
 
+#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
 template <> LIBC_INLINE double sqrt<double>(double x) {
   double y;
-  __asm__ __volatile__("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
+  asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
   return y;
 }
+#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
 
 } // namespace fputil
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/FPUtil/sqrt.h b/libc/src/__support/FPUtil/sqrt.h
@@ -14,7 +14,7 @@
 
 #if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2)
 #include "x86_64/sqrt.h"
-#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
 #include "aarch64/sqrt.h"
 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
 #include "riscv/sqrt.h"
diff --git a/libc/src/__support/macros/properties/cpu_features.h b/libc/src/__support/macros/properties/cpu_features.h
@@ -42,18 +42,30 @@
 #define LIBC_TARGET_CPU_HAS_AVX512BW
 #endif
 
+#if defined(__ARM_FP)
+#if (__ARM_FP & 0x2)
+#define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
+#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
+#if (__ARM_FP & 0x4)
+#define LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
+#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
+#if (__ARM_FP & 0x8)
+#define LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
+#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
+#endif // __ARM_FP
+
 #if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) ||   \
     defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
 #define LIBC_TARGET_CPU_HAS_FMA
 // Provide a more fine-grained control of FMA instruction for ARM targets.
 #if defined(__ARM_FP)
-#if (__ARM_FP & 0x2)
+#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_HALF)
 #define LIBC_TARGET_CPU_HAS_FMA_HALF
 #endif // LIBC_TARGET_CPU_HAS_FMA_HALF
-#if (__ARM_FP & 0x4)
+#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT)
 #define LIBC_TARGET_CPU_HAS_FMA_FLOAT
 #endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
-#if (__ARM_FP & 0x8)
+#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE)
 #define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
 #endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
 #else