Skip to content

[libc] Extend fputil::sqrt to use floating point instructions for arm32. #134499

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Apr 9, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,35 +6,38 @@
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H

#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

#if !defined(LIBC_TARGET_ARCH_IS_AARCH64)
#if !defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
#error "Invalid include"
#endif

#include "src/__support/FPUtil/generic/sqrt.h"

namespace LIBC_NAMESPACE_DECL {
namespace fputil {

// Hardware square-root specializations of fputil::sqrt. Each one issues an
// `fsqrt` instruction through inline assembly and returns the value written
// to the output operand.
// NOTE(review): every function below carries two asm statements back to
// back, one spelled `__asm__ __volatile__` and one spelled `asm`. They look
// like the before/after lines of a diff rendered without +/- markers --
// confirm that only one of them is meant to remain.
// NOTE(review): `fsqrt` with the %s / %d operand modifiers looks like AArch64
// syntax, while the include guard of this header accepts
// LIBC_TARGET_ARCH_IS_ANY_ARM -- confirm that 32-bit ARM builds never reach
// this path, or that they get a `vsqrt`-based variant.

// float: compiled only when LIBC_TARGET_CPU_HAS_FPU_FLOAT is defined.
#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
template <> LIBC_INLINE float sqrt<float>(float x) {
float y;
// %s0 / %s1 select the single-precision view of the output / input operand.
__asm__ __volatile__("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
return y;
}
#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT

// double: compiled only when LIBC_TARGET_CPU_HAS_FPU_DOUBLE is defined.
#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
template <> LIBC_INLINE double sqrt<double>(double x) {
double y;
// %d0 / %d1 select the double-precision view of the output / input operand.
__asm__ __volatile__("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
return y;
}
#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE

} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H
15 changes: 7 additions & 8 deletions libc/src/__support/FPUtil/riscv/sqrt.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,30 @@
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

#if !defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#error "Invalid include"
#endif

#include "src/__support/FPUtil/generic/sqrt.h"

namespace LIBC_NAMESPACE_DECL {
namespace fputil {

// Hardware square-root specializations of fputil::sqrt for RISC-V. Each one
// issues a single `fsqrt.s` / `fsqrt.d` instruction through inline assembly
// on floating-point register operands (the "f" constraint) and returns the
// value written to the output operand.
// NOTE(review): this listing appears to interleave the pre-change and
// post-change lines of a diff without +/- markers (the `__riscv_flen` guards
// and the `__asm__ __volatile__` statements look like the older spelling), so
// the #if / #endif labels pair up per side rather than by textual nesting --
// confirm which lines are meant to remain.

// float: compiled only when LIBC_TARGET_CPU_HAS_FPU_FLOAT is defined.
#ifdef __riscv_flen
#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
template <> LIBC_INLINE float sqrt<float>(float x) {
float result;
__asm__ __volatile__("fsqrt.s %0, %1\n\t" : "=f"(result) : "f"(x));
asm("fsqrt.s %0, %1\n\t" : "=f"(result) : "f"(x));
return result;
}
#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT

// double: compiled only when LIBC_TARGET_CPU_HAS_FPU_DOUBLE is defined.
// The feature macro is defined without a value, so it has to be tested with
// #ifdef; a plain `#if MACRO` expands to an empty expression and is rejected
// by the preprocessor exactly on the targets that define it.
#if __riscv_flen >= 64
#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
template <> LIBC_INLINE double sqrt<double>(double x) {
double result;
__asm__ __volatile__("fsqrt.d %0, %1\n\t" : "=f"(result) : "f"(x));
asm("fsqrt.d %0, %1\n\t" : "=f"(result) : "f"(x));
return result;
}
#endif // __riscv_flen >= 64
#endif // __riscv_flen
#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE

} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL
Expand Down
42 changes: 36 additions & 6 deletions libc/src/__support/FPUtil/sqrt.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,44 @@
#include "src/__support/macros/properties/architectures.h"
#include "src/__support/macros/properties/cpu_features.h"

#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2)
#include "src/__support/FPUtil/generic/sqrt.h"

// Generic instruction specializations with __builtin_elementwise_sqrt.
#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT) || \
defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE)

#if __has_builtin(__builtin_elementwise_sqrt)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clang always evaluates this to 1. If the target doesn't support the instruction natively, Clang lowers __builtin_elementwise_sqrt to a call to sqrt (or sqrtf), which is a problem when __builtin_elementwise_sqrt is itself used to implement sqrt (or sqrtf).

I think we'll need a CMake check that compiles __builtin_elementwise_sqrt and checks if the resulting object file has a reference to sqrt. https://github.com/llvm/llvm-project/blob/36cb81cced6cb9ab8a68a4313963d4ccf7669e76/compiler-rt/cmake/Modules/CheckSectionExists.cmake is an example of such a check. Let me know if you need help with this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even the __ARM_FP flag does not guard this correctly?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like __ARM_FP is sufficient to check whether sqrt is supported by the target (at least based on my local testing), so please ignore my comment.


namespace LIBC_NAMESPACE_DECL {
namespace fputil {

// Specializations of fputil::sqrt that hand the computation to the compiler
// builtin __builtin_elementwise_sqrt. Each precision is provided only when
// the matching LIBC_TARGET_CPU_HAS_FPU_* macro is defined.
// NOTE(review): presumably those macros guarantee that the builtin is lowered
// to a native instruction rather than to a call back into sqrt/sqrtf --
// confirm for every target that defines them.

#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT)
// Single precision.
template <> LIBC_INLINE float sqrt<float>(float x) {
  const float root = __builtin_elementwise_sqrt(x);
  return root;
}
#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT

#if defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE)
// Double precision.
template <> LIBC_INLINE double sqrt<double>(double x) {
  const double root = __builtin_elementwise_sqrt(x);
  return root;
}
#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE

} // namespace fputil
} // namespace LIBC_NAMESPACE_DECL

#else // __builtin_elementwise_sqrt
// Use inline assembly when __builtin_elementwise_sqrt is not available.
#if defined(LIBC_TARGET_CPU_HAS_SSE2)
#include "x86_64/sqrt.h"
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
#include "aarch64/sqrt.h"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
#include "arm/sqrt.h"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "riscv/sqrt.h"
#else
#include "generic/sqrt.h"
#endif // Target specific header of inline asm.

#endif // __builtin_elementwise_sqrt

#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT or DOUBLE

#endif
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_SQRT_H
2 changes: 0 additions & 2 deletions libc/src/__support/FPUtil/x86_64/sqrt.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
#error "sqrtss / sqrtsd need SSE2"
#endif

#include "src/__support/FPUtil/generic/sqrt.h"

namespace LIBC_NAMESPACE_DECL {
namespace fputil {

Expand Down
49 changes: 41 additions & 8 deletions libc/src/__support/macros/properties/cpu_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

// When the compiler advertises SSE2, record it and also mark float and
// double as having hardware floating-point support.
#ifdef __SSE2__
#define LIBC_TARGET_CPU_HAS_SSE2
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
#endif // __SSE2__

#if defined(__SSE4_2__)
Expand All @@ -42,24 +44,55 @@
#define LIBC_TARGET_CPU_HAS_AVX512BW
#endif

// __ARM_FP is tested one bit at a time: 0x2 selects half, 0x4 selects float
// and 0x8 selects double. Every supported format defines both the
// ARM-specific macro and the architecture-neutral LIBC_TARGET_CPU_HAS_FPU_*
// macro. Nothing is defined when __ARM_FP itself is absent.
#if defined(__ARM_FP) && (__ARM_FP & 0x2)
#define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
#define LIBC_TARGET_CPU_HAS_FPU_HALF
#endif // __ARM_FP half-precision bit

#if defined(__ARM_FP) && (__ARM_FP & 0x4)
#define LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
#endif // __ARM_FP single-precision bit

#if defined(__ARM_FP) && (__ARM_FP & 0x8)
#define LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
#endif // __ARM_FP double-precision bit

#if defined(__riscv_flen)
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
// __riscv_flen is the width in bits of the widest supported hardware
// floating-point format (32, 64 or 128); it is not a bitmask. A wider format
// implies the narrower ones, so float and double support are detected with
// ordered comparisons instead of bit tests. With bit tests a 64-bit-FP target
// would not report float support, and a 128-bit-FP target would report
// neither.
// NOTE(review): the half-precision test is kept exactly as written. Since
// __riscv_flen is not expected to be 16 it probably never fires; half support
// is presumably signalled by the Zfh feature macros instead -- confirm before
// changing it.
#if (__riscv_flen & 0x10)
#define LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF
#define LIBC_TARGET_CPU_HAS_FPU_HALF
#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF
#if (__riscv_flen >= 32)
#define LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT
#if (__riscv_flen >= 64)
#define LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE
#endif // __riscv_flen

// Both GPU targets are treated as having hardware float and double support.
#if defined(__AMDGPU__) || defined(__NVPTX__)
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
#endif // __AMDGPU__ || __NVPTX__

// Defines LIBC_TARGET_CPU_HAS_FMA when any of the compiler or project macros
// listed in the condition is present, then derives the per-precision
// LIBC_TARGET_CPU_HAS_FMA_{HALF,FLOAT,DOUBLE} macros.
// NOTE(review): this region appears to be rendered from a diff without +/-
// markers. Each `__ARM_FP` bit test is immediately followed by a
// LIBC_TARGET_CPU_HAS_FPU_* test, yet the pair shares a single #endif, so the
// #if / #endif nesting does not balance as shown. Confirm which of the two
// spellings (and whether the `#else` branch) is meant to remain before
// relying on this text.
#if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \
defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
#define LIBC_TARGET_CPU_HAS_FMA
// Provide a more fine-grained control of FMA instruction for ARM targets.
#if defined(__ARM_FP)
#if (__ARM_FP & 0x2)
#if defined(LIBC_TARGET_CPU_HAS_FPU_HALF)
#define LIBC_TARGET_CPU_HAS_FMA_HALF
#endif // LIBC_TARGET_CPU_HAS_FMA_HALF
#if (__ARM_FP & 0x4)
#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT)
#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
#if (__ARM_FP & 0x8)
#if defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE)
#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#else
// Branch taken when the guarding condition above is false: float and double
// FMA are both enabled unconditionally.
#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
#endif
#endif

#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || \
Expand Down
3 changes: 2 additions & 1 deletion utils/bazel/llvm-project-overlay/libc/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1179,7 +1179,7 @@ sqrt_hdrs = selects.with_or({
"src/__support/FPUtil/x86_64/sqrt.h",
],
PLATFORM_CPU_ARM64: sqrt_common_hdrs + [
"src/__support/FPUtil/aarch64/sqrt.h",
"src/__support/FPUtil/arm/sqrt.h",
],
})

Expand All @@ -1195,6 +1195,7 @@ libc_support_library(
":__support_fputil_fenv_impl",
":__support_fputil_fp_bits",
":__support_fputil_rounding_mode",
":__support_macros_properties_cpu_features",
":__support_uint128",
],
)
Expand Down
Loading