Skip to content

Commit 912e8cc

Browse files
committed
Move __builtin_elementwise_sqrt to generic, and add GPUs, RISCV support.
1 parent 532002a commit 912e8cc

File tree

6 files changed

+76
-38
lines changed

6 files changed

+76
-38
lines changed

libc/src/__support/FPUtil/aarch64/sqrt.h renamed to libc/src/__support/FPUtil/arm/sqrt.h

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
10-
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H
10+
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H
1111

1212
#include "src/__support/common.h"
1313
#include "src/__support/macros/config.h"
@@ -18,36 +18,26 @@
1818
#error "Invalid include"
1919
#endif
2020

21-
#include "src/__support/FPUtil/generic/sqrt.h"
22-
2321
namespace LIBC_NAMESPACE_DECL {
2422
namespace fputil {
2523

26-
#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
24+
#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
2725
template <> LIBC_INLINE float sqrt<float>(float x) {
28-
#if __has_builtin(__builtin_elementwise_sqrt)
29-
return __builtin_elementwise_sqrt(x);
30-
#else
3126
float y;
3227
asm("fsqrt %s0, %s1\n\t" : "=w"(y) : "w"(x));
3328
return y;
34-
#endif // __builtin_elementwise_sqrt
3529
}
36-
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
30+
#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
3731

38-
#ifdef LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
32+
#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
3933
template <> LIBC_INLINE double sqrt<double>(double x) {
40-
#if __has_builtin(__builtin_elementwise_sqrt)
41-
return __builtin_elementwise_sqrt(x);
42-
#else
4334
double y;
4435
asm("fsqrt %d0, %d1\n\t" : "=w"(y) : "w"(x));
4536
return y;
46-
#endif // __builtin_elementwise_sqrt
4737
}
48-
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
38+
#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE
4939

5040
} // namespace fputil
5141
} // namespace LIBC_NAMESPACE_DECL
5242

53-
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AARCH64_SQRT_H
43+
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_ARM_SQRT_H

libc/src/__support/FPUtil/riscv/sqrt.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,31 +12,30 @@
1212
#include "src/__support/common.h"
1313
#include "src/__support/macros/config.h"
1414
#include "src/__support/macros/properties/architectures.h"
15+
#include "src/__support/macros/properties/cpu_features.h"
1516

1617
#if !defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
1718
#error "Invalid include"
1819
#endif
1920

20-
#include "src/__support/FPUtil/generic/sqrt.h"
21-
2221
namespace LIBC_NAMESPACE_DECL {
2322
namespace fputil {
2423

25-
#ifdef __riscv_flen
24+
#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
2625
template <> LIBC_INLINE float sqrt<float>(float x) {
2726
float result;
2827
__asm__ __volatile__("fsqrt.s %0, %1\n\t" : "=f"(result) : "f"(x));
2928
return result;
3029
}
30+
#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
3131

32-
#if __riscv_flen >= 64
32+
#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
3333
template <> LIBC_INLINE double sqrt<double>(double x) {
3434
double result;
3535
__asm__ __volatile__("fsqrt.d %0, %1\n\t" : "=f"(result) : "f"(x));
3636
return result;
3737
}
38-
#endif // __riscv_flen >= 64
39-
#endif // __riscv_flen
38+
#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE
4039

4140
} // namespace fputil
4241
} // namespace LIBC_NAMESPACE_DECL

libc/src/__support/FPUtil/sqrt.h

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,43 @@
1212
#include "src/__support/macros/properties/architectures.h"
1313
#include "src/__support/macros/properties/cpu_features.h"
1414

15-
#if defined(LIBC_TARGET_ARCH_IS_X86_64) && defined(LIBC_TARGET_CPU_HAS_SSE2)
15+
#include "src/__support/FPUtil/generic/sqrt.h"
16+
17+
// Generic instruction specializations with __builtin_elementwise_sqrt.
18+
#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT) || \
19+
defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE)
20+
21+
#if __has_builtin(__builtin_elementwise_sqrt)
22+
23+
namespace LIBC_NAMESPACE_DECL {
24+
namespace fputil {
25+
26+
#ifdef LIBC_TARGET_CPU_HAS_FPU_FLOAT
27+
template <> LIBC_INLINE float sqrt<float>(float x) {
28+
return __builtin_elementwise_sqrt(x);
29+
}
30+
#endif // LIBC_TARGET_CPU_HAS_FPU_FLOAT
31+
32+
#ifdef LIBC_TARGET_CPU_HAS_FPU_DOUBLE
33+
template <> LIBC_INLINE double sqrt<double>(double x) {
34+
return __builtin_elementwise_sqrt(x);
35+
}
36+
#endif // LIBC_TARGET_CPU_HAS_FPU_DOUBLE
37+
38+
} // namespace fputil
39+
} // namespace LIBC_NAMESPACE_DECL
40+
41+
#else
42+
// Use inline assembly when __builtin_elementwise_sqrt is not available.
43+
#if defined(LIBC_TARGET_CPU_HAS_SSE2)
1644
#include "x86_64/sqrt.h"
1745
#elif defined(LIBC_TARGET_ARCH_IS_ANY_ARM)
18-
#include "aarch64/sqrt.h"
46+
#include "arm/sqrt.h"
1947
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
2048
#include "riscv/sqrt.h"
21-
#else
22-
#include "generic/sqrt.h"
49+
50+
#endif // __builtin_elementwise_sqrt
2351

2452
#endif
53+
2554
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_SQRT_H

libc/src/__support/FPUtil/x86_64/sqrt.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
#error "sqrtss / sqrtsd need SSE2"
1919
#endif
2020

21-
#include "src/__support/FPUtil/generic/sqrt.h"
22-
2321
namespace LIBC_NAMESPACE_DECL {
2422
namespace fputil {
2523

libc/src/__support/macros/properties/cpu_features.h

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
#if defined(__SSE2__)
2222
#define LIBC_TARGET_CPU_HAS_SSE2
23+
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
24+
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
2325
#endif
2426

2527
#if defined(__SSE4_2__)
@@ -45,33 +47,52 @@
4547
#if defined(__ARM_FP)
4648
#if (__ARM_FP & 0x2)
4749
#define LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
50+
#define LIBC_TARGET_CPU_HAS_FPU_HALF
4851
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_HALF
4952
#if (__ARM_FP & 0x4)
5053
#define LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
54+
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
5155
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT
5256
#if (__ARM_FP & 0x8)
5357
#define LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
58+
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
5459
#endif // LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE
5560
#endif // __ARM_FP
5661

62+
#if defined(__riscv_flen)
63+
// https://github.com/riscv-non-isa/riscv-c-api-doc/blob/main/src/c-api.adoc
64+
#if (__riscv_flen & 0x20)
65+
#define LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF
66+
#define LIBC_TARGET_CPU_HAS_FPU_HALF
67+
#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_HALF
68+
#if (__riscv_flen & 0x40)
69+
#define LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT
70+
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
71+
#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_FLOAT
72+
#if (__riscv_flen & 0x80)
73+
#define LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE
74+
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
75+
#endif // LIBC_TARGET_CPU_HAS_RISCV_FPU_DOUBLE
76+
#endif // __riscv_flen
77+
78+
#if defined(__NVPTX__) || defined(__AMDGPU__)
79+
#define LIBC_TARGET_CPU_HAS_FPU_FLOAT
80+
#define LIBC_TARGET_CPU_HAS_FPU_DOUBLE
81+
#endif
82+
5783
#if defined(__ARM_FEATURE_FMA) || (defined(__AVX2__) && defined(__FMA__)) || \
5884
defined(__NVPTX__) || defined(__AMDGPU__) || defined(__LIBC_RISCV_USE_FMA)
5985
#define LIBC_TARGET_CPU_HAS_FMA
6086
// Provide a more fine-grained control of FMA instruction for ARM targets.
61-
#if defined(__ARM_FP)
62-
#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_HALF)
87+
#if defined(LIBC_TARGET_CPU_HAS_FPU_HALF)
6388
#define LIBC_TARGET_CPU_HAS_FMA_HALF
6489
#endif // LIBC_TARGET_CPU_HAS_FMA_HALF
65-
#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_FLOAT)
90+
#if defined(LIBC_TARGET_CPU_HAS_FPU_FLOAT)
6691
#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
6792
#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT
68-
#if defined(LIBC_TARGET_CPU_HAS_ARM_FPU_DOUBLE)
93+
#if defined(LIBC_TARGET_CPU_HAS_FPU_DOUBLE)
6994
#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
7095
#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE
71-
#else
72-
#define LIBC_TARGET_CPU_HAS_FMA_FLOAT
73-
#define LIBC_TARGET_CPU_HAS_FMA_DOUBLE
74-
#endif
7596
#endif
7697

7798
#if defined(LIBC_TARGET_ARCH_IS_AARCH64) || \

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1179,7 +1179,7 @@ sqrt_hdrs = selects.with_or({
11791179
"src/__support/FPUtil/x86_64/sqrt.h",
11801180
],
11811181
PLATFORM_CPU_ARM64: sqrt_common_hdrs + [
1182-
"src/__support/FPUtil/aarch64/sqrt.h",
1182+
"src/__support/FPUtil/arm/sqrt.h",
11831183
],
11841184
})
11851185

@@ -1195,6 +1195,7 @@ libc_support_library(
11951195
":__support_fputil_fenv_impl",
11961196
":__support_fputil_fp_bits",
11971197
":__support_fputil_rounding_mode",
1198+
":__support_macros_properties_cpu_features",
11981199
":__support_uint128",
11991200
],
12001201
)

0 commit comments

Comments
 (0)