Skip to content

Commit de4dc4a

Browse files
committed
[libc] Implement basic 'fenv.h' utilities on the AMD GPU
Summary: This patch implements a basic floating point environment on the AMDGPU. Users should be able to check rounding modes or certain floating point exceptions using the standard functions. This patch implements the basic set, but only exposes the `fegetround` and `fesetround` utilities. This is because getting the exceptions to work is difficult due to the behavior with the DX10_CLAMP bit that is always set. It is worth noting that this utility is not strictly standards conformant because we can only control this behavior on individual warps. Whether or not we can say it's truly implemented then is an exercise for the reader.
1 parent 6626eab commit de4dc4a

File tree

7 files changed

+251
-19
lines changed

7 files changed

+251
-19
lines changed

libc/config/gpu/api.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def StdlibAPI : PublicAPI<"stdlib.h"> {
5555
}
5656

5757
def FenvAPI: PublicAPI<"fenv.h"> {
58-
let Types = ["fenv_t"];
58+
let Types = ["fenv_t", "fexcept_t"];
5959
}
6060

6161
def StdIOAPI : PublicAPI<"stdio.h"> {

libc/config/gpu/entrypoints.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,19 @@ set(TARGET_LIBC_ENTRYPOINTS
213213
libc.src.gpu.rpc_host_call
214214
)
215215

216+
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
217+
set(extra_entrypoints
218+
# fenv.h entrypoints
219+
libc.src.fenv.fegetenv
220+
libc.src.fenv.fegetround
221+
libc.src.fenv.fesetenv
222+
libc.src.fenv.fesetround
223+
)
224+
endif()
225+
216226
set(TARGET_LIBM_ENTRYPOINTS
227+
${extra_entrypoints}
228+
217229
# math.h entrypoints
218230
libc.src.math.acos
219231
libc.src.math.acosf

libc/include/llvm-libc-macros/math-macros.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
#define math_errhandling 0
4646
#elif defined(__NO_MATH_ERRNO__)
4747
#define math_errhandling (MATH_ERREXCEPT)
48+
#elif defined(__AMDGPU__)
49+
#define math_errhandling (MATH_ERREXCEPT)
4850
#elif defined(__NVPTX__) || defined(__AMDGPU__)
4951
#define math_errhandling (MATH_ERRNO)
5052
#else

libc/include/llvm-libc-types/fenv_t.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ typedef struct {
2727
typedef unsigned int fenv_t;
2828
#elif defined(__AMDGPU__) || defined(__NVPTX__)
2929
typedef struct {
30-
unsigned int __fpc;
30+
unsigned long long __fpc;
3131
} fenv_t;
3232
#else
3333
#error "fenv_t not defined for your platform"

libc/src/__support/FPUtil/FEnvImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131
#include "arm/FEnvImpl.h"
3232
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
3333
#include "riscv/FEnvImpl.h"
34+
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
35+
#include "amdgpu/FEnvImpl.h"
3436
#else
3537

3638
namespace LIBC_NAMESPACE::fputil {
Lines changed: 213 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
//===-- amdgpu floating point env manipulation functions --------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
10+
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
11+
12+
#include "src/__support/GPU/utils.h"
13+
#include "src/__support/macros/attributes.h"
14+
#include "src/__support/macros/properties/architectures.h"
15+
16+
#if !defined(LIBC_TARGET_ARCH_IS_AMDGPU)
17+
#error "Invalid include"
18+
#endif
19+
20+
#include <fenv.h>
21+
#include <stdint.h>
22+
23+
namespace LIBC_NAMESPACE {
24+
namespace fputil {
25+
26+
namespace internal {
27+
// Returns the current status of the AMDGPU floating point environment. In
28+
// practice this is simply a 64-bit concatenation of the mode register and the
29+
// trap status register.
30+
//
31+
// The mode register controls the floating point behaviour of the device. It
32+
// can be read or written to by the kernel during runtime. It is laid out as a
33+
// bit field with the following offsets and sizes listed for the relevant
34+
// entries.
35+
//
36+
// ┌─────┬─────────────┬─────┬─────────┬──────────┬─────────────┬────────────┐
37+
// │ ... │ EXCP[20:12] │ ... │ IEEE[9] │ CLAMP[8] │ DENORM[7:4] │ ROUND[3:0] │
38+
// └─────┴─────────────┴─────┴─────────┴──────────┴─────────────┴────────────┘
39+
//
40+
// The rounding mode and denormal modes both control f64/f16 and f32 precision
41+
// operations separately with two bits. The accepted values for the rounding
42+
// mode are nearest, upward, downward, and toward zero, given by 0, 1, 2, and 3
43+
// respectively.
44+
//
45+
// The CLAMP bit indicates that DirectX 10 handling of NaNs is enabled in the
46+
// vector ALU. When set this will clamp NaN values to zero and pass them
47+
// otherwise. A hardware bug causes this bit to prevent floating exceptions
48+
// from being recorded if this bit is set on all generations before GFX12.
49+
//
50+
// The IEEE bit controls whether or not floating point operations supporting
51+
// exception gathering are IEEE 754-2008 compliant.
52+
//
53+
// The EXCP field indicates which exceptions will cause the instruction to
54+
// take a trap if traps are enabled, see the status register. The bit layout
55+
// is identical to that in the trap status register. We are only concerned
56+
// with the first six bits and ignore the other three.
57+
//
58+
// The trap status register contains information about the status of the
59+
// exceptions. These bits are accumulated regardless of trap handling status
60+
// and are sticky until cleared.
61+
//
62+
// 5 4 3 2 1 0
63+
// ┌─────────┬───────────┬──────────┬────────────────┬──────────┬─────────┐
64+
// │ Inexact │ Underflow │ Overflow │ Divide by zero │ Denormal │ Invalid │
65+
// └─────────┴───────────┴──────────┴────────────────┴──────────┴─────────┘
66+
//
67+
// These exceptions indicate that at least one lane in the current wavefront
68+
// signalled a floating point exception. There is no way to increase the
69+
// granularity.
70+
//
71+
// The returned value has the following layout.
72+
//
73+
// ┌────────────────────┬─────────────────────┐
74+
// │ Trap Status[38:32] │ Mode Register[31:0] │
75+
// └────────────────────┴─────────────────────┘
76+
LIBC_INLINE uint64_t get_fpenv() {
  // Snapshot of the packed environment: trap status in the upper word and the
  // mode register in the lower word.
  const uint64_t packed_env = __builtin_amdgcn_get_fpenv();
  return packed_env;
}
77+
78+
// Set the floating point environment using the same layout as above.
79+
LIBC_INLINE void set_fpenv(uint64_t env) {
  // Hand the packed 64-bit environment to the hardware builtin unchanged.
  __builtin_amdgcn_set_fpenv(env);
}
80+
81+
// The six bits used to encode the standard floating point exceptions in the
82+
// trap status register.
83+
// One-hot masks for the six exception bits; each value is 1 << (bit index)
// for bit indices 0 through 5.
enum ExceptionFlags : uint32_t {
84+
EXCP_INVALID_F = 0x1, // bit 0: invalid operation
85+
EXCP_DENORMAL_F = 0x2, // bit 1: denormal
86+
EXCP_DIV_BY_ZERO_F = 0x4, // bit 2: divide by zero
87+
EXCP_OVERFLOW_F = 0x8, // bit 3: overflow
88+
EXCP_UNDERFLOW_F = 0x10, // bit 4: underflow
89+
EXCP_INEXACT_F = 0x20, // bit 5: inexact
90+
};
91+
92+
// The two bit encoded rounding modes used in the mode register.
93+
// Each enumerator is a two-bit hardware encoding. set_rounding_mode() writes
// the chosen value into both two-bit halves of the four-bit ROUND field.
enum RoundingFlags : uint32_t {
94+
ROUND_TO_NEAREST = 0x0, // selected by FE_TONEAREST
95+
ROUND_UPWARD = 0x1, // selected by FE_UPWARD
96+
ROUND_DOWNWARD = 0x2, // selected by FE_DOWNWARD
97+
ROUND_TOWARD_ZERO = 0x3, // selected by FE_TOWARDZERO
98+
};
99+
100+
// Exception flags are individual bits in the corresponding hardware register.
101+
// This converts between the exported C standard values and the hardware values.
102+
// Translates a bitmask of C-level FE_* exception macros into the matching
// EXCP_*_F hardware bits. Bits in `excepts` that match none of the macros
// tested below contribute nothing to the result. The denormal mapping is only
// compiled in when the __FE_DENORM macro is defined.
LIBC_INLINE uint32_t get_status_value_for_except(uint32_t excepts) {
103+
// `&` binds tighter than `?:`, so each term reads as (excepts & FE_x) ? bit : 0.
return (excepts & FE_INVALID ? EXCP_INVALID_F : 0) |
104+
#ifdef __FE_DENORM
105+
(excepts & __FE_DENORM ? EXCP_DENORMAL_F : 0) |
106+
#endif // __FE_DENORM
107+
(excepts & FE_DIVBYZERO ? EXCP_DIV_BY_ZERO_F : 0) |
108+
(excepts & FE_OVERFLOW ? EXCP_OVERFLOW_F : 0) |
109+
(excepts & FE_UNDERFLOW ? EXCP_UNDERFLOW_F : 0) |
110+
(excepts & FE_INEXACT ? EXCP_INEXACT_F : 0);
111+
}
112+
113+
// Inverse of get_status_value_for_except(): translates a bitmask of EXCP_*_F
// hardware bits into the matching C-level FE_* exception macros. Bits in
// `status` outside the tested EXCP_*_F masks are ignored. The denormal mapping
// is only compiled in when the __FE_DENORM macro is defined.
LIBC_INLINE uint32_t get_except_value_for_status(uint32_t status) {
114+
// `&` binds tighter than `?:`, so each term reads as (status & EXCP_x) ? FE_x : 0.
return (status & EXCP_INVALID_F ? FE_INVALID : 0) |
115+
#ifdef __FE_DENORM
116+
(status & EXCP_DENORMAL_F ? __FE_DENORM : 0) |
117+
#endif // __FE_DENORM
118+
(status & EXCP_DIV_BY_ZERO_F ? FE_DIVBYZERO : 0) |
119+
(status & EXCP_OVERFLOW_F ? FE_OVERFLOW : 0) |
120+
(status & EXCP_UNDERFLOW_F ? FE_UNDERFLOW : 0) |
121+
(status & EXCP_INEXACT_F ? FE_INEXACT : 0);
122+
}
123+
124+
// Access the four bits in the mode register's ROUND[3:0] field. The hardware
125+
// supports setting the f64/f16 and f32 precision rounding modes separately but
126+
// we will assume that these always match.
127+
// Read-modify-write of the environment: replaces only the low four bits with
// `flags` duplicated into bits [3:2] and bits [1:0]; every other bit of the
// value returned by get_fpenv() is written back unchanged.
// NOTE(review): `flags` is not masked to two bits, so a value above 0x3 would
// set bits outside ROUND[3:0]. The visible caller (set_round) only passes
// RoundingFlags enumerators.
LIBC_INLINE void set_rounding_mode(uint32_t flags) {
128+
// Clear the low four bits and keep the rest of the 64-bit environment.
uint64_t old = get_fpenv() & 0xfffffffffffffff0;
129+
// `<<` binds tighter than `|`: this is old | (flags << 2) | flags.
set_fpenv(old | flags << 2 | flags);
130+
}
131+
132+
// The control register can modify f32/f64 rounding modes individually. For our
133+
// purposes we assume that these always match as we do not expose this through
134+
// the C interface.
135+
LIBC_INLINE uint32_t get_rounding_mode() {
  // Only the lowest two bits of the environment are consulted; the result is
  // therefore always in the range 0..3.
  constexpr uint64_t LOW_ROUND_MASK = 0x3;
  const uint64_t env = get_fpenv();
  return static_cast<uint32_t>(env & LOW_ROUND_MASK);
}
136+
137+
} // namespace internal
138+
139+
// TODO: Not implemented yet.
140+
LIBC_INLINE int clear_except(int) {
  // Placeholder: the argument is ignored and no state is modified.
  return 0;
}
141+
142+
// TODO: Not implemented yet.
143+
LIBC_INLINE int test_except(int) {
  // Placeholder: the argument is ignored; always yields 0.
  return 0;
}
144+
145+
// TODO: Not implemented yet.
146+
LIBC_INLINE int get_except() {
  // Placeholder: no hardware state is read; always yields 0.
  return 0;
}
147+
148+
// TODO: Not implemented yet.
149+
LIBC_INLINE int set_except(int) {
  // Placeholder: the argument is ignored and no state is modified.
  return 0;
}
150+
151+
// TODO: Not implemented yet.
152+
LIBC_INLINE int enable_except(int) {
  // Placeholder: the argument is ignored and no state is modified.
  return 0;
}
153+
154+
// TODO: Not implemented yet.
155+
LIBC_INLINE int disable_except(int) {
  // Placeholder: the argument is ignored and no state is modified.
  return 0;
}
156+
157+
// TODO: Not implemented yet.
158+
LIBC_INLINE int raise_except(int) {
  // Placeholder: the argument is ignored and nothing is raised.
  return 0;
}
159+
160+
// Reports the current rounding mode as one of the C-level FE_* rounding
// macros by translating the two-bit hardware value returned by
// internal::get_rounding_mode().
LIBC_INLINE int get_round() {
161+
switch (internal::get_rounding_mode()) {
162+
case internal::ROUND_TO_NEAREST:
163+
return FE_TONEAREST;
164+
case internal::ROUND_UPWARD:
165+
return FE_UPWARD;
166+
case internal::ROUND_DOWNWARD:
167+
return FE_DOWNWARD;
168+
case internal::ROUND_TOWARD_ZERO:
169+
return FE_TOWARDZERO;
170+
}
171+
// get_rounding_mode() masks its result with 0x3, so the four cases above
// cover every value it can return and control never gets here.
__builtin_unreachable();
172+
}
173+
174+
// Sets the rounding mode from one of the C-level FE_* rounding macros.
// Returns 0 after forwarding a recognised mode to
// internal::set_rounding_mode(). Returns 1 without touching the environment
// when `rounding_mode` is not one of the four macros handled below.
LIBC_INLINE int set_round(int rounding_mode) {
175+
switch (rounding_mode) {
176+
case FE_TONEAREST:
177+
internal::set_rounding_mode(internal::ROUND_TO_NEAREST);
178+
break;
179+
case FE_UPWARD:
180+
internal::set_rounding_mode(internal::ROUND_UPWARD);
181+
break;
182+
case FE_DOWNWARD:
183+
internal::set_rounding_mode(internal::ROUND_DOWNWARD);
184+
break;
185+
case FE_TOWARDZERO:
186+
internal::set_rounding_mode(internal::ROUND_TOWARD_ZERO);
187+
break;
188+
default:
189+
// Unrecognised mode: report failure and leave the environment as it was.
return 1;
190+
}
191+
return 0;
192+
}
193+
194+
// Saves the current floating point environment into `*env` by storing the
// 64-bit value from internal::get_fpenv() in env->__fpc.
// Returns 1 if `env` is null (nothing is written), 0 otherwise.
LIBC_INLINE int get_env(fenv_t *env) {
195+
if (!env)
196+
return 1;
197+
198+
env->__fpc = internal::get_fpenv();
199+
return 0;
200+
}
201+
202+
// Installs the floating point environment stored in `*env` by passing
// env->__fpc to internal::set_fpenv().
// Returns 1 if `env` is null (the environment is left untouched), 0 otherwise.
LIBC_INLINE int set_env(const fenv_t *env) {
203+
if (!env)
204+
return 1;
205+
206+
internal::set_fpenv(env->__fpc);
207+
return 0;
208+
}
209+
210+
} // namespace fputil
211+
} // namespace LIBC_NAMESPACE
212+
213+
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H

libc/test/src/fenv/CMakeLists.txt

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,20 @@
1-
add_custom_target(libc_fenv_unittests)
1+
add_custom_target(libc_fenv_tests)
22

3-
add_libc_unittest(
3+
add_libc_test(
44
rounding_mode_test
55
SUITE
6-
libc_fenv_unittests
6+
libc_fenv_tests
77
SRCS
88
rounding_mode_test.cpp
99
DEPENDS
1010
libc.src.fenv.fegetround
1111
libc.src.fenv.fesetround
1212
)
1313

14-
add_libc_unittest(
14+
add_libc_test(
1515
exception_status_test
1616
SUITE
17-
libc_fenv_unittests
17+
libc_fenv_tests
1818
SRCS
1919
exception_status_test.cpp
2020
DEPENDS
@@ -24,24 +24,27 @@ add_libc_unittest(
2424
libc.src.__support.FPUtil.fenv_impl
2525
)
2626

27-
add_libc_unittest(
27+
add_libc_test(
2828
getenv_and_setenv_test
2929
SUITE
30-
libc_fenv_unittests
30+
libc_fenv_tests
3131
SRCS
3232
getenv_and_setenv_test.cpp
3333
DEPENDS
3434
libc.src.fenv.fegetenv
3535
libc.src.fenv.fegetround
3636
libc.src.fenv.fesetenv
3737
libc.src.fenv.fesetround
38+
libc.src.fenv.feclearexcept
39+
libc.src.fenv.feraiseexcept
40+
libc.src.fenv.fetestexcept
3841
libc.src.__support.FPUtil.fenv_impl
3942
)
4043

41-
add_libc_unittest(
44+
add_libc_test(
4245
exception_flags_test
4346
SUITE
44-
libc_fenv_unittests
47+
libc_fenv_tests
4548
SRCS
4649
exception_flags_test.cpp
4750
DEPENDS
@@ -50,10 +53,10 @@ add_libc_unittest(
5053
libc.src.__support.FPUtil.fenv_impl
5154
)
5255

53-
add_libc_unittest(
56+
add_libc_test(
5457
feupdateenv_test
5558
SUITE
56-
libc_fenv_unittests
59+
libc_fenv_tests
5760
SRCS
5861
feupdateenv_test.cpp
5962
DEPENDS
@@ -62,21 +65,21 @@ add_libc_unittest(
6265
libc.src.__support.FPUtil.fenv_impl
6366
)
6467

65-
add_libc_unittest(
68+
add_libc_test(
6669
feclearexcept_test
6770
SUITE
68-
libc_fenv_unittests
71+
libc_fenv_tests
6972
SRCS
7073
feclearexcept_test.cpp
7174
DEPENDS
7275
libc.src.fenv.feclearexcept
7376
libc.src.__support.FPUtil.fenv_impl
7477
)
7578

76-
add_libc_unittest(
79+
add_libc_test(
7780
feenableexcept_test
7881
SUITE
79-
libc_fenv_unittests
82+
libc_fenv_tests
8083
SRCS
8184
feenableexcept_test.cpp
8285
DEPENDS
@@ -96,7 +99,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
9699
enabled_exceptions_test
97100
UNIT_TEST_ONLY
98101
SUITE
99-
libc_fenv_unittests
102+
libc_fenv_tests
100103
SRCS
101104
enabled_exceptions_test.cpp
102105
DEPENDS
@@ -113,7 +116,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
113116
feholdexcept_test
114117
UNIT_TEST_ONLY
115118
SUITE
116-
libc_fenv_unittests
119+
libc_fenv_tests
117120
SRCS
118121
feholdexcept_test.cpp
119122
DEPENDS

0 commit comments

Comments
 (0)