Skip to content

Commit 76bbd2d

Browse files
committed
[libc] Implement basic 'fenv.h' utilities on the AMD GPU
Summary: This patch implements a basic floating point environment on the AMDGPU. Users should be able to check rounding modes or certain floating point exceptions using the standard functions. This patch implements the basic set, but only exposes the `fegetround` and `fesetround` utilities. This is because getting the exceptions to work is difficult due to the behavior with the DX10_CLAMP bit that is always set. It is worth noting that this utility is not strictly standards conformant because we can only control this behavior on individual warps. Whether or not we can say it's truly implemented then is an exercise to the reader.
1 parent 3e54768 commit 76bbd2d

File tree

8 files changed

+319
-25
lines changed

8 files changed

+319
-25
lines changed

libc/config/gpu/api.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def StdlibAPI : PublicAPI<"stdlib.h"> {
5555
}
5656

5757
def FenvAPI: PublicAPI<"fenv.h"> {
58-
let Types = ["fenv_t"];
58+
let Types = ["fenv_t", "fexcept_t"];
5959
}
6060

6161
def StdIOAPI : PublicAPI<"stdio.h"> {

libc/config/gpu/entrypoints.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,19 @@ set(TARGET_LIBC_ENTRYPOINTS
214214
libc.src.gpu.rpc_fprintf
215215
)
216216

217+
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
218+
set(extra_entrypoints
219+
# fenv.h entrypoints
220+
libc.src.fenv.fegetenv
221+
libc.src.fenv.fegetround
222+
libc.src.fenv.fesetenv
223+
libc.src.fenv.fesetround
224+
)
225+
endif()
226+
217227
set(TARGET_LIBM_ENTRYPOINTS
228+
${extra_entrypoints}
229+
218230
# math.h entrypoints
219231
libc.src.math.acos
220232
libc.src.math.acosf

libc/include/llvm-libc-macros/math-macros.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
#define math_errhandling 0
4646
#elif defined(__NO_MATH_ERRNO__)
4747
#define math_errhandling (MATH_ERREXCEPT)
48+
#elif defined(__AMDGPU__)
49+
#define math_errhandling (MATH_ERREXCEPT)
4850
#elif defined(__NVPTX__) || defined(__AMDGPU__)
4951
#define math_errhandling (MATH_ERRNO)
5052
#else

libc/include/llvm-libc-types/fenv_t.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ typedef struct {
2727
typedef unsigned int fenv_t;
2828
#elif defined(__AMDGPU__) || defined(__NVPTX__)
2929
typedef struct {
30-
unsigned int __fpc;
30+
unsigned long long __fpc;
3131
} fenv_t;
3232
#else
3333
#error "fenv_t not defined for your platform"

libc/src/__support/FPUtil/FEnvImpl.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
#include "arm/FEnvImpl.h"
3333
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
3434
#include "riscv/FEnvImpl.h"
35+
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
36+
#include "amdgpu/FEnvImpl.h"
3537
#else
3638

3739
namespace LIBC_NAMESPACE::fputil {
Lines changed: 223 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,223 @@
1+
//===-- amdgpu floating point env manipulation functions --------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
10+
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
11+
12+
#include "src/__support/GPU/utils.h"
13+
#include "src/__support/macros/attributes.h"
14+
#include "src/__support/macros/properties/architectures.h"
15+
16+
#if !defined(LIBC_TARGET_ARCH_IS_AMDGPU)
17+
#error "Invalid include"
18+
#endif
19+
20+
#include "hdr/types/fenv_t.h"
21+
#include "hdr/fenv_macros.h"
22+
23+
#include <stdint.h>
24+
25+
namespace LIBC_NAMESPACE {
26+
namespace fputil {
27+
28+
namespace internal {
29+
// Returns the current status of the AMDGPU floating point environment. In
30+
// practice this is simply a 64-bit concatenation of the mode register and the
31+
// trap status register.
32+
//
33+
// The mode register controls the floating point behaviour of the device. It
34+
// can be read or written to by the kernel during runtime. It is laid out as a
35+
// bit field with the following offsets and sizes listed for the relevant
36+
// entries.
37+
//
38+
// ┌─────┬─────────────┬─────┬─────────┬──────────┬─────────────┬────────────┐
39+
// │ ... │ EXCP[20:12] │ ... │ IEEE[9] │ CLAMP[8] │ DENORM[7:4] │ ROUND[3:0] │
40+
// └─────┴─────────────┴─────┴─────────┴──────────┴─────────────┴────────────┘
41+
//
42+
// The rounding mode and denormal modes both control f64/f16 and f32 precision
43+
// operations separately with two bits. The accepted values for the rounding
44+
// mode are nearest, upward, downward, and toward zero, given 0, 1, 2, and 3
45+
// respectively.
46+
//
47+
// The CLAMP bit indicates that DirectX 10 handling of NaNs is enabled in the
48+
// vector ALU. When set this will clamp NaN values to zero and pass them
49+
// otherwise. A hardware bug causes this bit to prevent floating exceptions
50+
// from being recorded if this bit is set on all generations before GFX12.
51+
//
52+
// The IEEE bit controls whether or not floating point operations supporting
53+
// exception gathering are IEEE 754-2008 compliant.
54+
//
55+
// The EXCP field indicates which exceptions will cause the instruction to
56+
// take a trap if traps are enabled, see the status register. The bit layout
57+
// is identical to that in the trap status register. We are only concerned
58+
// with the first six bits and ignore the other three.
59+
//
60+
// The trap status register contains information about the status of the
61+
// exceptions. These bits are accumulated regardless of trap handling status
62+
// and are sticky until cleared.
63+
//
64+
// 5 4 3 2 1 0
65+
// ┌─────────┬───────────┬──────────┬────────────────┬──────────┬─────────┐
66+
// │ Inexact │ Underflow │ Overflow │ Divide by zero │ Denormal │ Invalid │
67+
// └─────────┴───────────┴──────────┴────────────────┴──────────┴─────────┘
68+
//
69+
// These exceptions indicate that at least one lane in the current wavefront
70+
// signalled a floating point exception. There is no way to increase the
71+
// granularity.
72+
//
73+
// The returned value has the following layout.
74+
//
75+
// ┌────────────────────┬─────────────────────┐
76+
// │ Trap Status[38:32] │ Mode Register[31:0] │
77+
// └────────────────────┴─────────────────────┘
78+
LIBC_INLINE uint64_t get_fpenv() { return __builtin_amdgcn_get_fpenv(); }
79+
80+
// Set the floating point environment using the same layout as above.
81+
LIBC_INLINE void set_fpenv(uint64_t env) { __builtin_amdgcn_set_fpenv(env); }
82+
83+
// The six bits used to encode the standard floating point exceptions in the
84+
// trap status register.
85+
enum ExceptionFlags : uint32_t {
86+
EXCP_INVALID_F = 0x1,
87+
EXCP_DENORMAL_F = 0x2,
88+
EXCP_DIV_BY_ZERO_F = 0x4,
89+
EXCP_OVERFLOW_F = 0x8,
90+
EXCP_UNDERFLOW_F = 0x10,
91+
EXCP_INEXACT_F = 0x20,
92+
};
93+
94+
// The values used by the AMDGPU backend to handle the `llvm.get.rounding`
95+
// intrinsic function. See the values in the documentation for more information.
96+
// https://llvm.org/docs/AMDGPUUsage.html#amdgpu-rounding-mode-enumeration-values-table
97+
enum RoundingFlags : uint32_t {
98+
ROUND_TOWARD_ZERO = 0x0,
99+
ROUND_TO_NEAREST = 0x1,
100+
ROUND_UPWARD = 0x2,
101+
ROUND_DOWNWARD = 0x3,
102+
};
103+
104+
// Exception flags are individual bits in the corresponding hardware register.
105+
// This converts between the exported C standard values and the hardware values.
106+
LIBC_INLINE uint32_t get_status_value_for_except(uint32_t excepts) {
107+
return (excepts & FE_INVALID ? EXCP_INVALID_F : 0) |
108+
(excepts & FE_DIVBYZERO ? EXCP_DIV_BY_ZERO_F : 0) |
109+
(excepts & FE_OVERFLOW ? EXCP_OVERFLOW_F : 0) |
110+
(excepts & FE_UNDERFLOW ? EXCP_UNDERFLOW_F : 0) |
111+
(excepts & FE_INEXACT ? EXCP_INEXACT_F : 0);
112+
}
113+
114+
LIBC_INLINE uint32_t get_except_value_for_status(uint32_t status) {
115+
return (status & EXCP_INVALID_F ? FE_INVALID : 0) |
116+
(status & EXCP_DIV_BY_ZERO_F ? FE_DIVBYZERO : 0) |
117+
(status & EXCP_OVERFLOW_F ? FE_OVERFLOW : 0) |
118+
(status & EXCP_UNDERFLOW_F ? FE_UNDERFLOW : 0) |
119+
(status & EXCP_INEXACT_F ? FE_INEXACT : 0);
120+
}
121+
122+
// Set the hardware rounding mode using the llvm.set.rounding intrinsic
123+
// function.
124+
// FIXME: This requires `noinline` to flush the hardware register in time.
125+
[[gnu::noinline]] LIBC_INLINE void set_rounding_mode(uint32_t mode) {
126+
__builtin_set_flt_rounds(mode);
127+
}
128+
129+
// Get the hardware rounding mode using the llvm.get.rounding intrinsic
130+
// function.
131+
// FIXME: This requires `noinline` to flush the hardware register in time.
132+
[[gnu::noinline]] LIBC_INLINE uint32_t get_rounding_mode() {
133+
return __builtin_flt_rounds();
134+
}
135+
136+
} // namespace internal
137+
138+
// TODO: Not implemented yet.
139+
LIBC_INLINE int clear_except(int) { return 0; }
140+
141+
// TODO: Not implemented yet.
142+
LIBC_INLINE int test_except(int) { return 0; }
143+
144+
// TODO: Not implemented yet.
145+
LIBC_INLINE int get_except() { return 0; }
146+
147+
// TODO: Not implemented yet.
148+
LIBC_INLINE int set_except(int) { return 0; }
149+
150+
// TODO: Not implemented yet.
151+
LIBC_INLINE int enable_except(int) { return 0; }
152+
153+
// TODO: Not implemented yet.
154+
LIBC_INLINE int disable_except(int) { return 0; }
155+
156+
// TODO: Not implemented yet.
157+
LIBC_INLINE int raise_except(int) { return 0; }
158+
159+
// Get the currently set rounding mode from the environment. The AMDGPU backend
160+
// supports an extension for separate f64 / f32 rounding control. If the
161+
// provided value is outside of the standard region we handle it without
162+
// modification.
163+
LIBC_INLINE int get_round() {
164+
uint32_t mode = internal::get_rounding_mode();
165+
switch (mode) {
166+
case internal::ROUND_TO_NEAREST:
167+
return FE_TONEAREST;
168+
case internal::ROUND_UPWARD:
169+
return FE_UPWARD;
170+
case internal::ROUND_DOWNWARD:
171+
return FE_DOWNWARD;
172+
case internal::ROUND_TOWARD_ZERO:
173+
return FE_TOWARDZERO;
174+
default:
175+
return mode;
176+
}
177+
__builtin_unreachable();
178+
}
179+
180+
// Set the rounding mode for the environment. If the provided mode is above the
181+
// expected range we assume it is an extended value to control f32 / f64
182+
// separately.
183+
LIBC_INLINE int set_round(int rounding_mode) {
184+
switch (rounding_mode) {
185+
case FE_TONEAREST:
186+
internal::set_rounding_mode(internal::ROUND_TO_NEAREST);
187+
break;
188+
case FE_UPWARD:
189+
internal::set_rounding_mode(internal::ROUND_UPWARD);
190+
break;
191+
case FE_DOWNWARD:
192+
internal::set_rounding_mode(internal::ROUND_DOWNWARD);
193+
break;
194+
case FE_TOWARDZERO:
195+
internal::set_rounding_mode(internal::ROUND_TOWARD_ZERO);
196+
break;
197+
default:
198+
internal::set_rounding_mode(rounding_mode);
199+
break;
200+
}
201+
return 0;
202+
}
203+
204+
LIBC_INLINE int get_env(fenv_t *env) {
205+
if (!env)
206+
return 1;
207+
208+
env->__fpc = internal::get_fpenv();
209+
return 0;
210+
}
211+
212+
LIBC_INLINE int set_env(const fenv_t *env) {
213+
if (!env)
214+
return 1;
215+
216+
internal::set_fpenv(env->__fpc);
217+
return 0;
218+
}
219+
220+
} // namespace fputil
221+
} // namespace LIBC_NAMESPACE
222+
223+
#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H

libc/test/src/fenv/CMakeLists.txt

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
add_custom_target(libc_fenv_unittests)
1+
add_custom_target(libc_fenv_tests)
22

3-
add_libc_unittest(
3+
add_libc_test(
44
rounding_mode_test
55
SUITE
6-
libc_fenv_unittests
6+
libc_fenv_tests
77
SRCS
88
rounding_mode_test.cpp
99
DEPENDS
@@ -13,10 +13,10 @@ add_libc_unittest(
1313
LibcFPTestHelpers
1414
)
1515

16-
add_libc_unittest(
16+
add_libc_test(
1717
exception_status_test
1818
SUITE
19-
libc_fenv_unittests
19+
libc_fenv_tests
2020
SRCS
2121
exception_status_test.cpp
2222
DEPENDS
@@ -29,26 +29,29 @@ add_libc_unittest(
2929
LibcFPTestHelpers
3030
)
3131

32-
add_libc_unittest(
32+
add_libc_test(
3333
getenv_and_setenv_test
3434
SUITE
35-
libc_fenv_unittests
35+
libc_fenv_tests
3636
SRCS
3737
getenv_and_setenv_test.cpp
3838
DEPENDS
3939
libc.src.fenv.fegetenv
4040
libc.src.fenv.fegetround
4141
libc.src.fenv.fesetenv
4242
libc.src.fenv.fesetround
43+
libc.src.fenv.feclearexcept
44+
libc.src.fenv.feraiseexcept
45+
libc.src.fenv.fetestexcept
4346
libc.src.__support.FPUtil.fenv_impl
4447
LINK_LIBRARIES
4548
LibcFPTestHelpers
4649
)
4750

48-
add_libc_unittest(
51+
add_libc_test(
4952
exception_flags_test
5053
SUITE
51-
libc_fenv_unittests
54+
libc_fenv_tests
5255
SRCS
5356
exception_flags_test.cpp
5457
DEPENDS
@@ -60,10 +63,10 @@ add_libc_unittest(
6063
LibcFPTestHelpers
6164
)
6265

63-
add_libc_unittest(
66+
add_libc_test(
6467
feupdateenv_test
6568
SUITE
66-
libc_fenv_unittests
69+
libc_fenv_tests
6770
SRCS
6871
feupdateenv_test.cpp
6972
DEPENDS
@@ -74,10 +77,10 @@ add_libc_unittest(
7477
LibcFPTestHelpers
7578
)
7679

77-
add_libc_unittest(
80+
add_libc_test(
7881
feclearexcept_test
7982
SUITE
80-
libc_fenv_unittests
83+
libc_fenv_tests
8184
SRCS
8285
feclearexcept_test.cpp
8386
DEPENDS
@@ -87,10 +90,10 @@ add_libc_unittest(
8790
LibcFPTestHelpers
8891
)
8992

90-
add_libc_unittest(
93+
add_libc_test(
9194
feenableexcept_test
9295
SUITE
93-
libc_fenv_unittests
96+
libc_fenv_tests
9497
SRCS
9598
feenableexcept_test.cpp
9699
DEPENDS
@@ -112,7 +115,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
112115
enabled_exceptions_test
113116
UNIT_TEST_ONLY
114117
SUITE
115-
libc_fenv_unittests
118+
libc_fenv_tests
116119
SRCS
117120
enabled_exceptions_test.cpp
118121
DEPENDS
@@ -130,7 +133,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
130133
feholdexcept_test
131134
UNIT_TEST_ONLY
132135
SUITE
133-
libc_fenv_unittests
136+
libc_fenv_tests
134137
SRCS
135138
feholdexcept_test.cpp
136139
DEPENDS

0 commit comments

Comments
 (0)