Skip to content

[libc] Implement basic 'fenv.h' utilities on the AMD GPU #83500

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libc/config/gpu/api.td
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def StdlibAPI : PublicAPI<"stdlib.h"> {
}

def FenvAPI: PublicAPI<"fenv.h"> {
let Types = ["fenv_t"];
let Types = ["fenv_t", "fexcept_t"];
}

def StdIOAPI : PublicAPI<"stdio.h"> {
Expand Down
12 changes: 12 additions & 0 deletions libc/config/gpu/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,19 @@ set(TARGET_LIBC_ENTRYPOINTS
libc.src.gpu.rpc_fprintf
)

# Entrypoints that are only enabled when building for AMDGPU. The list is
# spliced into TARGET_LIBM_ENTRYPOINTS below via ${extra_entrypoints}.
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
set(extra_entrypoints
# fenv.h entrypoints
libc.src.fenv.fegetenv
libc.src.fenv.fegetround
libc.src.fenv.fesetenv
libc.src.fenv.fesetround
)
endif()

set(TARGET_LIBM_ENTRYPOINTS
${extra_entrypoints}

# math.h entrypoints
libc.src.math.acos
libc.src.math.acosf
Expand Down
2 changes: 2 additions & 0 deletions libc/include/llvm-libc-macros/math-macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@
#define math_errhandling 0
#elif defined(__NO_MATH_ERRNO__)
#define math_errhandling (MATH_ERREXCEPT)
#elif defined(__AMDGPU__)
#define math_errhandling (MATH_ERREXCEPT)
#elif defined(__NVPTX__) || defined(__AMDGPU__)
#define math_errhandling (MATH_ERRNO)
#else
Expand Down
2 changes: 1 addition & 1 deletion libc/include/llvm-libc-types/fenv_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ typedef struct {
typedef unsigned int fenv_t;
#elif defined(__AMDGPU__) || defined(__NVPTX__)
typedef struct {
unsigned int __fpc;
unsigned long long __fpc;
} fenv_t;
#else
#error "fenv_t not defined for your platform"
Expand Down
2 changes: 2 additions & 0 deletions libc/src/__support/FPUtil/FEnvImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include "arm/FEnvImpl.h"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#include "riscv/FEnvImpl.h"
#elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
#include "amdgpu/FEnvImpl.h"
#else

namespace LIBC_NAMESPACE::fputil {
Expand Down
221 changes: 221 additions & 0 deletions libc/src/__support/FPUtil/amdgpu/FEnvImpl.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
//===-- amdgpu floating point env manipulation functions --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H

#include "src/__support/GPU/utils.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/properties/architectures.h"

#if !defined(LIBC_TARGET_ARCH_IS_AMDGPU)
#error "Invalid include"
#endif

#include "hdr/fenv_macros.h"
#include "hdr/types/fenv_t.h"

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also #include "hdr/fenv_macros.h" for other FE_* macro definitions.

#include <stdint.h>

namespace LIBC_NAMESPACE {
namespace fputil {

namespace internal {
// Returns the current status of the AMDGPU floating point environment. In
// practice this is simply a 64-bit concatenation of the mode register and the
// trap status register.
//
// The mode register controls the floating point behaviour of the device. It
// can be read or written to by the kernel during runtime. It is laid out as a
// bit field with the following offsets and sizes listed for the relevant
// entries.
//
// ┌─────┬─────────────┬─────┬─────────┬──────────┬─────────────┬────────────┐
// │ ... │ EXCP[20:12] │ ... │ IEEE[9] │ CLAMP[8] │ DENORM[7:4] │ ROUND[3:0] │
// └─────┴─────────────┴─────┴─────────┴──────────┴─────────────┴────────────┘
//
// The rounding mode and denormal modes both control f64/f16 and f32 precision
// operations separately with two bits. The accepted values for the rounding
// mode are nearest, upward, downward, and toward zero, given by 0, 1, 2, and 3
// respectively.
//
// The CLAMP bit indicates that DirectX 10 handling of NaNs is enabled in the
// vector ALU. When set this will clamp NaN values to zero and pass them
// otherwise. A hardware bug causes this bit to prevent floating exceptions
// from being recorded if this bit is set on all generations before GFX12.
//
// The IEEE bit controls whether or not floating point operations supporting
// exception gathering are IEEE 754-2008 compliant.
//
// The EXCP field indicates which exceptions will cause the instruction to
// take a trap if traps are enabled, see the status register. The bit layout
// is identical to that in the trap status register. We are only concerned
// with the first six bits and ignore the other three.
//
// The trap status register contains information about the status of the
// exceptions. These bits are accumulated regardless of trap handling status
// and are sticky until cleared.
//
// 5 4 3 2 1 0
// ┌─────────┬───────────┬──────────┬────────────────┬──────────┬─────────┐
// │ Inexact │ Underflow │ Overflow │ Divide by zero │ Denormal │ Invalid │
// └─────────┴───────────┴──────────┴────────────────┴──────────┴─────────┘
//
// These exceptions indicate that at least one lane in the current wavefront
// signalled a floating point exception. There is no way to increase the
// granularity.
//
// The returned value has the following layout.
//
// ┌────────────────────┬─────────────────────┐
// │ Trap Status[38:32] │ Mode Register[31:0] │
// └────────────────────┴─────────────────────┘
LIBC_INLINE uint64_t get_fpenv() {
  // Read the combined 64-bit environment word straight from the builtin.
  const uint64_t env = __builtin_amdgcn_get_fpenv();
  return env;
}

// Write a floating point environment word, in the layout produced by
// get_fpenv(), back through the target builtin.
LIBC_INLINE void set_fpenv(uint64_t env) {
  __builtin_amdgcn_set_fpenv(env);
}

// The six bits used to encode the standard floating point exceptions in the
// trap status register.
enum ExceptionFlags : uint32_t {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to add a link to the documentation of the encodings in the comments?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

b/c technically uint8_t(x) << 11 will always give you 0, right?

I think integer promotion makes it take the int32_t argument from the shift.

Do you want to add a link to the documentation of the encodings in the comments?

I can, if needed, but I pretty much copied everything relevant here so I figured it was redundant.

EXCP_INVALID_F = 0x1, // Bit 0: invalid operation.
EXCP_DENORMAL_F = 0x2, // Bit 1: denormal.
EXCP_DIV_BY_ZERO_F = 0x4, // Bit 2: divide by zero.
EXCP_OVERFLOW_F = 0x8, // Bit 3: overflow.
EXCP_UNDERFLOW_F = 0x10, // Bit 4: underflow.
EXCP_INEXACT_F = 0x20, // Bit 5: inexact.
};

// Rounding mode encodings that the AMDGPU backend uses when handling the
// `llvm.get.rounding` intrinsic function. The full table of values is in the
// backend documentation:
// https://llvm.org/docs/AMDGPUUsage.html#amdgpu-rounding-mode-enumeration-values-table
enum RoundingFlags : uint32_t {
  ROUND_TOWARD_ZERO = 0,
  ROUND_TO_NEAREST = 1,
  ROUND_UPWARD = 2,
  ROUND_DOWNWARD = 3,
};

// Translate a mask of exported C standard exception macros (FE_*) into the
// matching mask of hardware status bits. Each exception is an individual bit
// in the hardware register, so the result is built one flag at a time.
LIBC_INLINE uint32_t get_status_value_for_except(uint32_t excepts) {
  uint32_t hw_bits = 0;
  if (excepts & FE_INVALID)
    hw_bits |= EXCP_INVALID_F;
  if (excepts & FE_DIVBYZERO)
    hw_bits |= EXCP_DIV_BY_ZERO_F;
  if (excepts & __FE_DENORM)
    hw_bits |= EXCP_DENORMAL_F;
  if (excepts & FE_OVERFLOW)
    hw_bits |= EXCP_OVERFLOW_F;
  if (excepts & FE_UNDERFLOW)
    hw_bits |= EXCP_UNDERFLOW_F;
  if (excepts & FE_INEXACT)
    hw_bits |= EXCP_INEXACT_F;
  return hw_bits;
}

// Translate a mask of hardware status bits into the matching mask of exported
// C standard exception macros (FE_*).
LIBC_INLINE uint32_t get_except_value_for_status(uint32_t status) {
  uint32_t fe_bits = 0;
  if (status & EXCP_INVALID_F)
    fe_bits |= FE_INVALID;
  if (status & EXCP_DIV_BY_ZERO_F)
    fe_bits |= FE_DIVBYZERO;
  if (status & EXCP_DENORMAL_F)
    fe_bits |= __FE_DENORM;
  if (status & EXCP_OVERFLOW_F)
    fe_bits |= FE_OVERFLOW;
  if (status & EXCP_UNDERFLOW_F)
    fe_bits |= FE_UNDERFLOW;
  if (status & EXCP_INEXACT_F)
    fe_bits |= FE_INEXACT;
  return fe_bits;
}

// Program the hardware rounding mode. This goes through the
// llvm.set.rounding intrinsic function.
LIBC_INLINE void set_rounding_mode(uint32_t mode) { __builtin_set_flt_rounds(mode); }

// Query the hardware rounding mode. This goes through the
// llvm.get.rounding intrinsic function.
LIBC_INLINE uint32_t get_rounding_mode() {
  return __builtin_flt_rounds();
}
Comment on lines +124 to +132
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would assume this would be defined in the generic implementation


} // namespace internal

// TODO: Not implemented yet. This stub ignores its argument and returns 0.
LIBC_INLINE int clear_except(int) {
  return 0;
}

// TODO: Not implemented yet. This stub ignores its argument and returns 0.
LIBC_INLINE int test_except(int) {
  return 0;
}

// TODO: Not implemented yet. This stub unconditionally returns 0.
LIBC_INLINE int get_except() {
  return 0;
}

// TODO: Not implemented yet. This stub ignores its argument and returns 0.
LIBC_INLINE int set_except(int) {
  return 0;
}

// TODO: Not implemented yet. This stub ignores its argument and returns 0.
LIBC_INLINE int enable_except(int) {
  return 0;
}

// TODO: Not implemented yet. This stub ignores its argument and returns 0.
LIBC_INLINE int disable_except(int) {
  return 0;
}

// TODO: Not implemented yet. This stub ignores its argument and returns 0.
LIBC_INLINE int raise_except(int) {
  return 0;
}

// Report the rounding mode currently set in the environment as one of the C
// standard FE_* macros. The AMDGPU backend supports an extension for separate
// f64 / f32 rounding control, so a value outside of the four standard modes is
// returned to the caller without modification.
LIBC_INLINE int get_round() {
  const uint32_t hw_mode = internal::get_rounding_mode();
  if (hw_mode == internal::ROUND_TOWARD_ZERO)
    return FE_TOWARDZERO;
  if (hw_mode == internal::ROUND_TO_NEAREST)
    return FE_TONEAREST;
  if (hw_mode == internal::ROUND_UPWARD)
    return FE_UPWARD;
  if (hw_mode == internal::ROUND_DOWNWARD)
    return FE_DOWNWARD;
  // Not one of the standard modes: pass the raw value through.
  return static_cast<int>(hw_mode);
}

// Set the rounding mode for the environment. If the provided mode is above the
// expected range we assume it is an extended value to control f32 / f64
// separately.
LIBC_INLINE int set_round(int rounding_mode) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the point of this having a return value ?

// Map each standard FE_* macro onto the matching internal::RoundingFlags
// value before handing it to the hardware.
switch (rounding_mode) {
case FE_TONEAREST:
internal::set_rounding_mode(internal::ROUND_TO_NEAREST);
break;
case FE_UPWARD:
internal::set_rounding_mode(internal::ROUND_UPWARD);
break;
case FE_DOWNWARD:
internal::set_rounding_mode(internal::ROUND_DOWNWARD);
break;
case FE_TOWARDZERO:
internal::set_rounding_mode(internal::ROUND_TOWARD_ZERO);
break;
default:
// Any other value is forwarded unchanged; no validation is performed.
internal::set_rounding_mode(rounding_mode);
break;
}
// Every path returns 0.
return 0;
}

// Store the current floating point environment into `*env`. Returns 0 on
// success and 1 if `env` is null.
LIBC_INLINE int get_env(fenv_t *env) {
  if (env) {
    env->__fpc = internal::get_fpenv();
    return 0;
  }
  return 1;
}

// Install the floating point environment held in `*env`. Returns 0 on success
// and 1 if `env` is null.
LIBC_INLINE int set_env(const fenv_t *env) {
  if (env) {
    internal::set_fpenv(env->__fpc);
    return 0;
  }
  return 1;
}

} // namespace fputil
} // namespace LIBC_NAMESPACE

#endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_AMDGPU_FENVIMPL_H
37 changes: 20 additions & 17 deletions libc/test/src/fenv/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
add_custom_target(libc_fenv_unittests)
add_custom_target(libc_fenv_tests)

add_libc_unittest(
add_libc_test(
rounding_mode_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
rounding_mode_test.cpp
DEPENDS
Expand All @@ -13,10 +13,10 @@ add_libc_unittest(
LibcFPTestHelpers
)

add_libc_unittest(
add_libc_test(
exception_status_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
exception_status_test.cpp
DEPENDS
Expand All @@ -29,26 +29,29 @@ add_libc_unittest(
LibcFPTestHelpers
)

add_libc_unittest(
add_libc_test(
getenv_and_setenv_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
getenv_and_setenv_test.cpp
DEPENDS
libc.src.fenv.fegetenv
libc.src.fenv.fegetround
libc.src.fenv.fesetenv
libc.src.fenv.fesetround
libc.src.fenv.feclearexcept
libc.src.fenv.feraiseexcept
libc.src.fenv.fetestexcept
libc.src.__support.FPUtil.fenv_impl
LINK_LIBRARIES
LibcFPTestHelpers
)

add_libc_unittest(
add_libc_test(
exception_flags_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
exception_flags_test.cpp
DEPENDS
Expand All @@ -60,10 +63,10 @@ add_libc_unittest(
LibcFPTestHelpers
)

add_libc_unittest(
add_libc_test(
feupdateenv_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
feupdateenv_test.cpp
DEPENDS
Expand All @@ -74,10 +77,10 @@ add_libc_unittest(
LibcFPTestHelpers
)

add_libc_unittest(
add_libc_test(
feclearexcept_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
feclearexcept_test.cpp
DEPENDS
Expand All @@ -87,10 +90,10 @@ add_libc_unittest(
LibcFPTestHelpers
)

add_libc_unittest(
add_libc_test(
feenableexcept_test
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
feenableexcept_test.cpp
DEPENDS
Expand All @@ -112,7 +115,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
enabled_exceptions_test
UNIT_TEST_ONLY
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
enabled_exceptions_test.cpp
DEPENDS
Expand All @@ -130,7 +133,7 @@ if (NOT (LLVM_USE_SANITIZER OR (${LIBC_TARGET_OS} STREQUAL "windows")
feholdexcept_test
UNIT_TEST_ONLY
SUITE
libc_fenv_unittests
libc_fenv_tests
SRCS
feholdexcept_test.cpp
DEPENDS
Expand Down
Loading