Skip to content

[SYCL] Added the support for std::numeric_limits<cl::sycl::half> #536

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 13, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 128 additions & 12 deletions sycl/include/CL/sycl/half_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@

#pragma once

#include <cmath>
#include <cstdint>
#include <functional>
#include <iostream>
#include <limits>

namespace cl {
namespace sycl {
Expand Down Expand Up @@ -73,8 +75,8 @@ class half {
// on arithmetic types. We can't specify half type as arithmetic/floating
// point(via std::is_floating_point) since only float, double and long double
// types are "floating point" according to the standard. In order to use half
// type with these math functions we cast half to float using template function
// helper.
// type with these math functions we cast half to float using template
// function helper.
template <typename T> inline T cast_if_host_half(T val) { return val; }

inline float cast_if_host_half(half_impl::half val) {
Expand All @@ -86,22 +88,136 @@ inline float cast_if_host_half(half_impl::half val) {
} // namespace sycl
} // namespace cl

#ifdef __SYCL_DEVICE_ONLY__
using half = _Float16;
#else
using half = cl::sycl::detail::half_impl::half;
#endif

// Partial specialization of some functions in namespace `std`
namespace std {

template <> struct hash<cl::sycl::detail::half_impl::half> {
size_t operator()(cl::sycl::detail::half_impl::half const &key) const
noexcept {
return hash<uint16_t>()(key.Buf);
#ifdef __SYCL_DEVICE_ONLY__
// `constexpr` could work because the implicit conversion from `float` to
// `_Float16` can be `constexpr`.
#define CONSTEXPR_QUALIFIER constexpr
#else
// The qualifier is `const` instead of `constexpr` that is original to be
// because the constructor is not `constexpr` function.
#define CONSTEXPR_QUALIFIER const
#endif

// Partial specialization of `std::hash<cl::sycl::half>`
template <> struct hash<half> {
size_t operator()(half const &Key) const noexcept {
return hash<uint16_t>{}(reinterpret_cast<const uint16_t &>(Key));
}
};

} // namespace std
// Partial specialization of `std::numeric<cl::sycl::half>`

#ifdef __SYCL_DEVICE_ONLY__
using half = _Float16;
#else
using half = cl::sycl::detail::half_impl::half;
#endif
// All following values are either calculated based on description of each
// function/value on https://en.cppreference.com/w/cpp/types/numeric_limits, or
// cl_platform.h.
#define SYCL_HLF_MIN 6.103515625e-05F

#define SYCL_HLF_MAX 65504.0F

#define SYCL_HLF_MAX_10_EXP 4

#define SYCL_HLF_MAX_EXP 16

#define SYCL_HLF_MIN_10_EXP -4

#define SYCL_HLF_MIN_EXP -13

#define SYCL_HLF_MANT_DIG 11

#define SYCL_HLF_DIG 3

#define SYCL_HLF_DECIMAL_DIG 5

#define SYCL_HLF_EPSILON 9.765625e-04F

#define SYCL_HLF_RADIX 2

template <> struct numeric_limits<half> {
static constexpr const bool is_specialized = true;

static constexpr const bool is_signed = true;

static constexpr const bool is_integer = false;

static constexpr const bool is_exact = false;

static constexpr const bool has_infinity = true;

static constexpr const bool has_quiet_NaN = true;

static constexpr const bool has_signaling_NaN = true;

static constexpr const float_denorm_style has_denorm = denorm_present;

static constexpr const bool has_denorm_loss = false;

static constexpr const bool tinyness_before = false;

static constexpr const bool traps = false;

static constexpr const int max_exponent10 = SYCL_HLF_MAX_10_EXP;

static constexpr const int max_exponent = SYCL_HLF_MAX_EXP;

static constexpr const int min_exponent10 = SYCL_HLF_MIN_10_EXP;

static constexpr const int min_exponent = SYCL_HLF_MIN_EXP;

static constexpr const int radix = SYCL_HLF_RADIX;

static constexpr const int max_digits10 = SYCL_HLF_DECIMAL_DIG;

static constexpr const int digits = SYCL_HLF_MANT_DIG;

static constexpr const bool is_bounded = true;

static constexpr const int digits10 = SYCL_HLF_DIG;

static constexpr const bool is_modulo = false;

static constexpr const bool is_iec559 = true;

static constexpr const float_round_style round_style = round_to_nearest;

static CONSTEXPR_QUALIFIER half min() noexcept { return SYCL_HLF_MIN; }

static CONSTEXPR_QUALIFIER half max() noexcept { return SYCL_HLF_MAX; }

static CONSTEXPR_QUALIFIER half lowest() noexcept { return -SYCL_HLF_MAX; }

static CONSTEXPR_QUALIFIER half epsilon() noexcept {
return SYCL_HLF_EPSILON;
}

static CONSTEXPR_QUALIFIER half round_error() noexcept { return 0.5F; }

static CONSTEXPR_QUALIFIER half infinity() noexcept {
return __builtin_huge_valf();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does MSVC compiler have __builtin_huge_valf?
Can we use std::numeric_limitscl::sycl::half::infinity() in SYCL lib? For Microsoft Windows we compile SYCL via MSVC compiler.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't std::numeric_limits<cl::sycl::half>::infinity() what we're implementing here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I have an idea for an experiment.
There is a code with W/A for half type in source\detail\builtins_math.cpp.
Can you try fix it?

s::cl_half nan(s::cl_ushort nancode) __NOEXC {
  return s::cl_half(d::quiet_NaN<s::cl_float>());
}

to

s::cl_half nan(s::cl_ushort nancode) __NOEXC {
  return d::quiet_NaN<s::cl_half>(); // or  td::numeric_limits<s::cl_half>::quiet_NaN() if you have a problem with constexpr
}

If you can pass the tests, then all is well.
Otherwise your implementation differs from for double and float.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does. The pre-check-in passes. :) I'll also take a look at the W/A later.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does. The pre-check-in passes. :) I'll also take a look at the W/A later.

Good. On Microsoft Windows too(I'm worried about)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. The pre-check-in includes testing on both Linux and Windows.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's wonderful

}

static CONSTEXPR_QUALIFIER half quiet_NaN() noexcept {
return __builtin_nanf("");
}

static CONSTEXPR_QUALIFIER half signaling_NaN() noexcept {
return __builtin_nansf("");
}

static CONSTEXPR_QUALIFIER half denorm_min() noexcept { return 5.96046e-08F; }
};

#undef CONSTEXPR_QUALIFIER

} // namespace std

inline std::ostream &operator<<(std::ostream &O, half const &rhs) {
O << static_cast<float>(rhs);
Expand Down
108 changes: 97 additions & 11 deletions sycl/test/basic_tests/half_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,22 +15,22 @@
#include <CL/sycl.hpp>

#include <cmath>
#include <unordered_set>

using namespace cl::sycl;

constexpr float flt_epsilon = 9.77e-4;

constexpr size_t N = 100;

template <typename T> void assert_close(const T &C, const float ref) {
template <typename T> void assert_close(const T &C, const cl::sycl::half ref) {
for (size_t i = 0; i < N; i++) {
float diff = C[i] - ref;
assert(std::fabs(diff) < flt_epsilon);
auto diff = C[i] - ref;
assert(std::fabs(static_cast<float>(diff)) <
std::numeric_limits<cl::sycl::half>::epsilon());
}
}

void verify_add(queue &q, buffer<half, 1> &a, buffer<half, 1> &b, range<1> &r,
const float ref) {
const half ref) {
buffer<half, 1> c{r};

q.submit([&](handler &cgh) {
Expand All @@ -45,7 +45,7 @@ void verify_add(queue &q, buffer<half, 1> &a, buffer<half, 1> &b, range<1> &r,
}

void verify_min(queue &q, buffer<half, 1> &a, buffer<half, 1> &b, range<1> &r,
const float ref) {
const half ref) {
buffer<half, 1> c{r};

q.submit([&](handler &cgh) {
Expand All @@ -60,7 +60,7 @@ void verify_min(queue &q, buffer<half, 1> &a, buffer<half, 1> &b, range<1> &r,
}

void verify_mul(queue &q, buffer<half, 1> &a, buffer<half, 1> &b, range<1> &r,
const float ref) {
const half ref) {
buffer<half, 1> c{r};

q.submit([&](handler &cgh) {
Expand Down Expand Up @@ -103,17 +103,97 @@ void verify_vec(queue &q) {
assert(e.get_access<access::mode::read>()[0] == 0);
}

void verify_numeric_limits(queue &q) {
// Verify on host side
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that you can delete this part, because it will tested in kernel side code via // RUN: env SYCL_DEVICE_TYPE=HOST %t.out

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No. They have different types during the two compilation stages. One is for _Float16 and another is for the half I implemented.

// Static member variables
std::numeric_limits<cl::sycl::half>::is_specialized;
std::numeric_limits<cl::sycl::half>::is_signed;
std::numeric_limits<cl::sycl::half>::is_integer;
std::numeric_limits<cl::sycl::half>::is_exact;
std::numeric_limits<cl::sycl::half>::has_infinity;
std::numeric_limits<cl::sycl::half>::has_quiet_NaN;
std::numeric_limits<cl::sycl::half>::has_signaling_NaN;
std::numeric_limits<cl::sycl::half>::has_denorm;
std::numeric_limits<cl::sycl::half>::has_denorm_loss;
std::numeric_limits<cl::sycl::half>::tinyness_before;
std::numeric_limits<cl::sycl::half>::traps;
std::numeric_limits<cl::sycl::half>::max_exponent10;
std::numeric_limits<cl::sycl::half>::max_exponent;
std::numeric_limits<cl::sycl::half>::min_exponent10;
std::numeric_limits<cl::sycl::half>::min_exponent;
std::numeric_limits<cl::sycl::half>::radix;
std::numeric_limits<cl::sycl::half>::max_digits10;
std::numeric_limits<cl::sycl::half>::digits;
std::numeric_limits<cl::sycl::half>::is_bounded;
std::numeric_limits<cl::sycl::half>::digits10;
std::numeric_limits<cl::sycl::half>::is_modulo;
std::numeric_limits<cl::sycl::half>::is_iec559;
std::numeric_limits<cl::sycl::half>::round_style;

// Static member functions
std::numeric_limits<cl::sycl::half>::min();
std::numeric_limits<cl::sycl::half>::max();
std::numeric_limits<cl::sycl::half>::lowest();
std::numeric_limits<cl::sycl::half>::epsilon();
std::numeric_limits<cl::sycl::half>::round_error();
std::numeric_limits<cl::sycl::half>::infinity();
std::numeric_limits<cl::sycl::half>::quiet_NaN();
std::numeric_limits<cl::sycl::half>::signaling_NaN();
std::numeric_limits<cl::sycl::half>::denorm_min();

// Verify in kernel function for device side check
q.submit([&](cl::sycl::handler &cgh) {
cgh.single_task<class kernel>([]() {
// Static member variables
std::numeric_limits<cl::sycl::half>::is_specialized;
std::numeric_limits<cl::sycl::half>::is_signed;
std::numeric_limits<cl::sycl::half>::is_integer;
std::numeric_limits<cl::sycl::half>::is_exact;
std::numeric_limits<cl::sycl::half>::has_infinity;
std::numeric_limits<cl::sycl::half>::has_quiet_NaN;
std::numeric_limits<cl::sycl::half>::has_signaling_NaN;
std::numeric_limits<cl::sycl::half>::has_denorm;
std::numeric_limits<cl::sycl::half>::has_denorm_loss;
std::numeric_limits<cl::sycl::half>::tinyness_before;
std::numeric_limits<cl::sycl::half>::traps;
std::numeric_limits<cl::sycl::half>::max_exponent10;
std::numeric_limits<cl::sycl::half>::max_exponent;
std::numeric_limits<cl::sycl::half>::min_exponent10;
std::numeric_limits<cl::sycl::half>::min_exponent;
std::numeric_limits<cl::sycl::half>::radix;
std::numeric_limits<cl::sycl::half>::max_digits10;
std::numeric_limits<cl::sycl::half>::digits;
std::numeric_limits<cl::sycl::half>::is_bounded;
std::numeric_limits<cl::sycl::half>::digits10;
std::numeric_limits<cl::sycl::half>::is_modulo;
std::numeric_limits<cl::sycl::half>::is_iec559;
std::numeric_limits<cl::sycl::half>::round_style;

// Static member functions
std::numeric_limits<cl::sycl::half>::min();
std::numeric_limits<cl::sycl::half>::max();
std::numeric_limits<cl::sycl::half>::lowest();
std::numeric_limits<cl::sycl::half>::epsilon();
std::numeric_limits<cl::sycl::half>::round_error();
std::numeric_limits<cl::sycl::half>::infinity();
std::numeric_limits<cl::sycl::half>::quiet_NaN();
std::numeric_limits<cl::sycl::half>::signaling_NaN();
std::numeric_limits<cl::sycl::half>::denorm_min();
});
});
}

inline bool bitwise_comparison_fp16(const half val, const uint16_t exp) {
return reinterpret_cast<const uint16_t&>(val) == exp;
return reinterpret_cast<const uint16_t &>(val) == exp;
}

inline bool bitwise_comparison_fp32(const half val, const uint32_t exp) {
const float fp32 = static_cast<float>(val);
return reinterpret_cast<const uint32_t&>(fp32) == exp;
return reinterpret_cast<const uint32_t &>(fp32) == exp;
}

int main() {
// We assert that the length is 1 because we use macro to select the device
// We assert that the length is 1 because we use env to select the device
assert(device::get_devices().size() == 1);

auto dev = device::get_devices()[0];
Expand All @@ -137,6 +217,7 @@ int main() {
verify_mul(q, a, b, r, 10.0);
verify_div(q, a, b, r, 2.5);
verify_vec(q);
verify_numeric_limits(q);

if (!dev.is_host()) {
return 0;
Expand Down Expand Up @@ -197,5 +278,10 @@ int main() {
assert(bitwise_comparison_fp32(reinterpret_cast<const half &>(subnormal),
882900992));

// std::hash<cl::sycl::half>
std::unordered_set<half> sets;
sets.insert(1.2);
assert(sets.find(1.2) != sets.end());

return 0;
}