Skip to content

[SYCL][libdevice] Add fast_* in imf libdevice #10004

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions libdevice/device_imf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,77 @@ static inline double __trunc(double x) {
#endif
}

// Fast single-precision 10^x, evaluated as e^(x * ln(10)).
// Host builds call the compiler's expf builtin; SPIR builds call
// __spirv_ocl_native_exp.
static inline float __fast_exp10f(float x) {
  // 0x1.26bb1cp1f is ln(10) (~2.302585) rounded to float.
  const float scaled_arg = 0x1.26bb1cp1f * x;
#if defined(__LIBDEVICE_HOST_IMPL__)
  return __builtin_expf(scaled_arg);
#elif defined(__SPIR__)
  return __spirv_ocl_native_exp(scaled_arg);
#endif
}

// Fast single-precision e^x.
// Host builds call the compiler's expf builtin; SPIR builds call
// __spirv_ocl_native_exp.
static inline float __fast_expf(float x) {
#if defined(__LIBDEVICE_HOST_IMPL__)
  const float host_result = __builtin_expf(x);
  return host_result;
#elif defined(__SPIR__)
  const float device_result = __spirv_ocl_native_exp(x);
  return device_result;
#endif
}

// Fast single-precision natural logarithm.
// Host builds call the compiler's logf builtin; SPIR builds call
// __spirv_ocl_native_log.
static inline float __fast_logf(float x) {
#if defined(__LIBDEVICE_HOST_IMPL__)
  const float host_result = __builtin_logf(x);
  return host_result;
#elif defined(__SPIR__)
  const float device_result = __spirv_ocl_native_log(x);
  return device_result;
#endif
}

// Fast single-precision base-2 logarithm.
// Host builds call the compiler's log2f builtin; SPIR builds compute
// ln(x) / ln(2) on top of __spirv_ocl_native_log.
static inline float __fast_log2f(float x) {
#if defined(__LIBDEVICE_HOST_IMPL__)
  return __builtin_log2f(x);
#elif defined(__SPIR__)
  // 0x1.62e43p-1f is ln(2) (~0.693147) rounded to float.
  const float ln2 = 0x1.62e43p-1f;
  const float natural_log = __spirv_ocl_native_log(x);
  return natural_log / ln2;
#endif
}

// Fast single-precision base-10 logarithm.
// Host builds call the compiler's log10f builtin; SPIR builds compute
// ln(x) / ln(10) on top of __spirv_ocl_native_log.
static inline float __fast_log10f(float x) {
#if defined(__LIBDEVICE_HOST_IMPL__)
  return __builtin_log10f(x);
#elif defined(__SPIR__)
  // 0x1.26bb1cp1f is ln(10) (~2.302585) rounded to float.
  const float ln10 = 0x1.26bb1cp1f;
  const float natural_log = __spirv_ocl_native_log(x);
  return natural_log / ln10;
#endif
}

// Fast single-precision x raised to the power y.
// Host builds call the compiler's powf builtin; SPIR builds call
// __spirv_ocl_native_powr (note: a different routine than the host path).
static inline float __fast_powf(float x, float y) {
#if defined(__LIBDEVICE_HOST_IMPL__)
  const float host_result = __builtin_powf(x, y);
  return host_result;
#elif defined(__SPIR__)
  const float device_result = __spirv_ocl_native_powr(x, y);
  return device_result;
#endif
}

// Fast single-precision division x / y.
//
// Mirrors the special case documented for NVIDIA's __nv_fast_fdividef
// (https://docs.nvidia.com/cuda/libdevice-users-guide/__nv_fast_fdividef.html):
// for 2^126 < |y| < 2^128 the result is NaN when x is +/-infinity and 0
// otherwise. SYCL native math has no such requirement, so that range is
// handled explicitly before falling through to the ordinary divide.
//
// NOTE(review): the range check could be simplified and a faster
// approximation used in a future update, since fdividef tolerates 2 ulp of
// error rather than requiring a correctly rounded x / y.
static inline float __fast_fdividef(float x, float y) {
  // Clear the sign bits so the checks below operate on |x| and |y|.
  const unsigned abs_ybits = __builtin_bit_cast(unsigned, y) & 0x7FFF'FFFF;
  const unsigned abs_xbits = __builtin_bit_cast(unsigned, x) & 0x7FFF'FFFF;

  // 0x7E80'0000 encodes 2^126 and 0x7F80'0000 encodes infinity, so this
  // selects finite |y| strictly above 2^126. Infinite and NaN divisors are
  // deliberately excluded so they take the ordinary division path below.
  if ((abs_ybits > 0x7E80'0000) && (abs_ybits < 0x7F80'0000)) {
    // All-ones exponent with a zero mantissa: x is +/-infinity.
    if (abs_xbits == 0x7F80'0000)
      return __builtin_bit_cast(float, 0x7FC00000); // quiet NaN
    return 0;
  }

#if defined(__LIBDEVICE_HOST_IMPL__)
  return x / y;
#elif defined(__SPIR__)
  return __spirv_ocl_native_divide(x, y);
#endif
}

static inline _iml_half __trunc(_iml_half x) {
_iml_half_internal x_i = x.get_internal();
#if defined(__LIBDEVICE_HOST_IMPL__)
Expand Down
22 changes: 22 additions & 0 deletions libdevice/imf/imf_fp32_dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,26 @@ DEVICE_EXTERN_C_INLINE float __devicelib_imf_fabsf(float x) {
DEVICE_EXTERN_C_INLINE
int64_t __devicelib_imf_llabs(int64_t x) { return x >= 0 ? x : -x; }

// Device-library entry point that forwards to __fast_exp10f.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_exp10f(float x) {
  const float result = __fast_exp10f(x);
  return result;
}

// Device-library entry point that forwards to __fast_expf.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_expf(float x) {
  const float result = __fast_expf(x);
  return result;
}

// Device-library entry point that forwards to __fast_fdividef (x / y).
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_fdividef(float x, float y) {
  const float quotient = __fast_fdividef(x, y);
  return quotient;
}

// Device-library entry point that forwards to __fast_logf.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_logf(float x) {
  const float result = __fast_logf(x);
  return result;
}

// Device-library entry point that forwards to __fast_log2f.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_log2f(float x) {
  const float result = __fast_log2f(x);
  return result;
}

// Device-library entry point that forwards to __fast_log10f.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_log10f(float x) {
  const float result = __fast_log10f(x);
  return result;
}

// Device-library entry point that forwards to __fast_powf (x to the power y).
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_powf(float x, float y) {
  const float result = __fast_powf(x, y);
  return result;
}
#endif /*__LIBDEVICE_IMF_ENABLED__*/
46 changes: 46 additions & 0 deletions libdevice/imf_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1788,4 +1788,50 @@ DEVICE_EXTERN_C_INLINE
_iml_half_internal __imf_ushort_as_half(unsigned short x) {
return __devicelib_imf_ushort_as_half(x);
}

// Forward declaration of the routine that backs __imf_fast_exp10f.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_exp10f(float x);

// __imf_fast_exp10f delegates directly to __devicelib_imf_fast_exp10f.
DEVICE_EXTERN_C_INLINE
float __imf_fast_exp10f(float x) {
  return __devicelib_imf_fast_exp10f(x);
}

// Forward declaration of the routine that backs __imf_fast_expf.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_expf(float x);

// __imf_fast_expf delegates directly to __devicelib_imf_fast_expf.
DEVICE_EXTERN_C_INLINE
float __imf_fast_expf(float x) {
  return __devicelib_imf_fast_expf(x);
}

// Forward declaration of the routine that backs __imf_fast_fdividef.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_fdividef(float x, float y);

// __imf_fast_fdividef delegates directly to __devicelib_imf_fast_fdividef.
DEVICE_EXTERN_C_INLINE
float __imf_fast_fdividef(float x, float y) {
  const float quotient = __devicelib_imf_fast_fdividef(x, y);
  return quotient;
}

// Forward declaration of the routine that backs __imf_fast_logf.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_logf(float x);

// __imf_fast_logf delegates directly to __devicelib_imf_fast_logf.
DEVICE_EXTERN_C_INLINE
float __imf_fast_logf(float x) {
  return __devicelib_imf_fast_logf(x);
}

// Forward declaration of the routine that backs __imf_fast_log2f.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_log2f(float x);

// __imf_fast_log2f delegates directly to __devicelib_imf_fast_log2f.
DEVICE_EXTERN_C_INLINE
float __imf_fast_log2f(float x) {
  return __devicelib_imf_fast_log2f(x);
}

// Forward declaration of the routine that backs __imf_fast_log10f.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_log10f(float x);

// __imf_fast_log10f delegates directly to __devicelib_imf_fast_log10f.
DEVICE_EXTERN_C_INLINE
float __imf_fast_log10f(float x) {
  return __devicelib_imf_fast_log10f(x);
}

// Forward declaration of the routine that backs __imf_fast_powf.
DEVICE_EXTERN_C_INLINE
float __devicelib_imf_fast_powf(float x, float y);

// __imf_fast_powf delegates directly to __devicelib_imf_fast_powf.
DEVICE_EXTERN_C_INLINE
float __imf_fast_powf(float x, float y) {
  const float result = __devicelib_imf_fast_powf(x, y);
  return result;
}
#endif // __LIBDEVICE_IMF_ENABLED__
7 changes: 7 additions & 0 deletions llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,13 @@ SYCLDeviceLibFuncMap SDLMap = {
{"__devicelib_imf_fmaxf", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fminf", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_copysignf", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_exp10f", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_expf", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_logf", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_log2f", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_log10f", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_powf", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_fast_fdividef", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf},
{"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf},
Expand Down
7 changes: 7 additions & 0 deletions sycl/include/sycl/builtins.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3124,6 +3124,13 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_invf(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_fmaxf(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fminf(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_copysignf(float x, float y);
// Declarations of the single-precision __imf_fast_* functions:
// exp10, exp, log, log2, log10, pow and divide (x / y).
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_exp10f(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_expf(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_logf(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_log2f(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_log10f(float x);
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_powf(float x, float y);
extern __DPCPP_SYCL_EXTERNAL float __imf_fast_fdividef(float x, float y);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x);
extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x);
Expand Down