Skip to content

Commit 9e4768c

Browse files
authored
[SYCL][libclc] Add generic addrspace overloads of math builtins (#13015)
The generic implementations of the math builtins which take pointer arguments were using unqualified address spaces. This could resolve to either the generic address space or the private address space, depending on whether the target supports the generic address space or not. The newer unified OpenCL C specification is clearer in mandating that all targets must provide overloads on the explicitly qualified 'private' address space, as well as optionally defining ones on the (unqualified) generic address space. This meant that most of these math builtins were lacking one overload: either the private or generic one, depending on which target was compiling the builtins. One notable exception here is NVIDIA, which maps the private and generic address spaces to the same target address space. Thus declaring builtins overloaded on these two address spaces results in a mangling clash, which we can't have. Therefore we now say that NVIDIA targets don't support the generic address space for the purposes of these builtins. In reality, the builtins with the private address space are functionally equivalent to the generic ones, so users won't notice. For the sake of code clarity, although the 'generic' keyword is technically reserved, we know that clang defines it to be the corresponding unqualified generic address space, so we use that to be explicit. We always compile with clang so this shouldn't cause portability problems. With this we can also enable a LIT test for HIP, which was previously failing as it couldn't find the generic address space overloads of the fract and lgamma_r builtins. There are other builtins to which this treatment may need to be applied, such as the vload and vstore variants. Those will be handled in a subsequent patch.
1 parent 98894fe commit 9e4768c

File tree

22 files changed

+561
-170
lines changed

22 files changed

+561
-170
lines changed

libclc/CMakeLists.txt

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
400400
endif()
401401
message( " DEVICE: ${d} ( ${${d}_aliases} )" )
402402

403+
set ( supports_generic_addrspace TRUE )
403404
if ( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" )
404405
if( ${ARCH} STREQUAL "spirv" )
405406
set( t "spir--" )
@@ -416,6 +417,14 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
416417
elseif( ${ARCH} STREQUAL "nvptx" OR ${ARCH} STREQUAL "nvptx64" )
417418
set( build_flags )
418419
set( opt_flags -O3 "--nvvm-reflect-enable=false" )
420+
# Note: when declaring builtins, we don't consider NVIDIA as supporting
421+
# the generic address space. This is because it maps to the same target
422+
# address space as the private address space, resulting in a mangling
423+
# clash.
424+
# Since we can't declare builtins overloaded on both address spaces
425+
# simultaneously, we choose to declare the builtins using the private space,
426+
# which will also work for the generic address space.
427+
set( supports_generic_addrspace FALSE )
419428
elseif( ${ARCH} STREQUAL "clspv64" )
420429
set( t "spir64--" )
421430
set( build_flags "-Wno-unknown-assumption")
@@ -437,8 +446,10 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
437446
"+cl_khr_fp16,"
438447
"+__opencl_c_3d_image_writes,"
439448
"+__opencl_c_images,"
440-
"+cl_khr_3d_image_writes,"
441-
"+__opencl_c_generic_address_space")
449+
"+cl_khr_3d_image_writes")
450+
if(supports_generic_addrspace)
451+
string( APPEND CL_3_0_EXTENSIONS ",+__opencl_c_generic_address_space" )
452+
endif()
442453
list( APPEND flags ${CL_3_0_EXTENSIONS})
443454

444455
# Add platform specific flags

libclc/generic/include/clc/math/fract.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,8 @@
2323
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr);
2424
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr);
2525
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr);
26+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
27+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
28+
defined(__opencl_c_generic_address_space))
29+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE fract(__CLC_GENTYPE x, generic __CLC_GENTYPE *iptr);
30+
#endif
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, global __CLC_INTN *iptr);
22
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, local __CLC_INTN *iptr);
33
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, private __CLC_INTN *iptr);
4+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
5+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
6+
defined(__opencl_c_generic_address_space))
7+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE frexp(__CLC_GENTYPE x, generic __CLC_INTN *iptr);
8+
#endif

libclc/generic/include/clc/math/modf.inc

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,8 @@
2323
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, global __CLC_GENTYPE *iptr);
2424
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, local __CLC_GENTYPE *iptr);
2525
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, private __CLC_GENTYPE *iptr);
26+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
27+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
28+
defined(__opencl_c_generic_address_space))
29+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE modf(__CLC_GENTYPE x, generic __CLC_GENTYPE *iptr);
30+
#endif

libclc/generic/include/clc/math/remquo.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,13 @@
1515
#include <clc/math/gentype.inc>
1616
#undef __CLC_ADDRESS_SPACE
1717

18+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
19+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
20+
defined(__opencl_c_generic_address_space))
21+
#define __CLC_BODY <clc/math/remquo.inc>
22+
#define __CLC_ADDRESS_SPACE generic
23+
#include <clc/math/gentype.inc>
24+
#undef __CLC_ADDRESS_SPACE
25+
#endif
26+
1827
#undef __CLC_FUNCTION
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
11
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, global __CLC_GENTYPE * cosval);
22
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, local __CLC_GENTYPE * cosval);
33
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, private __CLC_GENTYPE * cosval);
4+
#if __OPENCL_C_VERSION__ == CL_VERSION_2_0 || \
5+
(__OPENCL_C_VERSION__ >= CL_VERSION_3_0 && \
6+
defined(__opencl_c_generic_address_space))
7+
_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sincos (__CLC_GENTYPE x, generic __CLC_GENTYPE * cosval);
8+
#endif

0 commit comments

Comments
 (0)