Skip to content

Commit 9e016ef

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web'
2 parents c6f5e7e + fdcaeae commit 9e016ef

33 files changed

+632
-613
lines changed

sycl/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ option(SYCL_ADD_DEV_VERSION_POSTFIX "Adds -V postfix to version string" ON)
1111
set(SYCL_MAJOR_VERSION 1)
1212
set(SYCL_MINOR_VERSION 0)
1313
set(SYCL_PATCH_VERSION 0)
14-
set(SYCL_DEV_ABI_VERSION 0)
14+
set(SYCL_DEV_ABI_VERSION 1)
1515
if (SYCL_ADD_DEV_VERSION_POSTFIX)
1616
set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}")
1717
endif()

sycl/include/CL/sycl/detail/defines.hpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#pragma once
1010

11+
#include <climits>
12+
1113
#ifndef __SYCL_DISABLE_NAMESPACE_INLINE__
1214
#define __SYCL_INLINE_NAMESPACE(X) inline namespace X
1315
#else
@@ -18,6 +20,10 @@
1820
#define __has_attribute(x) 0
1921
#endif
2022

23+
#ifndef __has_builtin
24+
#define __has_builtin(x) 0
25+
#endif
26+
2127
#if __has_attribute(always_inline)
2228
#define ALWAYS_INLINE __attribute__((always_inline))
2329
#else
@@ -31,3 +37,12 @@
3137
#ifndef SYCL_EXTERNAL
3238
#define SYCL_EXTERNAL
3339
#endif
40+
41+
#if defined(__SYCL_ID_QUERIES_FIT_IN_INT__) && __has_builtin(__builtin_assume)
42+
#define __SYCL_ASSUME_INT(x) __builtin_assume((x) <= INT_MAX)
43+
#else
44+
#define __SYCL_ASSUME_INT(x)
45+
#if defined(__SYCL_ID_QUERIES_FIT_IN_INT__) && !__has_builtin(__builtin_assume)
46+
#warning "No assumptions will be emitted due to no __builtin_assume available"
47+
#endif
48+
#endif

sycl/include/CL/sycl/detail/pi.h

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@
4545
// TODO: we need a mapping of PI to OpenCL somewhere, and this can be done
4646
// elsewhere, e.g. in the pi_opencl, but constants/enums mapping is now
4747
// done here, for efficiency and simplicity.
48-
48+
#include <CL/cl_ext_intel.h>
4949
#include <CL/cl_usm_ext.h>
5050
#include <CL/sycl/detail/cl.h>
5151
#include <CL/sycl/detail/export.hpp>
@@ -133,16 +133,9 @@ typedef enum {
133133
// make the translation to OpenCL transparent.
134134
//
135135
typedef enum : pi_uint64 {
136-
PI_DEVICE_TYPE_DEFAULT =
137-
CL_DEVICE_TYPE_DEFAULT, ///< The default device available in the PI
138-
///< plugin.
139-
PI_DEVICE_TYPE_ALL =
140-
CL_DEVICE_TYPE_ALL, ///< All devices available in the PI plugin.
141-
PI_DEVICE_TYPE_CPU =
142-
CL_DEVICE_TYPE_CPU, ///< A PI device that is the host processor.
143-
PI_DEVICE_TYPE_GPU = CL_DEVICE_TYPE_GPU, ///< A PI device that is a GPU.
144-
PI_DEVICE_TYPE_ACC = CL_DEVICE_TYPE_ACCELERATOR ///< A PI device that is a
145-
///< dedicated accelerator.
136+
PI_DEVICE_TYPE_CPU = CL_DEVICE_TYPE_CPU,
137+
PI_DEVICE_TYPE_GPU = CL_DEVICE_TYPE_GPU,
138+
PI_DEVICE_TYPE_ACC = CL_DEVICE_TYPE_ACCELERATOR
146139
} _pi_device_type;
147140

148141
typedef enum {
@@ -316,6 +309,14 @@ typedef enum {
316309
PI_IMAGE_INFO_DEPTH = CL_IMAGE_DEPTH
317310
} _pi_image_info;
318311

312+
typedef enum {
313+
PI_KERNEL_MAX_SUB_GROUP_SIZE = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
314+
PI_KERNEL_MAX_NUM_SUB_GROUPS = CL_KERNEL_MAX_NUM_SUB_GROUPS,
315+
PI_KERNEL_COMPILE_NUM_SUB_GROUPS = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
316+
PI_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL =
317+
CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
318+
} _pi_kernel_sub_group_info;
319+
319320
typedef enum {
320321
PI_EVENT_INFO_COMMAND_QUEUE = CL_EVENT_COMMAND_QUEUE,
321322
PI_EVENT_INFO_CONTEXT = CL_EVENT_CONTEXT,
@@ -510,6 +511,7 @@ using pi_queue_info = _pi_queue_info;
510511
using pi_image_info = _pi_image_info;
511512
using pi_kernel_info = _pi_kernel_info;
512513
using pi_kernel_group_info = _pi_kernel_group_info;
514+
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
513515
using pi_event_info = _pi_event_info;
514516
using pi_command_type = _pi_command_type;
515517
using pi_mem_type = _pi_mem_type;
@@ -1056,9 +1058,26 @@ __SYCL_EXPORT pi_result piKernelGetGroupInfo(pi_kernel kernel, pi_device device,
10561058
void *param_value,
10571059
size_t *param_value_size_ret);
10581060

1061+
/// API to query information from the sub-group from a kernel
1062+
///
1063+
/// \param kernel is the pi_kernel to query
1064+
/// \param device is the device the kernel is executed on
1065+
/// \param param_name is a pi_kernel_sub_group_info enum value that
1066+
/// specifies the informtation queried for.
1067+
/// \param input_value_size is the size of input value passed in
1068+
/// ptr input_value param
1069+
/// \param input_value is the ptr to the input value passed.
1070+
/// \param param_value_size is the size of the value in bytes.
1071+
/// \param param_value is a pointer to the value to set.
1072+
/// \param param_value_size_ret is a pointer to return the size of data in
1073+
/// param_value ptr.
1074+
///
1075+
/// All queries expect a return of 4 bytes in param_value_size,
1076+
/// param_value_size_ret, and a uint32_t value should to be written in
1077+
/// param_value ptr.
1078+
/// Note: This behaviour differs from OpenCL. OpenCL returns size_t.
10591079
__SYCL_EXPORT pi_result piKernelGetSubGroupInfo(
1060-
pi_kernel kernel, pi_device device,
1061-
cl_kernel_sub_group_info param_name, // TODO: untie from OpenCL
1080+
pi_kernel kernel, pi_device device, pi_kernel_sub_group_info param_name,
10621081
size_t input_value_size, const void *input_value, size_t param_value_size,
10631082
void *param_value, size_t *param_value_size_ret);
10641083

sycl/include/CL/sycl/id.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,10 @@ template <int dimensions = 1> class id : public detail::array<dimensions> {
9696
* conversion:
9797
* int a = id<1>(value); */
9898

99-
operator EnableIfT<(dimensions == 1), size_t>() const {
100-
return this->common_array[0];
99+
ALWAYS_INLINE operator EnableIfT<(dimensions == 1), size_t>() const {
100+
size_t Result = this->common_array[0];
101+
__SYCL_ASSUME_INT(Result);
102+
return Result;
101103
}
102104
#endif // __SYCL_DISABLE_ID_TO_INT_CONV__
103105

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -206,9 +206,7 @@ enum class kernel_work_group : cl_kernel_work_group_info {
206206
};
207207

208208
enum class kernel_sub_group : cl_kernel_sub_group_info {
209-
max_sub_group_size_for_ndrange = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
210-
sub_group_count_for_ndrange = CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
211-
local_size_for_sub_group_count = CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT,
209+
max_sub_group_size = CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
212210
max_num_sub_groups = CL_KERNEL_MAX_NUM_SUB_GROUPS,
213211
compile_num_sub_groups = CL_KERNEL_COMPILE_NUM_SUB_GROUPS,
214212
compile_sub_group_size = CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL
Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
1-
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size_for_ndrange,
2-
size_t, cl::sycl::range<3>)
3-
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, sub_group_count_for_ndrange,
4-
size_t, cl::sycl::range<3>)
5-
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, local_size_for_sub_group_count,
6-
cl::sycl::range<3>, size_t)
7-
PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, size_t)
8-
PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, size_t)
9-
PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, size_t)
10-
1+
PARAM_TRAITS_SPEC_WITH_INPUT(kernel_sub_group, max_sub_group_size,
2+
uint32_t, cl::sycl::range<3>)
3+
PARAM_TRAITS_SPEC(kernel_sub_group, max_num_sub_groups, uint32_t)
4+
PARAM_TRAITS_SPEC(kernel_sub_group, compile_num_sub_groups, uint32_t)
5+
PARAM_TRAITS_SPEC(kernel_sub_group, compile_sub_group_size, uint32_t)

sycl/include/CL/sycl/item.hpp

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,15 @@
88

99
#pragma once
1010

11+
#include <CL/sycl/detail/defines.hpp>
1112
#include <CL/sycl/detail/helpers.hpp>
1213
#include <CL/sycl/detail/item_base.hpp>
1314
#include <CL/sycl/detail/type_traits.hpp>
1415
#include <CL/sycl/id.hpp>
1516
#include <CL/sycl/range.hpp>
1617

18+
#include <cstddef>
19+
1720
__SYCL_INLINE_NAMESPACE(cl) {
1821
namespace sycl {
1922
namespace detail {
@@ -28,22 +31,37 @@ template <int dimensions = 1, bool with_offset = true> class item {
2831

2932
id<dimensions> get_id() const { return MImpl.MIndex; }
3033

31-
size_t get_id(int dimension) const { return MImpl.MIndex[dimension]; }
34+
size_t ALWAYS_INLINE get_id(int dimension) const {
35+
size_t Id = MImpl.MIndex[dimension];
36+
__SYCL_ASSUME_INT(Id);
37+
return Id;
38+
}
3239

33-
size_t operator[](int dimension) const { return MImpl.MIndex[dimension]; }
40+
size_t ALWAYS_INLINE operator[](int dimension) const {
41+
size_t Id = MImpl.MIndex[dimension];
42+
__SYCL_ASSUME_INT(Id);
43+
return Id;
44+
}
3445

3546
range<dimensions> get_range() const { return MImpl.MExtent; }
3647

37-
size_t get_range(int dimension) const { return MImpl.MExtent[dimension]; }
48+
size_t ALWAYS_INLINE get_range(int dimension) const {
49+
size_t Id = MImpl.MExtent[dimension];
50+
__SYCL_ASSUME_INT(Id);
51+
return Id;
52+
}
3853

3954
template <bool has_offset = with_offset>
4055
detail::enable_if_t<has_offset, id<dimensions>> get_offset() const {
4156
return MImpl.MOffset;
4257
}
4358

4459
template <bool has_offset = with_offset>
45-
detail::enable_if_t<has_offset, size_t> get_offset(int dimension) const {
46-
return MImpl.MOffset[dimension];
60+
detail::enable_if_t<has_offset, size_t>
61+
ALWAYS_INLINE get_offset(int dimension) const {
62+
size_t Id = MImpl.MOffset[dimension];
63+
__SYCL_ASSUME_INT(Id);
64+
return Id;
4765
}
4866

4967
template <bool has_offset = with_offset>
@@ -52,7 +70,11 @@ template <int dimensions = 1, bool with_offset = true> class item {
5270
MImpl.MExtent, MImpl.MIndex, /*Offset*/ {});
5371
}
5472

55-
size_t get_linear_id() const { return MImpl.get_linear_id(); }
73+
size_t ALWAYS_INLINE get_linear_id() const {
74+
size_t Id = MImpl.get_linear_id();
75+
__SYCL_ASSUME_INT(Id);
76+
return Id;
77+
}
5678

5779
item(const item &rhs) = default;
5880

sycl/include/CL/sycl/nd_item.hpp

Lines changed: 41 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <CL/sycl/nd_range.hpp>
2020
#include <CL/sycl/range.hpp>
2121

22+
#include <cstddef>
2223
#include <stdexcept>
2324
#include <type_traits>
2425

@@ -33,34 +34,57 @@ template <int dimensions = 1> class nd_item {
3334

3435
id<dimensions> get_global_id() const { return globalItem.get_id(); }
3536

36-
size_t get_global_id(int dimension) const {
37-
return globalItem.get_id(dimension);
37+
size_t ALWAYS_INLINE get_global_id(int dimension) const {
38+
size_t Id = globalItem.get_id(dimension);
39+
__SYCL_ASSUME_INT(Id);
40+
return Id;
3841
}
3942

40-
size_t get_global_linear_id() const { return globalItem.get_linear_id(); }
43+
size_t ALWAYS_INLINE get_global_linear_id() const {
44+
size_t Id = globalItem.get_linear_id();
45+
__SYCL_ASSUME_INT(Id);
46+
return Id;
47+
}
4148

4249
id<dimensions> get_local_id() const { return localItem.get_id(); }
4350

44-
size_t get_local_id(int dimension) const {
45-
return localItem.get_id(dimension);
51+
size_t ALWAYS_INLINE get_local_id(int dimension) const {
52+
size_t Id = localItem.get_id(dimension);
53+
__SYCL_ASSUME_INT(Id);
54+
return Id;
4655
}
4756

48-
size_t get_local_linear_id() const { return localItem.get_linear_id(); }
57+
size_t get_local_linear_id() const {
58+
size_t Id = localItem.get_linear_id();
59+
__SYCL_ASSUME_INT(Id);
60+
return Id;
61+
}
4962

5063
group<dimensions> get_group() const { return Group; }
5164

5265
intel::sub_group get_sub_group() const { return intel::sub_group(); }
5366

54-
size_t get_group(int dimension) const { return Group[dimension]; }
67+
size_t ALWAYS_INLINE get_group(int dimension) const {
68+
size_t Size = Group[dimension];
69+
__SYCL_ASSUME_INT(Size);
70+
return Size;
71+
}
5572

56-
size_t get_group_linear_id() const { return Group.get_linear_id(); }
73+
size_t ALWAYS_INLINE get_group_linear_id() const {
74+
size_t Id = Group.get_linear_id();
75+
__SYCL_ASSUME_INT(Id);
76+
return Id;
77+
}
5778

5879
range<dimensions> get_group_range() const {
5980
return Group.get_global_range() / Group.get_local_range();
6081
}
6182

62-
size_t get_group_range(int dimension) const {
63-
return Group.get_global_range(dimension) / Group.get_local_range(dimension);
83+
size_t ALWAYS_INLINE get_group_range(int dimension) const {
84+
size_t Range =
85+
Group.get_global_range(dimension) / Group.get_local_range(dimension);
86+
__SYCL_ASSUME_INT(Range);
87+
return Range;
6488
}
6589

6690
range<dimensions> get_global_range() const { return globalItem.get_range(); }
@@ -101,39 +125,36 @@ template <int dimensions = 1> class nd_item {
101125
Group.mem_fence();
102126
}
103127

104-
template<typename dataT>
128+
template <typename dataT>
105129
device_event async_work_group_copy(local_ptr<dataT> dest,
106130
global_ptr<dataT> src,
107131
size_t numElements) const {
108132
return Group.async_work_group_copy(dest, src, numElements);
109133
}
110134

111-
template<typename dataT>
135+
template <typename dataT>
112136
device_event async_work_group_copy(global_ptr<dataT> dest,
113137
local_ptr<dataT> src,
114138
size_t numElements) const {
115139
return Group.async_work_group_copy(dest, src, numElements);
116140
}
117141

118-
template<typename dataT>
142+
template <typename dataT>
119143
device_event async_work_group_copy(local_ptr<dataT> dest,
120-
global_ptr<dataT> src,
121-
size_t numElements,
144+
global_ptr<dataT> src, size_t numElements,
122145
size_t srcStride) const {
123146

124147
return Group.async_work_group_copy(dest, src, numElements, srcStride);
125148
}
126149

127-
template<typename dataT>
150+
template <typename dataT>
128151
device_event async_work_group_copy(global_ptr<dataT> dest,
129-
local_ptr<dataT> src,
130-
size_t numElements,
152+
local_ptr<dataT> src, size_t numElements,
131153
size_t destStride) const {
132154
return Group.async_work_group_copy(dest, src, numElements, destStride);
133155
}
134156

135-
template<typename... eventTN>
136-
void wait_for(eventTN... events) const {
157+
template <typename... eventTN> void wait_for(eventTN... events) const {
137158
Group.wait_for(events...);
138159
}
139160

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -716,18 +716,15 @@ pi_result cuda_piDevicesGet(pi_platform platform, pi_device_type device_type,
716716
pi_uint32 *num_devices) {
717717

718718
pi_result err = PI_SUCCESS;
719-
const bool askingForDefault = device_type == PI_DEVICE_TYPE_DEFAULT;
720-
const bool askingForGPU = device_type & PI_DEVICE_TYPE_GPU;
721-
const bool returnDevices = askingForDefault || askingForGPU;
722-
723-
size_t numDevices = returnDevices ? platform->devices_.size() : 0;
719+
const bool askingForGPU = (device_type & PI_DEVICE_TYPE_GPU);
720+
size_t numDevices = askingForGPU ? platform->devices_.size() : 0;
724721

725722
try {
726723
if (num_devices) {
727724
*num_devices = numDevices;
728725
}
729726

730-
if (returnDevices && devices) {
727+
if (askingForGPU && devices) {
731728
for (size_t i = 0; i < std::min(size_t(num_entries), numDevices); ++i) {
732729
devices[i] = platform->devices_[i].get();
733730
}

0 commit comments

Comments
 (0)