Skip to content

Commit 248f550

Browse files
[SYCL] Sub-group load/store for raw pointers (#3255)
- If pointers are decorated address space information is obtained from the attribute; - If pointers are not decorated explicit cast (GenericCastToPtrExplicit method) are used to obtain information about address space. Only local and global address spaces are supported by load/store methods. For other address spaces assertion is generated. Also fixed target triple (spir64-unknown-unknown-sycldevice) used for SPIR-V compilation path in LIT tests. The previous one was not supported by device library. Co-authored-by: John Pennycook <[email protected]>
1 parent 090d764 commit 248f550

File tree

12 files changed

+471
-21
lines changed

12 files changed

+471
-21
lines changed

sycl/doc/extensions/SubGroupAlgorithms/SYCL_INTEL_sub_group_algorithms.asciidoc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,12 +134,18 @@ The load and store sub-group functions enable developers to assert that all work
134134
|===
135135
|Function|Description
136136

137+
|+template <typename T> T load(sub_group sg, const T *src)+
138+
|Load contiguous data from _src_. Returns one element per work-item, corresponding to the memory location at _src_ + +get_local_id()+. The value of _src_ must be the same for all work-items in the sub-group. The address space information is deduced automatically. Only pointers to global and local address spaces are valid. Passing a pointer to other address spaces will cause the run time assertion.
139+
137140
|+template <typename T, access::address_space Space> T load(sub_group sg, const multi_ptr<T,Space> src)+
138141
|Load contiguous data from _src_. Returns one element per work-item, corresponding to the memory location at _src_ + +get_local_id()+. The value of _src_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+.
139142

140143
|+template <int N, typename T, access::address_space Space> vec<T,N> load(sub_group sg, const multi_ptr<T,Space> src)+
141144
|Load contiguous data from _src_. Returns _N_ elements per work-item, corresponding to the _N_ memory locations at _src_ + +i+ * +get_max_local_range()+ + +get_local_id()+ for +i+ between 0 and _N_. The value of _src_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+.
142145

146+
|+template <typename T> void store(sub_group sg, T *dst, const T& x)+
147+
|Store contiguous data to _dst_. The value of _x_ from each work-item is written to the memory location at _dst_ + +get_local_id()+. The value of _dst_ must be the same for all work-items in the sub-group. The address space information is deduced automatically. Only pointers to global and local address spaces are valid. Passing a pointer to other address spaces will cause the run time assertion.
148+
143149
|+template <typename T, access::address_space Space> void store(sub_group sg, multi_ptr<T,Space> dst, const T& x)+
144150
|Store contiguous data to _dst_. The value of _x_ from each work-item is written to the memory location at _dst_ + +get_local_id()+. The value of _dst_ must be the same for all work-items in the sub-group. _Space_ must be +access::address_space::global_space+ or +access::address_space::local_space+.
145151

@@ -165,6 +171,7 @@ None.
165171
|========================================
166172
|Rev|Date|Author|Changes
167173
|1|2020-03-16|John Pennycook|*Initial public working draft*
174+
|2|2021-02-26|Vladimir Lazarev|*Add load/store method for raw pointers*
168175
|========================================
169176
170177
//************************************************************************

sycl/include/CL/__spirv/spirv_ops.hpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,30 @@ __SPIRV_ATOMICS(__SPIRV_ATOMIC_UNSIGNED, unsigned long long)
189189
__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Min)
190190
__SPIRV_ATOMICS(__SPIRV_ATOMIC_MINMAX, Max)
191191

192+
extern SYCL_EXTERNAL __attribute__((opencl_global)) void *
193+
__spirv_GenericCastToPtrExplicit_ToGlobal(const void *Ptr,
194+
__spv::StorageClass::Flag S) noexcept;
195+
196+
extern SYCL_EXTERNAL __attribute__((opencl_local)) void *
197+
__spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr,
198+
__spv::StorageClass::Flag S) noexcept;
199+
200+
template <typename dataT>
201+
extern __attribute__((opencl_global)) dataT *
202+
__spirv_GenericCastToPtrExplicit_ToGlobal(
203+
const void *Ptr, __spv::StorageClass::Flag S) noexcept {
204+
return (__attribute__((opencl_global))
205+
dataT *)__spirv_GenericCastToPtrExplicit_ToGlobal(Ptr, S);
206+
}
207+
208+
template <typename dataT>
209+
extern __attribute__((opencl_local)) dataT *
210+
__spirv_GenericCastToPtrExplicit_ToLocal(const void *Ptr,
211+
__spv::StorageClass::Flag S) noexcept {
212+
return (__attribute__((opencl_local))
213+
dataT *)__spirv_GenericCastToPtrExplicit_ToLocal(Ptr, S);
214+
}
215+
192216
template <typename dataT>
193217
__SYCL_CONVERGENT__ extern SYCL_EXTERNAL dataT
194218
__spirv_SubgroupShuffleINTEL(dataT Data, uint32_t InvocationId) noexcept;

sycl/include/CL/__spirv/spirv_types.hpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,46 @@ struct Scope {
3636
Flag flag_value;
3737
};
3838

39+
struct StorageClass {
40+
enum Flag : uint32_t {
41+
UniformConstant = 0,
42+
Input = 1,
43+
Uniform = 2,
44+
Output = 3,
45+
Workgroup = 4,
46+
CrossWorkgroup = 5,
47+
Private = 6,
48+
Function = 7,
49+
Generic = 8,
50+
PushConstant = 9,
51+
AtomicCounter = 10,
52+
Image = 11,
53+
StorageBuffer = 12,
54+
CallableDataKHR = 5328,
55+
CallableDataNV = 5328,
56+
IncomingCallableDataKHR = 5329,
57+
IncomingCallableDataNV = 5329,
58+
RayPayloadKHR = 5338,
59+
RayPayloadNV = 5338,
60+
HitAttributeKHR = 5339,
61+
HitAttributeNV = 5339,
62+
IncomingRayPayloadKHR = 5342,
63+
IncomingRayPayloadNV = 5342,
64+
ShaderRecordBufferKHR = 5343,
65+
ShaderRecordBufferNV = 5343,
66+
PhysicalStorageBuffer = 5349,
67+
PhysicalStorageBufferEXT = 5349,
68+
CodeSectionINTEL = 5605,
69+
CapabilityUSMStorageClassesINTEL = 5935,
70+
DeviceOnlyINTEL = 5936,
71+
HostOnlyINTEL = 5937,
72+
Max = 0x7fffffff,
73+
};
74+
constexpr StorageClass(Flag flag) : flag_value(flag) {}
75+
constexpr operator uint32_t() const { return flag_value; }
76+
Flag flag_value;
77+
};
78+
3979
struct MemorySemanticsMask {
4080

4181
enum Flag : uint32_t {

sycl/include/CL/sycl/ONEAPI/sub_group.hpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,47 @@ struct sub_group {
224224

225225
/* --- sub_group load/stores --- */
226226
/* these can map to SIMD or block read/write hardware where available */
227+
#ifdef __SYCL_DEVICE_ONLY__
228+
// Method for decorated pointer
229+
template <typename T>
230+
detail::enable_if_t<
231+
!std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
232+
load(T *src) const {
233+
return load(sycl::multi_ptr<typename detail::remove_AS<T>::type,
234+
sycl::detail::deduce_AS<T>::value>(
235+
(typename detail::remove_AS<T>::type *)src));
236+
}
237+
238+
// Method for raw pointer
239+
template <typename T>
240+
detail::enable_if_t<
241+
std::is_same<typename detail::remove_AS<T>::type, T>::value, T>
242+
load(T *src) const {
243+
244+
#ifdef __NVPTX__
245+
return src[get_local_id()[0]];
246+
#else // __NVPTX__
247+
auto l = __spirv_GenericCastToPtrExplicit_ToLocal<T>(
248+
src, __spv::StorageClass::Workgroup);
249+
if (l)
250+
return load(l);
251+
252+
auto g = __spirv_GenericCastToPtrExplicit_ToGlobal<T>(
253+
src, __spv::StorageClass::CrossWorkgroup);
254+
if (g)
255+
return load(g);
256+
257+
assert(!"Sub-group load() is supported for local or global pointers only.");
258+
return {};
259+
#endif // __NVPTX__
260+
}
261+
#else //__SYCL_DEVICE_ONLY__
262+
template <typename T> T load(T *src) const {
263+
(void)src;
264+
throw runtime_error("Sub-groups are not supported on host device.",
265+
PI_INVALID_DEVICE);
266+
}
267+
#endif //__SYCL_DEVICE_ONLY__
227268

228269
template <typename T, access::address_space Space>
229270
sycl::detail::enable_if_t<
@@ -315,6 +356,55 @@ struct sub_group {
315356
#endif
316357
}
317358

359+
#ifdef __SYCL_DEVICE_ONLY__
360+
// Method for decorated pointer
361+
template <typename T>
362+
detail::enable_if_t<
363+
!std::is_same<typename detail::remove_AS<T>::type, T>::value>
364+
store(T *dst, const typename detail::remove_AS<T>::type &x) const {
365+
store(sycl::multi_ptr<typename detail::remove_AS<T>::type,
366+
sycl::detail::deduce_AS<T>::value>(
367+
(typename detail::remove_AS<T>::type *)dst),
368+
x);
369+
}
370+
371+
// Method for raw pointer
372+
template <typename T>
373+
detail::enable_if_t<
374+
std::is_same<typename detail::remove_AS<T>::type, T>::value>
375+
store(T *dst, const typename detail::remove_AS<T>::type &x) const {
376+
377+
#ifdef __NVPTX__
378+
dst[get_local_id()[0]] = x;
379+
#else // __NVPTX__
380+
auto l = __spirv_GenericCastToPtrExplicit_ToLocal<T>(
381+
dst, __spv::StorageClass::Workgroup);
382+
if (l) {
383+
store(l, x);
384+
return;
385+
}
386+
387+
auto g = __spirv_GenericCastToPtrExplicit_ToGlobal<T>(
388+
dst, __spv::StorageClass::CrossWorkgroup);
389+
if (g) {
390+
store(g, x);
391+
return;
392+
}
393+
394+
assert(
395+
!"Sub-group store() is supported for local or global pointers only.");
396+
return;
397+
#endif // __NVPTX__
398+
}
399+
#else //__SYCL_DEVICE_ONLY__
400+
template <typename T> void store(T *dst, const T &x) const {
401+
(void)dst;
402+
(void)x;
403+
throw runtime_error("Sub-groups are not supported on host device.",
404+
PI_INVALID_DEVICE);
405+
}
406+
#endif //__SYCL_DEVICE_ONLY__
407+
318408
template <typename T, access::address_space Space>
319409
sycl::detail::enable_if_t<
320410
sycl::detail::sub_group::AcceptableForGlobalLoadStore<T, Space>::value>

sycl/include/CL/sycl/access/access.hpp

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -187,18 +187,16 @@ template <typename ElementType>
187187
struct DecoratedType<ElementType, access::address_space::local_space> {
188188
using type = __OPENCL_LOCAL_AS__ ElementType;
189189
};
190-
191-
template <class T>
192-
struct remove_AS {
193-
typedef T type;
194-
};
190+
template <class T> struct remove_AS { typedef T type; };
195191

196192
#ifdef __SYCL_DEVICE_ONLY__
197-
template <class T>
198-
struct remove_AS<__OPENCL_GLOBAL_AS__ T> {
199-
typedef T type;
193+
template <class T> struct deduce_AS {
194+
static_assert(!std::is_same<typename detail::remove_AS<T>::type, T>::value,
195+
"Only types with address space attributes are supported");
200196
};
201197

198+
template <class T> struct remove_AS<__OPENCL_GLOBAL_AS__ T> { typedef T type; };
199+
202200
#ifdef __ENABLE_USM_ADDR_SPACE__
203201
template <class T> struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
204202
typedef T type;
@@ -207,21 +205,45 @@ template <class T> struct remove_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
207205
template <class T> struct remove_AS<__OPENCL_GLOBAL_HOST_AS__ T> {
208206
typedef T type;
209207
};
208+
209+
template <class T> struct deduce_AS<__OPENCL_GLOBAL_DEVICE_AS__ T> {
210+
static const access::address_space value =
211+
access::address_space::global_device_space;
212+
};
213+
214+
template <class T> struct deduce_AS<__OPENCL_GLOBAL_HOST_AS__ T> {
215+
static const access::address_space value =
216+
access::address_space::global_host_space;
217+
};
210218
#endif // __ENABLE_USM_ADDR_SPACE__
211219

212-
template <class T>
213-
struct remove_AS<__OPENCL_PRIVATE_AS__ T> {
220+
template <class T> struct remove_AS<__OPENCL_PRIVATE_AS__ T> {
214221
typedef T type;
215222
};
216223

217-
template <class T>
218-
struct remove_AS<__OPENCL_LOCAL_AS__ T> {
224+
template <class T> struct remove_AS<__OPENCL_LOCAL_AS__ T> { typedef T type; };
225+
226+
template <class T> struct remove_AS<__OPENCL_CONSTANT_AS__ T> {
219227
typedef T type;
220228
};
221229

222-
template <class T>
223-
struct remove_AS<__OPENCL_CONSTANT_AS__ T> {
224-
typedef T type;
230+
template <class T> struct deduce_AS<__OPENCL_GLOBAL_AS__ T> {
231+
static const access::address_space value =
232+
access::address_space::global_space;
233+
};
234+
235+
template <class T> struct deduce_AS<__OPENCL_PRIVATE_AS__ T> {
236+
static const access::address_space value =
237+
access::address_space::private_space;
238+
};
239+
240+
template <class T> struct deduce_AS<__OPENCL_LOCAL_AS__ T> {
241+
static const access::address_space value = access::address_space::local_space;
242+
};
243+
244+
template <class T> struct deduce_AS<__OPENCL_CONSTANT_AS__ T> {
245+
static const access::address_space value =
246+
access::address_space::constant_space;
225247
};
226248
#endif
227249

@@ -231,8 +253,7 @@ struct remove_AS<__OPENCL_CONSTANT_AS__ T> {
231253
#undef __OPENCL_LOCAL_AS__
232254
#undef __OPENCL_CONSTANT_AS__
233255
#undef __OPENCL_PRIVATE_AS__
234-
235256
} // namespace detail
236257

237-
} // namespace sycl
238-
} // __SYCL_INLINE_NAMESPACE(cl)
258+
} // namespace sycl
259+
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/test/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ set_target_properties(check-sycl-deploy PROPERTIES FOLDER "SYCL tests")
5454
add_lit_testsuite(check-sycl-spirv "Running device-agnostic SYCL regression tests for SPIR-V"
5555
${CMAKE_CURRENT_BINARY_DIR}
5656
ARGS ${RT_TEST_ARGS}
57-
PARAMS "SYCL_TRIPLE=spir64-unknown-linux-sycldevice"
57+
PARAMS "SYCL_TRIPLE=spir64-unknown-unknown-sycldevice"
5858
DEPENDS ${SYCL_TEST_DEPS}
5959
EXCLUDE_FROM_CHECK_ALL
6060
)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// RUN: %clangxx -fsycl -fsycl-device-only -D__ENABLE_USM_ADDR_SPACE__ -fsycl-targets=%sycl_triple %s -c
2+
3+
#include <CL/sycl.hpp>
4+
#include <cassert>
5+
6+
using namespace cl::sycl;
7+
int main() {
8+
9+
queue myQueue;
10+
myQueue.submit([&](handler &cgh) {
11+
cgh.single_task<class dummy>([=]() {
12+
static_assert(
13+
detail::deduce_AS<__attribute__((opencl_global)) int>::value ==
14+
access::address_space::global_space,
15+
"Unexpected address space");
16+
static_assert(
17+
detail::deduce_AS<__attribute__((opencl_local)) int>::value ==
18+
access::address_space::local_space,
19+
"Unexpected address space");
20+
static_assert(
21+
detail::deduce_AS<__attribute__((opencl_private)) int>::value ==
22+
access::address_space::private_space,
23+
"Unexpected address space");
24+
static_assert(
25+
detail::deduce_AS<__attribute__((opencl_constant)) int>::value ==
26+
access::address_space::constant_space,
27+
"Unexpected address space");
28+
static_assert(
29+
detail::deduce_AS<__attribute__((opencl_global_device)) int>::value ==
30+
access::address_space::global_device_space,
31+
"Unexpected address space");
32+
static_assert(
33+
detail::deduce_AS<__attribute__((opencl_global_host)) int>::value ==
34+
access::address_space::global_host_space,
35+
"Unexpected address space");
36+
});
37+
});
38+
}

0 commit comments

Comments
 (0)