Skip to content

Commit dafcc9e

Browse files
author
iclsrc
committed
Merge from 'sycl' to 'sycl-web' (#3)
2 parents 6925c69 + 43af08d commit dafcc9e

File tree

11 files changed

+708
-82
lines changed

11 files changed

+708
-82
lines changed

sycl/include/CL/sycl/accessor.hpp

100644100755
Lines changed: 85 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -987,9 +987,9 @@ class accessor :
987987

988988
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
989989
typename TagT,
990-
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>() &&
991-
IsValidTag<TagT>() && IsPlaceH &&
992-
(IsGlobalBuf || IsConstantBuf)>>
990+
typename = detail::enable_if_t<
991+
IsSameAsBuffer<T, Dims>() && IsValidTag<TagT>() && IsPlaceH &&
992+
(IsGlobalBuf || IsConstantBuf || IsHostBuf)>>
993993
accessor(buffer<T, Dims, AllocatorT> &BufferRef, TagT,
994994
const property_list &PropertyList = {})
995995
: accessor(BufferRef, PropertyList) {}
@@ -1024,9 +1024,9 @@ class accessor :
10241024

10251025
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
10261026
typename TagT,
1027-
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>() &&
1028-
IsValidTag<TagT>() && !IsPlaceH &&
1029-
(IsGlobalBuf || IsConstantBuf)>>
1027+
typename = detail::enable_if_t<
1028+
IsSameAsBuffer<T, Dims>() && IsValidTag<TagT>() && !IsPlaceH &&
1029+
(IsGlobalBuf || IsConstantBuf || IsHostBuf)>>
10301030
accessor(buffer<T, Dims, AllocatorT> &BufferRef, handler &CommandGroupHandler,
10311031
TagT, const property_list &PropertyList = {})
10321032
: accessor(BufferRef, CommandGroupHandler, PropertyList) {}
@@ -1058,9 +1058,9 @@ class accessor :
10581058
#endif
10591059

10601060
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1061-
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>() &&
1062-
(!IsPlaceH &&
1063-
(IsGlobalBuf || IsConstantBuf))>>
1061+
typename = detail::enable_if_t<
1062+
IsSameAsBuffer<T, Dims>() &&
1063+
(!IsPlaceH && (IsGlobalBuf || IsConstantBuf || IsHostBuf))>>
10641064
accessor(buffer<T, Dims, AllocatorT> &BufferRef, handler &CommandGroupHandler,
10651065
range<Dimensions> AccessRange,
10661066
const property_list &PropertyList = {})
@@ -1071,9 +1071,9 @@ class accessor :
10711071

10721072
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
10731073
typename TagT,
1074-
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>() &&
1075-
IsValidTag<TagT>() && !IsPlaceH &&
1076-
(IsGlobalBuf || IsConstantBuf)>>
1074+
typename = detail::enable_if_t<
1075+
IsSameAsBuffer<T, Dims>() && IsValidTag<TagT>() && !IsPlaceH &&
1076+
(IsGlobalBuf || IsConstantBuf || IsHostBuf)>>
10771077
accessor(buffer<T, Dims, AllocatorT> &BufferRef, handler &CommandGroupHandler,
10781078
range<Dimensions> AccessRange, TagT,
10791079
const property_list &PropertyList = {})
@@ -1123,9 +1123,9 @@ class accessor :
11231123
#endif
11241124

11251125
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1126-
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>() &&
1127-
(!IsPlaceH &&
1128-
(IsGlobalBuf || IsConstantBuf))>>
1126+
typename = detail::enable_if_t<
1127+
IsSameAsBuffer<T, Dims>() &&
1128+
(!IsPlaceH && (IsGlobalBuf || IsConstantBuf || IsHostBuf))>>
11291129
accessor(buffer<T, Dims, AllocatorT> &BufferRef, handler &CommandGroupHandler,
11301130
range<Dimensions> AccessRange, id<Dimensions> AccessOffset,
11311131
const property_list &PropertyList = {})
@@ -1151,9 +1151,9 @@ class accessor :
11511151

11521152
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
11531153
typename TagT,
1154-
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>() &&
1155-
IsValidTag<TagT>() && !IsPlaceH &&
1156-
(IsGlobalBuf || IsConstantBuf)>>
1154+
typename = detail::enable_if_t<
1155+
IsSameAsBuffer<T, Dims>() && IsValidTag<TagT>() && !IsPlaceH &&
1156+
(IsGlobalBuf || IsConstantBuf || IsHostBuf)>>
11571157
accessor(buffer<T, Dims, AllocatorT> &BufferRef, handler &CommandGroupHandler,
11581158
range<Dimensions> AccessRange, id<Dimensions> AccessOffset, TagT,
11591159
const property_list &PropertyList = {})
@@ -1675,8 +1675,6 @@ class host_accessor
16751675
// buffer | handler | range | id | | property_list
16761676
// buffer | handler | range | id | mode_tag | property_list
16771677
// -------+---------+-------+----+----------+--------------
1678-
// host_accessor with handler argument will be added later
1679-
// to facilitate non-blocking accessor use case
16801678

16811679
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
16821680
typename = typename detail::enable_if_t<
@@ -1699,6 +1697,24 @@ class host_accessor
16991697
mode_tag_t<AccessMode>, const property_list &PropertyList = {})
17001698
: host_accessor(BufferRef, PropertyList) {}
17011699

1700+
#endif
1701+
1702+
// Constructs a host_accessor from a buffer and a command group handler.
// The unnamed defaulted template parameter removes this overload from
// consideration unless IsSameAsBuffer<T, Dims>() is true.
// BufferRef, CommandGroupHandler and PropertyList are passed unchanged to the
// AccessorT constructor (presumably the underlying accessor base -- confirm
// against the class head, which is outside this hunk).
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1703+
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>()>>
1704+
host_accessor(buffer<T, Dims, AllocatorT> &BufferRef,
1705+
handler &CommandGroupHandler,
1706+
const property_list &PropertyList = {})
1707+
: AccessorT(BufferRef, CommandGroupHandler, PropertyList) {}
1708+
1709+
#if __cplusplus > 201402L
1710+
1711+
// Mode-tag variant of the (buffer, handler, property_list) constructor.
// The mode_tag_t<AccessMode> parameter is unnamed and never read; it only
// takes part in overload selection. The remaining arguments are forwarded to
// the (BufferRef, CommandGroupHandler, PropertyList) host_accessor constructor.
// NOTE(review): T and Dims are declared but BufferRef is typed with
// DataT/Dimensions, unlike the tag-less overload which uses buffer<T, Dims,
// AllocatorT>. T and Dims therefore always take their defaults here --
// confirm this asymmetry is intended.
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1712+
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>()>>
1713+
host_accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
1714+
handler &CommandGroupHandler, mode_tag_t<AccessMode>,
1715+
const property_list &PropertyList = {})
1716+
: host_accessor(BufferRef, CommandGroupHandler, PropertyList) {}
1717+
17021718
#endif
17031719

17041720
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
@@ -1717,6 +1733,26 @@ class host_accessor
17171733
const property_list &PropertyList = {})
17181734
: host_accessor(BufferRef, AccessRange, {}, PropertyList) {}
17191735

1736+
#endif
1737+
1738+
// Constructs a ranged host_accessor from a buffer, a command group handler
// and an access range.
// Forwards to the AccessorT constructor, passing an empty-brace initializer
// in the position where the sibling overload passes its id<Dimensions>
// AccessOffset.
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1739+
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>()>>
1740+
host_accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
1741+
handler &CommandGroupHandler, range<Dimensions> AccessRange,
1742+
const property_list &PropertyList = {})
1743+
: AccessorT(BufferRef, CommandGroupHandler, AccessRange, {},
1744+
PropertyList) {}
1745+
1746+
#if __cplusplus > 201402L
1747+
1748+
// Mode-tag variant of the (buffer, handler, range, property_list)
// constructor. The mode_tag_t<AccessMode> argument is unnamed and unused in
// the body. Delegates to the five-argument host_accessor constructor with an
// empty-brace initializer in the AccessOffset position.
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1749+
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>()>>
1750+
host_accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
1751+
handler &CommandGroupHandler, range<Dimensions> AccessRange,
1752+
mode_tag_t<AccessMode>, const property_list &PropertyList = {})
1753+
: host_accessor(BufferRef, CommandGroupHandler, AccessRange, {},
1754+
PropertyList) {}
1755+
17201756
#endif
17211757

17221758
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
@@ -1735,6 +1771,28 @@ class host_accessor
17351771
mode_tag_t<AccessMode>, const property_list &PropertyList = {})
17361772
: host_accessor(BufferRef, AccessRange, AccessOffset, PropertyList) {}
17371773

1774+
#endif
1775+
1776+
// Constructs a ranged host_accessor from a buffer, a command group handler,
// an access range and an access offset.
// Enabled only when IsSameAsBuffer<T, Dims>() is true; every argument is
// passed unchanged to the AccessorT constructor.
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1777+
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>()>>
1778+
host_accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
1779+
handler &CommandGroupHandler, range<Dimensions> AccessRange,
1780+
id<Dimensions> AccessOffset,
1781+
const property_list &PropertyList = {})
1782+
: AccessorT(BufferRef, CommandGroupHandler, AccessRange, AccessOffset,
1783+
PropertyList) {}
1784+
1785+
#if __cplusplus > 201402L
1786+
1787+
// Mode-tag variant of the (buffer, handler, range, id, property_list)
// constructor. The mode_tag_t<AccessMode> argument is unnamed and unused in
// the body; all other arguments are forwarded to the tag-less host_accessor
// constructor.
template <typename T = DataT, int Dims = Dimensions, typename AllocatorT,
1788+
typename = detail::enable_if_t<IsSameAsBuffer<T, Dims>()>>
1789+
host_accessor(buffer<DataT, Dimensions, AllocatorT> &BufferRef,
1790+
handler &CommandGroupHandler, range<Dimensions> AccessRange,
1791+
id<Dimensions> AccessOffset, mode_tag_t<AccessMode>,
1792+
const property_list &PropertyList = {})
1793+
: host_accessor(BufferRef, CommandGroupHandler, AccessRange, AccessOffset,
1794+
PropertyList) {}
1795+
17381796
#endif
17391797
};
17401798

@@ -1767,6 +1825,13 @@ host_accessor(buffer<DataT, Dimensions, AllocatorT>, Type1, Type2, Type3, Type4)
17671825
->host_accessor<DataT, Dimensions,
17681826
detail::deduceAccessMode<Type3, Type4>()>;
17691827

1828+
// Deduction guide for a buffer followed by five further constructor
// arguments. DataT and Dimensions are taken from the buffer type; the access
// mode is computed by detail::deduceAccessMode from the types of the last two
// arguments (Type4, Type5). Type1..Type3 are not inspected.
template <typename DataT, int Dimensions, typename AllocatorT, typename Type1,
1829+
typename Type2, typename Type3, typename Type4, typename Type5>
1830+
host_accessor(buffer<DataT, Dimensions, AllocatorT>, Type1, Type2, Type3, Type4,
1831+
Type5)
1832+
->host_accessor<DataT, Dimensions,
1833+
detail::deduceAccessMode<Type4, Type5>()>;
1834+
17701835
#endif
17711836

17721837
} // namespace sycl

sycl/include/CL/sycl/buffer.hpp

100644100755
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,11 @@ class buffer {
292292
return host_accessor{*this, args...};
293293
}
294294

295+
// Returns a host_accessor for this buffer that is constructed with a command
// group handler. The host_accessor template arguments are not spelled out, so
// they are left to class template argument deduction from
// (*this, commandGroupHandler, args...). args are taken by value and passed
// on as lvalues.
template <typename... Ts>
296+
auto get_host_access(handler &commandGroupHandler, Ts... args) {
297+
return host_accessor{*this, commandGroupHandler, args...};
298+
}
299+
295300
#endif
296301

297302
template <typename Destination = std::nullptr_t>

sycl/include/CL/sycl/detail/spirv.hpp

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,188 @@ AtomicMax(multi_ptr<T, AddressSpace> MPtr, intel::memory_scope Scope,
290290
return __spirv_AtomicMax(Ptr, SPIRVScope, SPIRVOrder, Value);
291291
}
292292

293+
// Native shuffles map directly to a SPIR-V SubgroupShuffle intrinsic
294+
// SFINAE return-type helper for the native shuffle overloads: names T when
// detail::is_arithmetic<T>::value is true (assuming detail::enable_if_t
// behaves like std::enable_if_t -- confirm), otherwise the overload drops out.
template <typename T>
295+
using EnableIfNativeShuffle =
296+
detail::enable_if_t<detail::is_arithmetic<T>::value, T>;
297+
298+
// Native shuffle for arithmetic T: converts x to the type given by
// detail::ConvertToOpenCLType_t<T> and makes a single call to
// __spirv_SubgroupShuffleINTEL, passing the first component of local_id
// narrowed to uint32_t.
template <typename T>
299+
EnableIfNativeShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
300+
using OCLT = detail::ConvertToOpenCLType_t<T>;
301+
return __spirv_SubgroupShuffleINTEL(OCLT(x),
302+
static_cast<uint32_t>(local_id.get(0)));
303+
}
304+
305+
// Native XOR shuffle for arithmetic T: converts x to
// detail::ConvertToOpenCLType_t<T> and makes a single call to
// __spirv_SubgroupShuffleXorINTEL with local_id.get(0) narrowed to uint32_t.
// NOTE(review): the second parameter is named local_id, but for an XOR
// shuffle it is presumably a mask rather than an id -- confirm against the
// intrinsic's specification.
template <typename T>
306+
EnableIfNativeShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
307+
using OCLT = detail::ConvertToOpenCLType_t<T>;
308+
return __spirv_SubgroupShuffleXorINTEL(
309+
OCLT(x), OCLT(y), static_cast<uint32_t>(local_id.get(0)));
310+
}
311+
312+
// Native shuffle-down for arithmetic T: converts both x and y to
// detail::ConvertToOpenCLType_t<T> and makes a single call to
// __spirv_SubgroupShuffleDownINTEL with local_id.get(0) narrowed to uint32_t.
// NOTE(review): the third parameter is named local_id, but presumably acts
// as a delta for this intrinsic -- confirm against its specification.
template <typename T>
313+
EnableIfNativeShuffle<T> SubgroupShuffleDown(T x, T y, id<1> local_id) {
314+
using OCLT = detail::ConvertToOpenCLType_t<T>;
315+
return __spirv_SubgroupShuffleDownINTEL(
316+
OCLT(x), OCLT(y), static_cast<uint32_t>(local_id.get(0)));
317+
}
318+
319+
// Native shuffle-up for arithmetic T: converts both x and y to
// detail::ConvertToOpenCLType_t<T> and makes a single call to
// __spirv_SubgroupShuffleUpINTEL with local_id.get(0) narrowed to uint32_t.
// NOTE(review): the third parameter is named local_id, but presumably acts
// as a delta for this intrinsic -- confirm against its specification.
template <typename T>
320+
EnableIfNativeShuffle<T> SubgroupShuffleUp(T x, T y, id<1> local_id) {
321+
using OCLT = detail::ConvertToOpenCLType_t<T>;
322+
return __spirv_SubgroupShuffleUpINTEL(OCLT(x), OCLT(y),
323+
static_cast<uint32_t>(local_id.get(0)));
324+
}
325+
326+
// Bitcast shuffles can be implemented using a single SPIR-V SubgroupShuffle
327+
// intrinsic, but require type-punning via an appropriate integer type
328+
// SFINAE return-type helper for the bitcast shuffle overloads: names T when T
// is not arithmetic (per detail::is_arithmetic) but is trivially copyable and
// exactly 1, 2, 4 or 8 bytes in size. This is the exact complement, among
// non-arithmetic types, of the generic-shuffle condition.
template <typename T>
329+
using EnableIfBitcastShuffle =
330+
detail::enable_if_t<!detail::is_arithmetic<T>::value &&
331+
(std::is_trivially_copyable<T>::value &&
332+
(sizeof(T) == 1 || sizeof(T) == 2 ||
333+
sizeof(T) == 4 || sizeof(T) == 8)),
334+
T>;
335+
336+
// Integer type used to carry the bits of T through a native shuffle; an alias
// for select_cl_scalar_integral_unsigned_t<T> (presumably the unsigned
// integer of the same size as T -- confirm against that trait's definition).
template <typename T>
337+
using ConvertToNativeShuffleType_t = select_cl_scalar_integral_unsigned_t<T>;
338+
339+
// Bitcast shuffle: bit_casts x to ConvertToNativeShuffleType_t<T>, performs
// one __spirv_SubgroupShuffleINTEL call on that integer value, and bit_casts
// the result back to T.
template <typename T>
340+
EnableIfBitcastShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
341+
using ShuffleT = ConvertToNativeShuffleType_t<T>;
342+
auto ShuffleX = detail::bit_cast<ShuffleT>(x);
343+
ShuffleT Result = __spirv_SubgroupShuffleINTEL(
344+
ShuffleX, static_cast<uint32_t>(local_id.get(0)));
345+
return detail::bit_cast<T>(Result);
346+
}
347+
348+
// Bitcast XOR shuffle: bit_casts x to ConvertToNativeShuffleType_t<T>,
// performs one __spirv_SubgroupShuffleXorINTEL call on that integer value,
// and bit_casts the result back to T.
template <typename T>
349+
EnableIfBitcastShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
350+
using ShuffleT = ConvertToNativeShuffleType_t<T>;
351+
auto ShuffleX = detail::bit_cast<ShuffleT>(x);
352+
ShuffleT Result = __spirv_SubgroupShuffleXorINTEL(
353+
ShuffleX, static_cast<uint32_t>(local_id.get(0)));
354+
return detail::bit_cast<T>(Result);
355+
}
356+
357+
// Bitcast shuffle-down: bit_casts x and y to ConvertToNativeShuffleType_t<T>,
// performs one __spirv_SubgroupShuffleDownINTEL call on the two integer
// values, and bit_casts the result back to T.
template <typename T>
358+
EnableIfBitcastShuffle<T> SubgroupShuffleDown(T x, T y, id<1> local_id) {
359+
using ShuffleT = ConvertToNativeShuffleType_t<T>;
360+
auto ShuffleX = detail::bit_cast<ShuffleT>(x);
361+
auto ShuffleY = detail::bit_cast<ShuffleT>(y);
362+
ShuffleT Result = __spirv_SubgroupShuffleDownINTEL(
363+
ShuffleX, ShuffleY, static_cast<uint32_t>(local_id.get(0)));
364+
return detail::bit_cast<T>(Result);
365+
}
366+
367+
// Bitcast shuffle-up: bit_casts x and y to ConvertToNativeShuffleType_t<T>,
// performs one __spirv_SubgroupShuffleUpINTEL call on the two integer values,
// and bit_casts the result back to T.
template <typename T>
369+
EnableIfBitcastShuffle<T> SubgroupShuffleUp(T x, T y, id<1> local_id) {
370+
using ShuffleT = ConvertToNativeShuffleType_t<T>;
371+
auto ShuffleX = detail::bit_cast<ShuffleT>(x);
372+
auto ShuffleY = detail::bit_cast<ShuffleT>(y);
373+
ShuffleT Result = __spirv_SubgroupShuffleUpINTEL(
374+
ShuffleX, ShuffleY, static_cast<uint32_t>(local_id.get(0)));
375+
return detail::bit_cast<T>(Result);
376+
}
376+
377+
// Generic shuffles may require multiple calls to SPIR-V SubgroupShuffle
378+
// intrinsics, and should use the fewest shuffles possible:
379+
// - Loop over 64-bit chunks until remaining bytes < 64-bit
380+
// - At most one 32-bit, 16-bit and 8-bit chunk left over
381+
// SFINAE return-type helper for the generic (chunked) shuffle overloads:
// names T when T is not arithmetic (per detail::is_arithmetic) and does not
// qualify for the bitcast path, i.e. it is either not trivially copyable or
// its size is something other than 1, 2, 4 or 8 bytes.
template <typename T>
382+
using EnableIfGenericShuffle =
383+
detail::enable_if_t<!detail::is_arithmetic<T>::value &&
384+
!(std::is_trivially_copyable<T>::value &&
385+
(sizeof(T) == 1 || sizeof(T) == 2 ||
386+
sizeof(T) == 4 || sizeof(T) == 8)),
387+
T>;
388+
389+
// Decomposes an object of sizeof(T) bytes into chunks and invokes
// ShuffleBytes(Offset, Size) once per chunk, in increasing Offset order:
//   - one call for every whole 64-bit chunk, followed by
//   - at most one 32-bit, one 16-bit and one 8-bit call for the tail.
// The chunks are disjoint and together cover exactly [0, sizeof(T)).
//
// T            - type whose size is being decomposed (only sizeof(T) is used).
// ShuffleBytes - callable as ShuffleBytes(size_t Offset, size_t Size); any
//                return value is ignored.
template <typename T, typename ShuffleFunctor>
void GenericShuffle(const ShuffleFunctor &ShuffleBytes) {
  // Whole 64-bit chunks only. The bound is Offset + 8 <= sizeof(T) rather than
  // Offset < sizeof(T): with the latter, a size that is not a multiple of 8
  // would have its tail shuffled as a full 8-byte chunk, touching memory past
  // the end of the object (and the tail would then be handled a second time
  // by the narrower steps below).
  if (sizeof(T) >= sizeof(uint64_t)) {
#pragma unroll
    for (size_t Offset = 0; Offset + sizeof(uint64_t) <= sizeof(T);
         Offset += sizeof(uint64_t)) {
      ShuffleBytes(Offset, sizeof(uint64_t));
    }
  }
  // Tail: each step starts where the next-wider granularity stopped.
  if (sizeof(T) % sizeof(uint64_t) >= sizeof(uint32_t)) {
    size_t Offset = sizeof(T) / sizeof(uint64_t) * sizeof(uint64_t);
    ShuffleBytes(Offset, sizeof(uint32_t));
  }
  if (sizeof(T) % sizeof(uint32_t) >= sizeof(uint16_t)) {
    size_t Offset = sizeof(T) / sizeof(uint32_t) * sizeof(uint32_t);
    ShuffleBytes(Offset, sizeof(uint16_t));
  }
  if (sizeof(T) % sizeof(uint16_t) >= sizeof(uint8_t)) {
    size_t Offset = sizeof(T) / sizeof(uint16_t) * sizeof(uint16_t);
    ShuffleBytes(Offset, sizeof(uint8_t));
  }
}
410+
411+
// Generic shuffle for types that take neither the native nor the bitcast
// path. The bytes of x are shuffled chunk by chunk: for every (Offset, Size)
// produced by GenericShuffle<T>, Size bytes are copied into a uint64_t,
// shuffled with the uint64_t SubgroupShuffle overload, and the low Size bytes
// of the outcome are copied into Result at the same offset.
template <typename T>
412+
EnableIfGenericShuffle<T> SubgroupShuffle(T x, id<1> local_id) {
413+
T Result;
414+
char *XBytes = reinterpret_cast<char *>(&x);
415+
char *ResultBytes = reinterpret_cast<char *>(&Result);
// [=] copies the two byte pointers and local_id; the lambda is only invoked
// synchronously by GenericShuffle below, while x and Result are still alive.
416+
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
// NOTE(review): ShuffleX is not initialized, so when Size < 8 its remaining
// bytes are indeterminate when passed to the shuffle; only the first Size
// bytes of ShuffleResult are consumed afterwards.
417+
uint64_t ShuffleX, ShuffleResult;
418+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
419+
ShuffleResult = SubgroupShuffle(ShuffleX, local_id);
420+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
421+
};
422+
GenericShuffle<T>(ShuffleBytes);
423+
return Result;
424+
}
425+
426+
// Generic XOR shuffle for types that take neither the native nor the bitcast
// path. For every (Offset, Size) produced by GenericShuffle<T>, Size bytes of
// x are copied into a uint64_t, shuffled with the uint64_t SubgroupShuffleXor
// overload, and the low Size bytes of the outcome are copied into Result at
// the same offset.
template <typename T>
427+
EnableIfGenericShuffle<T> SubgroupShuffleXor(T x, id<1> local_id) {
428+
T Result;
429+
char *XBytes = reinterpret_cast<char *>(&x);
430+
char *ResultBytes = reinterpret_cast<char *>(&Result);
// [=] copies the two byte pointers and local_id; the lambda is only invoked
// synchronously by GenericShuffle below, while x and Result are still alive.
431+
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
// NOTE(review): ShuffleX is not initialized, so when Size < 8 its remaining
// bytes are indeterminate when passed to the shuffle; only the first Size
// bytes of ShuffleResult are consumed afterwards.
432+
uint64_t ShuffleX, ShuffleResult;
433+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
434+
ShuffleResult = SubgroupShuffleXor(ShuffleX, local_id);
435+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
436+
};
437+
GenericShuffle<T>(ShuffleBytes);
438+
return Result;
439+
}
440+
441+
// Generic shuffle-down for types that take neither the native nor the
// bitcast path. For every (Offset, Size) produced by GenericShuffle<T>, Size
// bytes of x and of y are copied into two uint64_t values, shuffled with the
// uint64_t SubgroupShuffleDown overload, and the low Size bytes of the
// outcome are copied into Result at the same offset.
template <typename T>
442+
EnableIfGenericShuffle<T> SubgroupShuffleDown(T x, T y, id<1> local_id) {
443+
T Result;
444+
char *XBytes = reinterpret_cast<char *>(&x);
445+
char *YBytes = reinterpret_cast<char *>(&y);
446+
char *ResultBytes = reinterpret_cast<char *>(&Result);
// [=] copies the three byte pointers and local_id; the lambda is only invoked
// synchronously by GenericShuffle below, while x, y and Result are alive.
447+
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
// NOTE(review): ShuffleX and ShuffleY are not initialized, so when Size < 8
// their remaining bytes are indeterminate when passed to the shuffle; only
// the first Size bytes of ShuffleResult are consumed afterwards.
448+
uint64_t ShuffleX, ShuffleY, ShuffleResult;
449+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
450+
detail::memcpy(&ShuffleY, YBytes + Offset, Size);
451+
ShuffleResult = SubgroupShuffleDown(ShuffleX, ShuffleY, local_id);
452+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
453+
};
454+
GenericShuffle<T>(ShuffleBytes);
455+
return Result;
456+
}
457+
458+
// Generic shuffle-up for types that take neither the native nor the bitcast
// path. For every (Offset, Size) produced by GenericShuffle<T>, Size bytes of
// x and of y are copied into two uint64_t values, shuffled with the uint64_t
// SubgroupShuffleUp overload, and the low Size bytes of the outcome are
// copied into Result at the same offset.
template <typename T>
459+
EnableIfGenericShuffle<T> SubgroupShuffleUp(T x, T y, id<1> local_id) {
460+
T Result;
461+
char *XBytes = reinterpret_cast<char *>(&x);
462+
char *YBytes = reinterpret_cast<char *>(&y);
463+
char *ResultBytes = reinterpret_cast<char *>(&Result);
// [=] copies the three byte pointers and local_id; the lambda is only invoked
// synchronously by GenericShuffle below, while x, y and Result are alive.
464+
auto ShuffleBytes = [=](size_t Offset, size_t Size) {
// NOTE(review): ShuffleX and ShuffleY are not initialized, so when Size < 8
// their remaining bytes are indeterminate when passed to the shuffle; only
// the first Size bytes of ShuffleResult are consumed afterwards.
465+
uint64_t ShuffleX, ShuffleY, ShuffleResult;
466+
detail::memcpy(&ShuffleX, XBytes + Offset, Size);
467+
detail::memcpy(&ShuffleY, YBytes + Offset, Size);
468+
ShuffleResult = SubgroupShuffleUp(ShuffleX, ShuffleY, local_id);
469+
detail::memcpy(ResultBytes + Offset, &ShuffleResult, Size);
470+
};
471+
GenericShuffle<T>(ShuffleBytes);
472+
return Result;
473+
}
474+
293475
} // namespace spirv
294476
} // namespace detail
295477
} // namespace sycl

0 commit comments

Comments
 (0)