Skip to content

Commit df201b1

Browse files
committed
Merge remote-tracking branch 'upstream/sycl' into glyons-name
2 parents f5083b2 + 55eef6c commit df201b1

File tree

9 files changed

+132
-47
lines changed

9 files changed

+132
-47
lines changed

sycl/include/CL/sycl/detail/spirv.hpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,29 @@ template <typename Group> bool GroupAny(bool pred) {
4242
}
4343

4444
// Broadcast with scalar local index
45+
// Work-group supports any integral type
46+
// Sub-group currently supports only uint32_t
4547
template <typename Group, typename T, typename IdT>
46-
detail::enable_if_t<std::is_integral<IdT>::value, T>
48+
detail::enable_if_t<is_group<Group>::value && std::is_integral<IdT>::value, T>
4749
GroupBroadcast(T x, IdT local_id) {
4850
using OCLT = detail::ConvertToOpenCLType_t<T>;
4951
using OCLIdT = detail::ConvertToOpenCLType_t<IdT>;
5052
OCLT ocl_x = detail::convertDataToType<T, OCLT>(x);
5153
OCLIdT ocl_id = detail::convertDataToType<IdT, OCLIdT>(local_id);
5254
return __spirv_GroupBroadcast(group_scope<Group>::value, ocl_x, ocl_id);
5355
}
56+
template <typename Group, typename T, typename IdT>
57+
detail::enable_if_t<is_sub_group<Group>::value && std::is_integral<IdT>::value,
58+
T>
59+
GroupBroadcast(T x, IdT local_id) {
60+
using SGIdT = uint32_t;
61+
SGIdT sg_local_id = static_cast<SGIdT>(local_id);
62+
using OCLT = detail::ConvertToOpenCLType_t<T>;
63+
using OCLIdT = detail::ConvertToOpenCLType_t<SGIdT>;
64+
OCLT ocl_x = detail::convertDataToType<T, OCLT>(x);
65+
OCLIdT ocl_id = detail::convertDataToType<SGIdT, OCLIdT>(sg_local_id);
66+
return __spirv_GroupBroadcast(group_scope<Group>::value, ocl_x, ocl_id);
67+
}
5468

5569
// Broadcast with vector local index
5670
template <typename Group, typename T, int Dimensions>

sycl/include/CL/sycl/detail/type_traits.hpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
__SYCL_INLINE_NAMESPACE(cl) {
1919
namespace sycl {
20+
template <int Dimensions> class group;
21+
namespace intel {
22+
struct sub_group;
23+
} // namespace intel
2024
namespace detail {
2125
namespace half_impl {
2226
class half;
@@ -302,6 +306,20 @@ template <access::address_space AS, class DataT>
302306
using const_if_const_AS = DataT;
303307
#endif
304308

309+
template <typename T> struct is_group : std::false_type {};
310+
311+
template <int Dimensions>
312+
struct is_group<group<Dimensions>> : std::true_type {};
313+
314+
template <typename T> struct is_sub_group : std::false_type {};
315+
316+
template <> struct is_sub_group<intel::sub_group> : std::true_type {};
317+
318+
template <typename T>
319+
struct is_generic_group
320+
: std::integral_constant<bool,
321+
is_group<T>::value || is_sub_group<T>::value> {};
322+
305323
} // namespace detail
306324
} // namespace sycl
307325
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/include/CL/sycl/intel/group_algorithm.hpp

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -77,20 +77,6 @@ template <> inline id<3> linear_id_to_id(range<3> r, size_t linear_id) {
7777
return result;
7878
}
7979

80-
template <typename T> struct is_group : std::false_type {};
81-
82-
template <int Dimensions>
83-
struct is_group<group<Dimensions>> : std::true_type {};
84-
85-
template <typename T> struct is_sub_group : std::false_type {};
86-
87-
template <> struct is_sub_group<intel::sub_group> : std::true_type {};
88-
89-
template <typename T>
90-
struct is_generic_group
91-
: std::integral_constant<bool,
92-
is_group<T>::value || is_sub_group<T>::value> {};
93-
9480
template <typename T, class BinaryOperation> struct identity {};
9581

9682
template <typename T, typename V> struct identity<T, intel::plus<V>> {

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
678678

679679
std::vector<ze_device_memory_properties_t> ZeDeviceMemoryProperties;
680680
try {
681-
ZeDeviceMemoryProperties.reserve(ZeAvailMemCount);
681+
ZeDeviceMemoryProperties.resize(ZeAvailMemCount);
682682
} catch (const std::bad_alloc &) {
683683
return PI_OUT_OF_HOST_MEMORY;
684684
} catch (...) {

sycl/source/detail/context_impl.cpp

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,16 @@ namespace detail {
2525
context_impl::context_impl(const device &Device, async_handler AsyncHandler,
2626
bool UseCUDAPrimaryContext)
2727
: MAsyncHandler(AsyncHandler), MDevices(1, Device), MContext(nullptr),
28-
MPlatform(), MPluginInterop(false), MHostContext(true),
28+
MPlatform(), MHostContext(Device.is_host()),
2929
MUseCUDAPrimaryContext(UseCUDAPrimaryContext) {
3030
MKernelProgramCache.setContextPtr(this);
3131
}
3232

3333
context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
34-
async_handler AsyncHandler, bool UseCUDAPrimaryContext)
34+
async_handler AsyncHandler,
35+
bool UseCUDAPrimaryContext)
3536
: MAsyncHandler(AsyncHandler), MDevices(Devices), MContext(nullptr),
36-
MPlatform(), MPluginInterop(true), MHostContext(false),
37+
MPlatform(), MHostContext(false),
3738
MUseCUDAPrimaryContext(UseCUDAPrimaryContext) {
3839
MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform());
3940
vector_class<RT::PiDevice> DeviceIds;
@@ -48,14 +49,15 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
4849
static_cast<pi_context_properties>(PI_CONTEXT_PROPERTIES_CUDA_PRIMARY),
4950
static_cast<pi_context_properties>(UseCUDAPrimaryContext), 0};
5051

51-
getPlugin().call<PiApiKind::piContextCreate>(props, DeviceIds.size(),
52-
DeviceIds.data(), nullptr, nullptr, &MContext);
52+
getPlugin().call<PiApiKind::piContextCreate>(
53+
props, DeviceIds.size(), DeviceIds.data(), nullptr, nullptr, &MContext);
5354
#else
5455
cl::sycl::detail::pi::die("CUDA support was not enabled at compilation time");
5556
#endif
5657
} else {
57-
getPlugin().call<PiApiKind::piContextCreate>(nullptr, DeviceIds.size(),
58-
DeviceIds.data(), nullptr, nullptr, &MContext);
58+
getPlugin().call<PiApiKind::piContextCreate>(nullptr, DeviceIds.size(),
59+
DeviceIds.data(), nullptr,
60+
nullptr, &MContext);
5961
}
6062

6163
MKernelProgramCache.setContextPtr(this);
@@ -64,7 +66,7 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
6466
context_impl::context_impl(RT::PiContext PiContext, async_handler AsyncHandler,
6567
const plugin &Plugin)
6668
: MAsyncHandler(AsyncHandler), MDevices(), MContext(PiContext), MPlatform(),
67-
MPluginInterop(true), MHostContext(false) {
69+
MHostContext(false) {
6870

6971
vector_class<RT::PiDevice> DeviceIds;
7072
size_t DevicesNum = 0;
@@ -92,7 +94,7 @@ context_impl::context_impl(RT::PiContext PiContext, async_handler AsyncHandler,
9294
}
9395

9496
cl_context context_impl::get() const {
95-
if (MPluginInterop) {
97+
if (!MHostContext) {
9698
// TODO catch an exception and put it to list of asynchronous exceptions
9799
getPlugin().call<PiApiKind::piContextRetain>(MContext);
98100
return pi::cast<cl_context>(MContext);
@@ -102,17 +104,17 @@ cl_context context_impl::get() const {
102104
PI_INVALID_CONTEXT);
103105
}
104106

105-
bool context_impl::is_host() const { return MHostContext || !MPluginInterop; }
107+
bool context_impl::is_host() const { return MHostContext; }
106108

107109
context_impl::~context_impl() {
108-
if (MPluginInterop) {
109-
// TODO catch an exception and put it to list of asynchronous exceptions
110-
getPlugin().call<PiApiKind::piContextRelease>(MContext);
111-
}
112110
for (auto LibProg : MCachedLibPrograms) {
113111
assert(LibProg.second && "Null program must not be kept in the cache");
114112
getPlugin().call<PiApiKind::piProgramRelease>(LibProg.second);
115113
}
114+
if (!MHostContext) {
115+
// TODO catch an exception and put it to list of asynchronous exceptions
116+
getPlugin().call<PiApiKind::piContextRelease>(MContext);
117+
}
116118
}
117119

118120
const async_handler &context_impl::get_async_handler() const {

sycl/source/detail/context_impl.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,6 @@ class context_impl {
153153
vector_class<device> MDevices;
154154
RT::PiContext MContext;
155155
PlatformImplPtr MPlatform;
156-
bool MPluginInterop;
157156
bool MHostContext;
158157
bool MUseCUDAPrimaryContext;
159158
std::map<DeviceLibExt, RT::PiProgram> MCachedLibPrograms;

sycl/source/detail/memory_manager.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,10 +353,12 @@ void copyD2D(SYCLMemObjI *SYCLMemObj, RT::PiMem SrcMem, QueueImplPtr SrcQueue,
353353
DepEvents.size(), &DepEvents[0], &OutEvent);
354354
} else {
355355
size_t SrcRowPitch = (1 == DimSrc) ? 0 : SrcSize[0];
356-
size_t SrcSlicePitch = (3 == DimSrc) ? SrcSize[0] * SrcSize[1] : 0;
356+
size_t SrcSlicePitch =
357+
(DimSrc > 1) ? SrcSize[0] * SrcSize[1] : SrcSize[0];
357358

358359
size_t DstRowPitch = (1 == DimDst) ? 0 : DstSize[0];
359-
size_t DstSlicePitch = (3 == DimDst) ? DstSize[0] * DstSize[1] : 0;
360+
size_t DstSlicePitch =
361+
(DimDst > 1) ? DstSize[0] * DstSize[1] : DstSize[0];
360362

361363
Plugin.call<PiApiKind::piEnqueueMemBufferCopyRect>(
362364
Queue, SrcMem, DstMem, &SrcOffset[0], &DstOffset[0],

sycl/test/basic_tests/buffer/buffer_full_copy.cpp

Lines changed: 65 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
// RUN: %GPU_RUN_PLACEHOLDER %t2.out
77
// RUN: %ACC_RUN_PLACEHOLDER %t2.out
88

9+
// XFAIL: level0
10+
911
//==------------- buffer_full_copy.cpp - SYCL buffer basic test ------------==//
1012
//
1113
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -18,7 +20,7 @@
1820
#include <cassert>
1921

2022
void check_copy_device_to_host(cl::sycl::queue &Queue) {
21-
const int size = 6, offset = 2;
23+
constexpr int size = 6, offset = 2;
2224

2325
// Create 2d buffer
2426
cl::sycl::buffer<int, 2> simple_buffer(cl::sycl::range<2>(size, size));
@@ -56,11 +58,11 @@ void check_copy_device_to_host(cl::sycl::queue &Queue) {
5658
}
5759

5860
void check_fill(cl::sycl::queue &Queue) {
59-
const int size = 6, offset = 2;
61+
constexpr int size = 6, offset = 2;
6062
cl::sycl::buffer<float, 1> buf_1(size);
6163
cl::sycl::buffer<float, 1> buf_2(size / 2);
62-
std::vector<float> expected_res_1(size);
63-
std::vector<float> expected_res_2(size / 2);
64+
std::array<float, size> expected_res_1;
65+
std::array<float, size / 2> expected_res_2;
6466

6567
// fill with offset
6668
{
@@ -86,11 +88,11 @@ void check_fill(cl::sycl::queue &Queue) {
8688
}
8789

8890
void check_copy_host_to_device(cl::sycl::queue &Queue) {
89-
const int size = 6, offset = 2;
91+
constexpr int size = 6, offset = 2;
9092
cl::sycl::buffer<float, 1> buf_1(size);
9193
cl::sycl::buffer<float, 1> buf_2(size / 2);
92-
std::vector<float> expected_res_1(size);
93-
std::vector<float> expected_res_2(size / 2);
94+
std::array<float, size> expected_res_1;
95+
std::array<float, size / 2> expected_res_2;
9496

9597
// copy acc 2 acc with offset
9698
{
@@ -126,24 +128,23 @@ void check_copy_host_to_device(cl::sycl::queue &Queue) {
126128

127129
cl::sycl::buffer<float, 2> buf_3({size, size});
128130
cl::sycl::buffer<float, 2> buf_4({size / 2, size / 2});
129-
std::vector<float> expected_res_3(size * size);
130-
std::vector<float> expected_res_4(size * size / 4);
131+
std::array<std::array<float, size>, size> expected_res_3;
132+
std::array<std::array<float, size / 2>, size / 2> expected_res_4;
131133

132134
// copy acc 2 acc with offset for 2D buffers
133135
{
134136
auto acc = buf_3.get_access<cl::sycl::access::mode::write>();
135137
for (int i = 0; i < size; ++i) {
136138
for (int j = 0; j < size; ++j) {
137139
acc[i][j] = i * size + j + 1;
138-
expected_res_3[i * size + j] = i * size + j + 1;
140+
expected_res_3[i][j] = acc[i][j];
139141
}
140142
}
141143
}
142144

143145
for (int i = 0; i < size / 2; ++i)
144146
for (int j = 0; j < size / 2; ++j)
145-
expected_res_4[i * size / 2 + j] =
146-
expected_res_3[(i + offset) * size + j + offset];
147+
expected_res_4[i][j] = expected_res_3[i + offset][j + offset];
147148

148149
e = Queue.submit([&](cl::sycl::handler &cgh) {
149150
auto a = buf_3.get_access<cl::sycl::access::mode::read>(
@@ -161,12 +162,61 @@ void check_copy_host_to_device(cl::sycl::queue &Queue) {
161162
// check that there was no data corruption/loss
162163
for (int i = 0; i < size; ++i) {
163164
for (int j = 0; j < size; ++j)
164-
assert(expected_res_3[i * size + j] == acc_1[i][j]);
165+
assert(expected_res_3[i][j] == acc_1[i][j]);
165166
}
166167

167168
for (int i = 0; i < size / 2; ++i)
168169
for (int j = 0; j < size / 2; ++j)
169-
assert(expected_res_4[i * size / 2 + j] == acc_2[i][j]);
170+
assert(expected_res_4[i][j] == acc_2[i][j]);
171+
}
172+
173+
cl::sycl::buffer<float, 3> buf_5({size, size, size});
174+
cl::sycl::buffer<float, 3> buf_6({size / 2, size / 2, size / 2});
175+
std::array<std::array<std::array<float, size>, size>, size> expected_res_5;
176+
std::array<std::array<std::array<float, size / 2>, size / 2>, size / 2> expected_res_6;
177+
178+
// copy acc 2 acc with offset for 3D buffers
179+
{
180+
auto acc = buf_5.get_access<cl::sycl::access::mode::write>();
181+
for (int i = 0; i < size; ++i) {
182+
for (int j = 0; j < size; ++j) {
183+
for (int k = 0; k < size; ++k) {
184+
acc[i][j][k] = (i * size * size) + (j * size) + k + 1;
185+
expected_res_5[i][j][k] = (i * size * size) + (j * size) + k + 1;
186+
}
187+
}
188+
}
189+
}
190+
191+
for (int i = 0; i < size / 2; ++i)
192+
for (int j = 0; j < size / 2; ++j)
193+
for (int k = 0; k < size / 2; ++k)
194+
expected_res_6[i][j][k] = expected_res_5[i + offset][j + offset][k + offset];
195+
196+
e = Queue.submit([&](cl::sycl::handler &cgh) {
197+
auto a = buf_5.get_access<cl::sycl::access::mode::read>(
198+
cgh, {size / 2, size / 2, size / 2}, {offset, offset, offset});
199+
auto b = buf_6.get_access<cl::sycl::access::mode::write>(
200+
cgh, {size / 2, size / 2, size / 2});
201+
cgh.copy(a, b);
202+
});
203+
e.wait();
204+
205+
{
206+
auto acc_1 = buf_5.get_access<cl::sycl::access::mode::read>();
207+
auto acc_2 = buf_6.get_access<cl::sycl::access::mode::read>();
208+
209+
// check that there was no data corruption/loss
210+
for (int i = 0; i < size; ++i)
211+
for (int j = 0; j < size; ++j)
212+
for (int k = 0; k < size; ++k)
213+
assert(expected_res_5[i][j][k] == acc_1[i][j][k]);
214+
215+
for (int i = 0; i < size / 2; ++i) {
216+
for (int j = 0; j < size / 2; ++j)
217+
for (int k = 0; k < size / 2; ++k)
218+
assert(expected_res_6[i][j][k] == acc_2[i][j][k]);
219+
}
170220
}
171221
}
172222

@@ -178,6 +228,7 @@ int main() {
178228
check_fill(Queue);
179229
} catch (cl::sycl::exception &ex) {
180230
std::cerr << ex.what() << std::endl;
231+
return 1;
181232
}
182233

183234
return 0;

sycl/test/regression/pi_release.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// REQUIRES: cpu
2+
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
3+
// RUN: env SYCL_PI_TRACE=-1 %CPU_RUN_PLACEHOLDER %t.out 2>&1 | FileCheck %s
4+
5+
#include <CL/sycl.hpp>
6+
7+
int main() {
8+
sycl::queue q;
9+
return 0;
10+
}
11+
12+
// CHECK: piQueueRelease
13+
// CHECK: piContextRelease

0 commit comments

Comments
 (0)