Skip to content

Commit df32ed7

Browse files
committed
Merge branch 'sycl' into remove-event-callback
2 parents 1ef4cd6 + a6381fe commit df32ed7

29 files changed

+605
-439
lines changed

llvm/test/Bindings/Go/go.test

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,3 @@
22

33
; REQUIRES: shell
44
; UNSUPPORTED: asan, ubsan, msan
5-
; XFAIL: *

sycl/include/CL/sycl/detail/pi.h

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,16 @@ typedef enum {
133133
// make the translation to OpenCL transparent.
134134
//
135135
typedef enum : pi_uint64 {
136-
PI_DEVICE_TYPE_CPU = CL_DEVICE_TYPE_CPU,
137-
PI_DEVICE_TYPE_GPU = CL_DEVICE_TYPE_GPU,
138-
PI_DEVICE_TYPE_ACC = CL_DEVICE_TYPE_ACCELERATOR
136+
PI_DEVICE_TYPE_DEFAULT =
137+
CL_DEVICE_TYPE_DEFAULT, ///< The default device available in the PI
138+
///< plugin.
139+
PI_DEVICE_TYPE_ALL =
140+
CL_DEVICE_TYPE_ALL, ///< All devices available in the PI plugin.
141+
PI_DEVICE_TYPE_CPU =
142+
CL_DEVICE_TYPE_CPU, ///< A PI device that is the host processor.
143+
PI_DEVICE_TYPE_GPU = CL_DEVICE_TYPE_GPU, ///< A PI device that is a GPU.
144+
PI_DEVICE_TYPE_ACC = CL_DEVICE_TYPE_ACCELERATOR ///< A PI device that is a
145+
///< dedicated accelerator.
139146
} _pi_device_type;
140147

141148
typedef enum {
@@ -224,6 +231,7 @@ typedef enum {
224231
PI_DEVICE_INFO_BUILT_IN_KERNELS = CL_DEVICE_BUILT_IN_KERNELS,
225232
PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM,
226233
PI_DEVICE_INFO_REFERENCE_COUNT = CL_DEVICE_REFERENCE_COUNT,
234+
PI_DEVICE_INFO_IL_VERSION = CL_DEVICE_IL_VERSION_KHR,
227235
PI_DEVICE_INFO_NAME = CL_DEVICE_NAME,
228236
PI_DEVICE_INFO_VENDOR = CL_DEVICE_VENDOR,
229237
PI_DEVICE_INFO_DRIVER_VERSION = CL_DRIVER_VERSION,
@@ -241,6 +249,10 @@ typedef enum {
241249
PI_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN =
242250
CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
243251
PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE,
252+
PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS = CL_DEVICE_MAX_NUM_SUB_GROUPS,
253+
PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS =
254+
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
255+
PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL = CL_DEVICE_SUB_GROUP_SIZES_INTEL,
244256
PI_DEVICE_INFO_USM_HOST_SUPPORT = CL_DEVICE_HOST_MEM_CAPABILITIES_INTEL,
245257
PI_DEVICE_INFO_USM_DEVICE_SUPPORT = CL_DEVICE_DEVICE_MEM_CAPABILITIES_INTEL,
246258
PI_DEVICE_INFO_USM_SINGLE_SHARED_SUPPORT =
@@ -299,6 +311,16 @@ typedef enum {
299311
PI_KERNEL_GROUP_INFO_PRIVATE_MEM_SIZE = CL_KERNEL_PRIVATE_MEM_SIZE
300312
} _pi_kernel_group_info;
301313

314+
typedef enum {
315+
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT = CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT,
316+
PI_FP_ROUND_TO_NEAREST = CL_FP_ROUND_TO_NEAREST,
317+
PI_FP_ROUND_TO_ZERO = CL_FP_ROUND_TO_ZERO,
318+
PI_FP_ROUND_TO_INF = CL_FP_ROUND_TO_INF,
319+
PI_FP_INF_NAN = CL_FP_INF_NAN,
320+
PI_FP_DENORM = CL_FP_DENORM,
321+
PI_FP_FMA = CL_FP_FMA
322+
} _pi_fp_capabilities;
323+
302324
typedef enum {
303325
PI_IMAGE_INFO_FORMAT = CL_IMAGE_FORMAT,
304326
PI_IMAGE_INFO_ELEMENT_SIZE = CL_IMAGE_ELEMENT_SIZE,
@@ -512,6 +534,7 @@ using pi_image_info = _pi_image_info;
512534
using pi_kernel_info = _pi_kernel_info;
513535
using pi_kernel_group_info = _pi_kernel_group_info;
514536
using pi_kernel_sub_group_info = _pi_kernel_sub_group_info;
537+
using pi_fp_capabilities = _pi_fp_capabilities;
515538
using pi_event_info = _pi_event_info;
516539
using pi_command_type = _pi_command_type;
517540
using pi_mem_type = _pi_mem_type;
@@ -678,6 +701,13 @@ struct pi_device_binary_struct {
678701
};
679702
using pi_device_binary = pi_device_binary_struct *;
680703

704+
// pi_buffer_region structure repeats cl_buffer_region
705+
struct pi_buffer_region_struct {
706+
size_t origin;
707+
size_t size;
708+
};
709+
using pi_buffer_region = pi_buffer_region_struct *;
710+
681711
// Offload binaries descriptor version supported by this library.
682712
static const uint16_t PI_DEVICE_BINARIES_VERSION = 1;
683713

@@ -1118,10 +1148,10 @@ __SYCL_EXPORT pi_result piKernelSetExecInfo(pi_kernel kernel,
11181148
//
11191149
__SYCL_EXPORT pi_result piEventCreate(pi_context context, pi_event *ret_event);
11201150

1121-
__SYCL_EXPORT pi_result piEventGetInfo(
1122-
pi_event event,
1123-
cl_event_info param_name, // TODO: untie from OpenCL
1124-
size_t param_value_size, void *param_value, size_t *param_value_size_ret);
1151+
__SYCL_EXPORT pi_result piEventGetInfo(pi_event event, pi_event_info param_name,
1152+
size_t param_value_size,
1153+
void *param_value,
1154+
size_t *param_value_size_ret);
11251155

11261156
__SYCL_EXPORT pi_result piEventGetProfilingInfo(pi_event event,
11271157
pi_profiling_info param_name,
@@ -1439,7 +1469,8 @@ __SYCL_EXPORT pi_result piextUSMEnqueuePrefetch(
14391469
// USM memadvise API to govern behavior of automatic migration mechanisms
14401470
__SYCL_EXPORT pi_result piextUSMEnqueueMemAdvise(pi_queue queue,
14411471
const void *ptr, size_t length,
1442-
int advice, pi_event *event);
1472+
pi_mem_advice advice,
1473+
pi_event *event);
14431474

14441475
/// API to query information about USM allocated pointers
14451476
/// Valid Queries:
@@ -1469,9 +1500,9 @@ struct _pi_plugin {
14691500
// Some choices are:
14701501
// - Use of integers to keep major and minor version.
14711502
// - Keeping char* Versions.
1472-
const char PiVersion[4] = _PI_H_VERSION_STRING;
1503+
char PiVersion[4];
14731504
// Plugin edits this.
1474-
char PluginVersion[4] = _PI_H_VERSION_STRING;
1505+
char PluginVersion[4];
14751506
char *Targets;
14761507
struct FunctionPointers {
14771508
#define _PI_API(api) decltype(::api) *api;

sycl/include/CL/sycl/info/info_desc.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,20 @@ enum class device : cl_device_info {
117117
partition_affinity_domains = CL_DEVICE_PARTITION_AFFINITY_DOMAIN,
118118
partition_type_affinity_domain = CL_DEVICE_PARTITION_TYPE,
119119
reference_count = CL_DEVICE_REFERENCE_COUNT,
120+
il_version =
121+
CL_DEVICE_IL_VERSION_KHR, // Same as CL_DEVICE_IL_VERSION for >=OpenCL 2.1
120122
max_num_sub_groups = CL_DEVICE_MAX_NUM_SUB_GROUPS,
121123
sub_group_independent_forward_progress =
122124
CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS,
123125
sub_group_sizes = CL_DEVICE_SUB_GROUP_SIZES_INTEL,
124126
partition_type_property,
125127
kernel_kernel_pipe_support,
126128
// USM
127-
usm_device_allocations = PI_USM_DEVICE_SUPPORT,
128-
usm_host_allocations = PI_USM_HOST_SUPPORT,
129-
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
129+
usm_device_allocations = PI_USM_DEVICE_SUPPORT,
130+
usm_host_allocations = PI_USM_HOST_SUPPORT,
131+
usm_shared_allocations = PI_USM_SINGLE_SHARED_SUPPORT,
130132
usm_restricted_shared_allocations = PI_USM_CROSS_SHARED_SUPPORT,
131-
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
133+
usm_system_allocator = PI_USM_SYSTEM_SHARED_SUPPORT
132134
};
133135

134136
enum class device_type : pi_uint64 {

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -653,15 +653,18 @@ pi_result cuda_piDevicesGet(pi_platform platform, pi_device_type device_type,
653653
pi_uint32 *num_devices) {
654654

655655
pi_result err = PI_SUCCESS;
656-
const bool askingForGPU = (device_type & PI_DEVICE_TYPE_GPU);
657-
size_t numDevices = askingForGPU ? platform->devices_.size() : 0;
656+
const bool askingForDefault = device_type == PI_DEVICE_TYPE_DEFAULT;
657+
const bool askingForGPU = device_type & PI_DEVICE_TYPE_GPU;
658+
const bool returnDevices = askingForDefault || askingForGPU;
659+
660+
size_t numDevices = returnDevices ? platform->devices_.size() : 0;
658661

659662
try {
660663
if (num_devices) {
661664
*num_devices = numDevices;
662665
}
663666

664-
if (askingForGPU && devices) {
667+
if (returnDevices && devices) {
665668
for (size_t i = 0; i < std::min(size_t(num_entries), numDevices); ++i) {
666669
devices[i] = platform->devices_[i].get();
667670
}
@@ -966,15 +969,15 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
966969
}
967970
case PI_DEVICE_INFO_SINGLE_FP_CONFIG: {
968971
// TODO: is this config consistent across all NVIDIA GPUs?
969-
auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
970-
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA |
971-
CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
972+
auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
973+
PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA |
974+
PI_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
972975
return getInfo(param_value_size, param_value, param_value_size_ret, config);
973976
}
974977
case PI_DEVICE_INFO_DOUBLE_FP_CONFIG: {
975978
// TODO: is this config consistent across all NVIDIA GPUs?
976-
auto config = CL_FP_DENORM | CL_FP_INF_NAN | CL_FP_ROUND_TO_NEAREST |
977-
CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_FMA;
979+
auto config = PI_FP_DENORM | PI_FP_INF_NAN | PI_FP_ROUND_TO_NEAREST |
980+
PI_FP_ROUND_TO_ZERO | PI_FP_ROUND_TO_INF | PI_FP_FMA;
978981
return getInfo(param_value_size, param_value, param_value_size_ret, config);
979982
}
980983
case PI_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
@@ -1611,7 +1614,7 @@ pi_result cuda_piMemBufferPartition(pi_mem parent_buffer, pi_mem_flags flags,
16111614
assert(memObj != nullptr);
16121615

16131616
const auto bufferRegion =
1614-
*reinterpret_cast<const cl_buffer_region *>(buffer_create_info);
1617+
*reinterpret_cast<const pi_buffer_region>(buffer_create_info);
16151618
assert((bufferRegion.size != 0u) && "PI_INVALID_BUFFER_SIZE");
16161619

16171620
assert((bufferRegion.origin <= (bufferRegion.origin + bufferRegion.size)) &&
@@ -3500,7 +3503,7 @@ pi_result cuda_piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr,
35003503

35013504
/// USM: memadvise API to govern behavior of automatic migration mechanisms
35023505
pi_result cuda_piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
3503-
size_t length, int advice,
3506+
size_t length, pi_mem_advice advice,
35043507
pi_event *event) {
35053508
assert(queue != nullptr);
35063509
assert(ptr != nullptr);

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -939,7 +939,8 @@ pi_result piextUSMEnqueuePrefetch(pi_queue queue, const void *ptr, size_t size,
939939
/// \param event is the event that represents this operation
940940
// USM memadvise API to govern behavior of automatic migration mechanisms
941941
pi_result piextUSMEnqueueMemAdvise(pi_queue queue, const void *ptr,
942-
size_t length, int advice, pi_event *event) {
942+
size_t length, pi_mem_advice advice,
943+
pi_event *event) {
943944

944945
return cast<pi_result>(
945946
clEnqueueMarkerWithWaitList(cast<cl_command_queue>(queue), 0, nullptr,

sycl/source/detail/memory_manager.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,8 +189,7 @@ void *MemoryManager::allocateMemSubBuffer(ContextImplPtr TargetContext,
189189
SizeInBytes *= Range[I];
190190

191191
RT::PiResult Error = PI_SUCCESS;
192-
// TODO replace with pi_buffer_region
193-
cl_buffer_region Region{Offset, SizeInBytes};
192+
pi_buffer_region_struct Region{Offset, SizeInBytes};
194193
RT::PiMem NewMem;
195194
const detail::plugin &Plugin = TargetContext->getPlugin();
196195
Error = Plugin.call_nocheck<PiApiKind::piMemBufferPartition>(

sycl/source/detail/pi.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,9 @@ vector_class<plugin> initialize() {
264264
std::cerr << "SYCL_PI_TRACE[all]: "
265265
<< "No Plugins Found." << std::endl;
266266

267-
PiPlugin PluginInformation;
267+
PiPlugin PluginInformation{_PI_H_VERSION_STRING, _PI_H_VERSION_STRING,
268+
nullptr};
269+
268270
for (unsigned int I = 0; I < PluginNames.size(); I++) {
269271
void *Library = loadPlugin(PluginNames[I].first);
270272

sycl/source/detail/plugin.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,11 @@ class plugin {
3434
plugin(RT::PiPlugin Plugin, backend UseBackend)
3535
: MPlugin(Plugin), MBackend(UseBackend) {}
3636

37+
plugin &operator=(const plugin &) = default;
38+
plugin(const plugin &) = default;
39+
plugin &operator=(plugin &&other) noexcept = default;
40+
plugin(plugin &&other) noexcept = default;
41+
3742
~plugin() = default;
3843

3944
const RT::PiPlugin &getPiPlugin() const { return MPlugin; }
@@ -97,7 +102,7 @@ class plugin {
97102

98103
private:
99104
RT::PiPlugin MPlugin;
100-
const backend MBackend;
105+
backend MBackend;
101106
}; // class plugin
102107
} // namespace detail
103108
} // namespace sycl

sycl/source/detail/scheduler/graph_builder.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -920,10 +920,7 @@ void Scheduler::GraphBuilder::removeRecordForMemObj(SYCLMemObjI *MemObject) {
920920
void Scheduler::GraphBuilder::connectDepEvent(Command *const Cmd,
921921
EventImplPtr DepEvent,
922922
const DepDesc &Dep) {
923-
const ContextImplPtr &Context = Cmd->getContext();
924-
const ContextImplPtr &DepEventContext = DepEvent->getContextImpl();
925-
926-
assert(Context != DepEventContext);
923+
assert(Cmd->getContext() != DepEvent->getContextImpl());
927924

928925
// construct Host Task type command manually and make it depend on DepEvent
929926
ExecCGCommand *ConnectCmd = nullptr;

sycl/source/detail/scheduler/scheduler.cpp

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
6666
Command *NewCmd = nullptr;
6767
const bool IsKernel = CommandGroup->getType() == CG::KERNEL;
6868
{
69-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
69+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
70+
lockSharedTimedMutex(Lock);
7071

7172
switch (CommandGroup->getType()) {
7273
case CG::UPDATE_HOST:
@@ -98,7 +99,8 @@ EventImplPtr Scheduler::addCG(std::unique_ptr<detail::CG> CommandGroup,
9899
}
99100

100101
EventImplPtr Scheduler::addCopyBack(Requirement *Req) {
101-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
102+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
103+
lockSharedTimedMutex(Lock);
102104
Command *NewCmd = MGraphBuilder.addCopyBack(Req);
103105
// Command was not creted because there were no operations with
104106
// buffer.
@@ -154,7 +156,8 @@ void Scheduler::cleanupFinishedCommands(EventImplPtr FinishedEvent) {
154156
}
155157

156158
void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
157-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
159+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
160+
lockSharedTimedMutex(Lock);
158161

159162
MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj);
160163
if (!Record)
@@ -169,7 +172,8 @@ void Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj) {
169172

170173
EventImplPtr Scheduler::addHostAccessor(Requirement *Req,
171174
const bool destructor) {
172-
std::lock_guard<std::shared_timed_mutex> Lock(MGraphLock);
175+
std::unique_lock<std::shared_timed_mutex> Lock(MGraphLock, std::defer_lock);
176+
lockSharedTimedMutex(Lock);
173177

174178
Command *NewCmd = MGraphBuilder.addHostAccessor(Req, destructor);
175179

@@ -216,6 +220,25 @@ Scheduler::Scheduler() {
216220
QueueOrder::Ordered, /*PropList=*/{}));
217221
}
218222

223+
void Scheduler::lockSharedTimedMutex(
224+
std::unique_lock<std::shared_timed_mutex> &Lock) {
225+
#ifdef _WIN32
226+
// Avoiding deadlock situation for MSVC. std::shared_timed_mutex specification
227+
// does not specify a priority for shared and exclusive accesses. It will be a
228+
// deadlock in MSVC's std::shared_timed_mutex implementation, if exclusive
229+
// access occurs after shared access.
230+
// TODO: after switching to C++17, change std::shared_timed_mutex to
231+
// std::shared_mutex and use std::lock_guard here both for Windows and Linux.
232+
while (!Lock.owns_lock()) {
233+
Lock.try_lock();
234+
}
235+
#else
236+
// It is a deadlock on UNIX in implementation of lock and lock_shared, if
237+
// try_lock in the loop above will be executed, so using a single lock here
238+
Lock.lock();
239+
#endif // _WIN32
240+
}
241+
219242
} // namespace detail
220243
} // namespace sycl
221244
} // __SYCL_INLINE_NAMESPACE(cl)

sycl/source/detail/scheduler/scheduler.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,13 @@ class Scheduler {
430430
Scheduler();
431431
static Scheduler instance;
432432

433+
/// Provides exclusive access to std::shared_timed_mutex object with deadlock
434+
/// avoidance
435+
///
436+
/// \param Lock is an instance of std::unique_lock<std::shared_timed_mutex>
437+
/// class
438+
void lockSharedTimedMutex(std::unique_lock<std::shared_timed_mutex> &Lock);
439+
433440
static void enqueueLeavesOfReqUnlocked(const Requirement *const Req);
434441

435442
/// Graph builder class.
@@ -687,6 +694,8 @@ class Scheduler {
687694
void waitForRecordToFinish(MemObjRecord *Record);
688695

689696
GraphBuilder MGraphBuilder;
697+
// TODO: after switching to C++17, change std::shared_timed_mutex to
698+
// std::shared_mutex
690699
std::shared_timed_mutex MGraphLock;
691700

692701
QueueImplPtr DefaultHostQueue;

sycl/test/host-interop-task/host-task-two-queues.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ void test() {
5555
});
5656
};
5757

58-
static const size_t NTIMES = 100;
58+
static const size_t NTIMES = 4;
5959

6060
for (size_t Idx = 0; Idx < NTIMES; ++Idx) {
6161
Q1.submit(CG1);

sycl/unittests/pi/BackendString.hpp

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
2+
// See https://llvm.org/LICENSE.txt for license information.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
#pragma once
6+
7+
#include <detail/plugin.hpp>
8+
9+
namespace pi {
10+
inline const char *GetBackendString(cl::sycl::backend backend) {
11+
switch (backend) {
12+
#define PI_BACKEND_STR(backend_name) \
13+
case cl::sycl::backend::backend_name: \
14+
return #backend_name
15+
PI_BACKEND_STR(cuda);
16+
PI_BACKEND_STR(host);
17+
PI_BACKEND_STR(opencl);
18+
#undef PI_BACKEND_STR
19+
default:
20+
return "Unknown Plugin";
21+
}
22+
}
23+
} // namespace pi

0 commit comments

Comments
 (0)