Skip to content

Commit 66d35e2

Browse files
0x12CCkbenzieagainullsteffenlarsen
authored
[SYCL] Use PI APIs for cooperative kernels (#12367)
This change updates the SYCL runtime to use `piextKernelSuggestMaxCooperativeGroupCount` and `piextEnqueueCooperativeKernelLaunch` for cooperative kernels. These functions are used to implement the query and launch kernels as described in the sycl_ext_oneapi_root_group extension. --------- Signed-off-by: Michael Aziz <[email protected]> Co-authored-by: Kenneth Benzie (Benie) <[email protected]> Co-authored-by: Artur Gainullin <[email protected]> Co-authored-by: Steffen Larsen <[email protected]>
1 parent d6eecfa commit 66d35e2

35 files changed

+334
-61
lines changed

sycl/include/sycl/detail/cg.hpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ class CG {
125125
CG(CG &&CommandGroup) = default;
126126
CG(const CG &CommandGroup) = default;
127127

128-
CGTYPE getType() { return MType; }
128+
CGTYPE getType() const { return MType; }
129129

130130
std::vector<std::vector<char>> &getArgsStorage() {
131131
return MData.MArgsStorage;
@@ -176,6 +176,7 @@ class CGExecKernel : public CG {
176176
std::vector<std::shared_ptr<detail::stream_impl>> MStreams;
177177
std::vector<std::shared_ptr<const void>> MAuxiliaryResources;
178178
sycl::detail::pi::PiKernelCacheConfig MKernelCacheConfig;
179+
bool MKernelIsCooperative = false;
179180

180181
CGExecKernel(NDRDescT NDRDesc, std::shared_ptr<HostKernelBase> HKernel,
181182
std::shared_ptr<detail::kernel_impl> SyclKernel,
@@ -186,14 +187,15 @@ class CGExecKernel : public CG {
186187
std::vector<std::shared_ptr<const void>> AuxiliaryResources,
187188
CGTYPE Type,
188189
sycl::detail::pi::PiKernelCacheConfig KernelCacheConfig,
189-
detail::code_location loc = {})
190+
bool KernelIsCooperative, detail::code_location loc = {})
190191
: CG(Type, std::move(CGData), std::move(loc)),
191192
MNDRDesc(std::move(NDRDesc)), MHostKernel(std::move(HKernel)),
192193
MSyclKernel(std::move(SyclKernel)),
193194
MKernelBundle(std::move(KernelBundle)), MArgs(std::move(Args)),
194195
MKernelName(std::move(KernelName)), MStreams(std::move(Streams)),
195196
MAuxiliaryResources(std::move(AuxiliaryResources)),
196-
MKernelCacheConfig(std::move(KernelCacheConfig)) {
197+
MKernelCacheConfig(std::move(KernelCacheConfig)),
198+
MKernelIsCooperative(KernelIsCooperative) {
197199
assert(getType() == Kernel && "Wrong type of exec kernel CG.");
198200
}
199201

sycl/include/sycl/detail/pi.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ _PI_API(piextKernelSetArgPointer)
8787
_PI_API(piKernelSetExecInfo)
8888
_PI_API(piextKernelCreateWithNativeHandle)
8989
_PI_API(piextKernelGetNativeHandle)
90+
_PI_API(piextKernelSuggestMaxCooperativeGroupCount)
9091
// Event
9192
_PI_API(piEventCreate)
9293
_PI_API(piEventGetInfo)
@@ -105,6 +106,7 @@ _PI_API(piSamplerRetain)
105106
_PI_API(piSamplerRelease)
106107
// Queue commands
107108
_PI_API(piEnqueueKernelLaunch)
109+
_PI_API(piextEnqueueCooperativeKernelLaunch)
108110
_PI_API(piEnqueueEventsWait)
109111
_PI_API(piEnqueueEventsWaitWithBarrier)
110112
_PI_API(piEnqueueMemBufferRead)

sycl/include/sycl/detail/pi.h

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,11 @@
152152
// 15.43 Changed the signature of piextMemGetNativeHandle to also take a
153153
// pi_device
154154
// 15.44 Add coarse-grain memory advice flag for HIP.
155+
// 15.45 Added piextKernelSuggestMaxCooperativeGroupCount and
156+
// piextEnqueueCooperativeKernelLaunch.
155157

156158
#define _PI_H_VERSION_MAJOR 15
157-
#define _PI_H_VERSION_MINOR 44
159+
#define _PI_H_VERSION_MINOR 45
158160

159161
#define _PI_STRING_HELPER(a) #a
160162
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
@@ -1670,6 +1672,18 @@ __SYCL_EXPORT pi_result piextKernelCreateWithNativeHandle(
16701672
__SYCL_EXPORT pi_result
16711673
piextKernelGetNativeHandle(pi_kernel kernel, pi_native_handle *nativeHandle);
16721674

1675+
/// Gets the max work group count for a cooperative kernel.
1676+
///
1677+
/// \param kernel is the PI kernel being queried.
1678+
/// \param local_work_size is the number of work items in a work group that will
1679+
/// be used when the kernel is launched. \param dynamic_shared_memory_size is
1680+
/// the size of dynamic shared memory, for each work group, in bytes, that will
1681+
/// be used when the kernel is launched." \param group_count_ret is a pointer to
1682+
/// where the query result will be stored.
1683+
__SYCL_EXPORT pi_result piextKernelSuggestMaxCooperativeGroupCount(
1684+
pi_kernel kernel, size_t local_work_size, size_t dynamic_shared_memory_size,
1685+
pi_uint32 *group_count_ret);
1686+
16731687
//
16741688
// Events
16751689
//
@@ -1752,6 +1766,12 @@ __SYCL_EXPORT pi_result piEnqueueKernelLaunch(
17521766
const size_t *local_work_size, pi_uint32 num_events_in_wait_list,
17531767
const pi_event *event_wait_list, pi_event *event);
17541768

1769+
__SYCL_EXPORT pi_result piextEnqueueCooperativeKernelLaunch(
1770+
pi_queue queue, pi_kernel kernel, pi_uint32 work_dim,
1771+
const size_t *global_work_offset, const size_t *global_work_size,
1772+
const size_t *local_work_size, pi_uint32 num_events_in_wait_list,
1773+
const pi_event *event_wait_list, pi_event *event);
1774+
17551775
__SYCL_EXPORT pi_result piEnqueueEventsWait(pi_queue command_queue,
17561776
pi_uint32 num_events_in_wait_list,
17571777
const pi_event *event_wait_list,

sycl/include/sycl/detail/spirv.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ struct sub_group;
2323
namespace experimental {
2424
template <typename ParentGroup> class ballot_group;
2525
template <size_t PartitionSize, typename ParentGroup> class fixed_size_group;
26+
template <int Dimensions> class root_group;
2627
template <typename ParentGroup> class tangle_group;
2728
class opportunistic_group;
2829
} // namespace experimental
@@ -51,6 +52,11 @@ namespace spirv {
5152

5253
template <typename Group> struct group_scope {};
5354

55+
template <int Dimensions>
56+
struct group_scope<sycl::ext::oneapi::experimental::root_group<Dimensions>> {
57+
static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Device;
58+
};
59+
5460
template <int Dimensions> struct group_scope<group<Dimensions>> {
5561
static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Workgroup;
5662
};

sycl/include/sycl/detail/type_traits.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,9 @@ template <class T>
4545
inline constexpr bool is_fixed_topology_group_v =
4646
is_fixed_topology_group<T>::value;
4747

48-
#ifdef SYCL_EXT_ONEAPI_ROOT_GROUP
49-
template <> struct is_fixed_topology_group<root_group> : std::true_type {};
50-
#endif
48+
template <int Dimensions> class root_group;
49+
template <int Dimensions>
50+
struct is_fixed_topology_group<root_group<Dimensions>> : std::true_type {};
5151

5252
template <int Dimensions>
5353
struct is_fixed_topology_group<sycl::group<Dimensions>> : std::true_type {};

sycl/include/sycl/ext/oneapi/experimental/root_group.hpp

Lines changed: 17 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010

1111
#include <sycl/builtins.hpp>
1212
#include <sycl/ext/oneapi/properties/properties.hpp>
13+
#include <sycl/group.hpp>
1314
#include <sycl/memory_enums.hpp>
14-
#include <sycl/queue.hpp>
15-
16-
#define SYCL_EXT_ONEAPI_ROOT_GROUP 1
15+
#include <sycl/nd_item.hpp>
16+
#include <sycl/sub_group.hpp>
1717

1818
namespace sycl {
1919
inline namespace _V1 {
@@ -106,31 +106,25 @@ template <int Dimensions> root_group<Dimensions> get_root_group() {
106106

107107
} // namespace ext::oneapi::experimental
108108

109-
template <>
110-
typename ext::oneapi::experimental::info::kernel_queue_specific::
111-
max_num_work_group_sync::return_type
112-
kernel::ext_oneapi_get_info<
113-
ext::oneapi::experimental::info::kernel_queue_specific::
114-
max_num_work_group_sync>(const queue &q) const {
115-
// TODO: query the backend to return a value >= 1.
116-
return 1;
117-
}
118-
119109
template <int dimensions>
120110
void group_barrier(ext::oneapi::experimental::root_group<dimensions> G,
121111
memory_scope FenceScope = decltype(G)::fence_scope) {
122-
(void)G;
123-
(void)FenceScope;
124112
#ifdef __SYCL_DEVICE_ONLY__
125-
// TODO: Change __spv::Scope::Workgroup to __spv::Scope::Device once backends
126-
// support device scope. __spv::Scope::Workgroup is only valid when
127-
// max_num_work_group_sync is 1, so that all work items in a root group will
128-
// also be in the same work group.
129-
__spirv_ControlBarrier(__spv::Scope::Workgroup, __spv::Scope::Workgroup,
130-
__spv::MemorySemanticsMask::SubgroupMemory |
131-
__spv::MemorySemanticsMask::WorkgroupMemory |
132-
__spv::MemorySemanticsMask::CrossWorkgroupMemory);
113+
// Root group barrier synchronizes using a work group barrier if there's only
114+
// one work group. This allows backends to ignore the ControlBarrier with
115+
// Device scope if their maximum number of work groups is 1. This is a
116+
// workaround that's not intended to reduce the bar for SPIR-V modules
117+
// acceptance, but rather make a pessimistic case work until we have full
118+
// support for the device barrier built-in from backends.
119+
const auto ChildGroup = ext::oneapi::experimental::get_child_group(G);
120+
if (ChildGroup.get_group_linear_range() == 1) {
121+
group_barrier(ChildGroup);
122+
} else {
123+
detail::spirv::ControlBarrier(G, FenceScope, memory_order::seq_cst);
124+
}
133125
#else
126+
(void)G;
127+
(void)FenceScope;
134128
throw sycl::runtime_error("Barriers are not supported on host device",
135129
PI_ERROR_INVALID_DEVICE);
136130
#endif

sycl/include/sycl/handler.hpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
#include <sycl/ext/oneapi/device_global/device_global.hpp>
3333
#include <sycl/ext/oneapi/device_global/properties.hpp>
3434
#include <sycl/ext/oneapi/experimental/graph.hpp>
35+
#include <sycl/ext/oneapi/experimental/root_group.hpp>
3536
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
3637
#include <sycl/ext/oneapi/properties/properties.hpp>
3738
#include <sycl/group.hpp>
@@ -933,6 +934,10 @@ class __SYCL_EXPORT handler {
933934
} else {
934935
std::ignore = Props;
935936
}
937+
938+
constexpr bool UsesRootSync = PropertiesT::template has_property<
939+
sycl::ext::oneapi::experimental::use_root_sync_key>();
940+
setKernelIsCooperative(UsesRootSync);
936941
}
937942

938943
/// Checks whether it is possible to copy the source shape to the destination
@@ -3622,6 +3627,8 @@ class __SYCL_EXPORT handler {
36223627

36233628
// Set value of the gpu cache configuration for the kernel.
36243629
void setKernelCacheConfig(sycl::detail::pi::PiKernelCacheConfig);
3630+
// Set value of the kernel is cooperative flag
3631+
void setKernelIsCooperative(bool);
36253632

36263633
template <
36273634
ext::oneapi::experimental::detail::UnsupportedGraphFeatures FeatureT>

sycl/include/sycl/sycl.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
#include <sycl/ext/oneapi/experimental/fixed_size_group.hpp>
9191
#include <sycl/ext/oneapi/experimental/opportunistic_group.hpp>
9292
#include <sycl/ext/oneapi/experimental/prefetch.hpp>
93+
#include <sycl/ext/oneapi/experimental/root_group.hpp>
9394
#include <sycl/ext/oneapi/experimental/tangle_group.hpp>
9495
#include <sycl/ext/oneapi/filter_selector.hpp>
9596
#include <sycl/ext/oneapi/functional.hpp>

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
544544
NumEventsInWaitList, EventWaitList, OutEvent);
545545
}
546546

547+
pi_result piextEnqueueCooperativeKernelLaunch(
548+
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
549+
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
550+
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
551+
const pi_event *EventWaitList, pi_event *OutEvent) {
552+
return pi2ur::piextEnqueueCooperativeKernelLaunch(
553+
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
554+
NumEventsInWaitList, EventWaitList, OutEvent);
555+
}
556+
547557
pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
548558
pi_context Context,
549559
pi_program Program,
@@ -559,6 +569,13 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
559569
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
560570
}
561571

572+
pi_result piextKernelSuggestMaxCooperativeGroupCount(
573+
pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize,
574+
pi_uint32 *GroupCountRet) {
575+
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(
576+
Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet);
577+
}
578+
562579
pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
563580
return pi2ur::piEventCreate(Context, RetEvent);
564581
}

sycl/plugins/hip/pi_hip.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
547547
NumEventsInWaitList, EventWaitList, OutEvent);
548548
}
549549

550+
pi_result piextEnqueueCooperativeKernelLaunch(
551+
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
552+
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
553+
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
554+
const pi_event *EventWaitList, pi_event *OutEvent) {
555+
return pi2ur::piextEnqueueCooperativeKernelLaunch(
556+
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
557+
NumEventsInWaitList, EventWaitList, OutEvent);
558+
}
559+
550560
pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
551561
pi_context Context,
552562
pi_program Program,
@@ -562,6 +572,13 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
562572
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
563573
}
564574

575+
pi_result piextKernelSuggestMaxCooperativeGroupCount(
576+
pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize,
577+
pi_uint32 *GroupCountRet) {
578+
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(
579+
Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet);
580+
}
581+
565582
pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
566583
return pi2ur::piEventCreate(Context, RetEvent);
567584
}

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -558,6 +558,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
558558
NumEventsInWaitList, EventWaitList, OutEvent);
559559
}
560560

561+
pi_result piextEnqueueCooperativeKernelLaunch(
562+
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
563+
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
564+
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
565+
const pi_event *EventWaitList, pi_event *OutEvent) {
566+
return pi2ur::piextEnqueueCooperativeKernelLaunch(
567+
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
568+
NumEventsInWaitList, EventWaitList, OutEvent);
569+
}
570+
561571
pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
562572
pi_context Context,
563573
pi_program Program,
@@ -573,6 +583,13 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
573583
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
574584
}
575585

586+
pi_result piextKernelSuggestMaxCooperativeGroupCount(
587+
pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize,
588+
pi_uint32 *GroupCountRet) {
589+
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(
590+
Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet);
591+
}
592+
576593
//
577594
// Events
578595
//

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -527,6 +527,16 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
527527
NumEventsInWaitList, EventWaitList, OutEvent);
528528
}
529529

530+
pi_result piextEnqueueCooperativeKernelLaunch(
531+
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
532+
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
533+
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
534+
const pi_event *EventWaitList, pi_event *OutEvent) {
535+
return pi2ur::piextEnqueueCooperativeKernelLaunch(
536+
Queue, Kernel, WorkDim, GlobalWorkOffset, GlobalWorkSize, LocalWorkSize,
537+
NumEventsInWaitList, EventWaitList, OutEvent);
538+
}
539+
530540
pi_result piextKernelCreateWithNativeHandle(pi_native_handle NativeHandle,
531541
pi_context Context,
532542
pi_program Program,
@@ -541,6 +551,13 @@ pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
541551
return pi2ur::piextKernelGetNativeHandle(Kernel, NativeHandle);
542552
}
543553

554+
pi_result piextKernelSuggestMaxCooperativeGroupCount(
555+
pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize,
556+
pi_uint32 *GroupCountRet) {
557+
return pi2ur::piextKernelSuggestMaxCooperativeGroupCount(
558+
Kernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet);
559+
}
560+
544561
pi_result piEventCreate(pi_context Context, pi_event *RetEvent) {
545562
return pi2ur::piEventCreate(Context, RetEvent);
546563
}

sycl/plugins/unified_runtime/pi2ur.hpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2599,6 +2599,19 @@ inline pi_result piextKernelGetNativeHandle(pi_kernel Kernel,
25992599
return PI_SUCCESS;
26002600
}
26012601

2602+
inline pi_result piextKernelSuggestMaxCooperativeGroupCount(
2603+
pi_kernel Kernel, size_t LocalWorkSize, size_t DynamicSharedMemorySize,
2604+
pi_uint32 *GroupCountRet) {
2605+
PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL);
2606+
PI_ASSERT(GroupCountRet, PI_ERROR_INVALID_VALUE);
2607+
2608+
ur_kernel_handle_t UrKernel = reinterpret_cast<ur_kernel_handle_t>(Kernel);
2609+
HANDLE_ERRORS(urKernelSuggestMaxCooperativeGroupCountExp(
2610+
UrKernel, LocalWorkSize, DynamicSharedMemorySize, GroupCountRet));
2611+
2612+
return PI_SUCCESS;
2613+
}
2614+
26022615
/// API for writing data from host to a device global variable.
26032616
///
26042617
/// \param Queue is the queue
@@ -3669,6 +3682,30 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
36693682
return PI_SUCCESS;
36703683
}
36713684

3685+
inline pi_result piextEnqueueCooperativeKernelLaunch(
3686+
pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim,
3687+
const size_t *GlobalWorkOffset, const size_t *GlobalWorkSize,
3688+
const size_t *LocalWorkSize, pi_uint32 NumEventsInWaitList,
3689+
const pi_event *EventsWaitList, pi_event *OutEvent) {
3690+
3691+
PI_ASSERT(Kernel, PI_ERROR_INVALID_KERNEL);
3692+
PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE);
3693+
PI_ASSERT((WorkDim > 0) && (WorkDim < 4), PI_ERROR_INVALID_WORK_DIMENSION);
3694+
3695+
ur_queue_handle_t UrQueue = reinterpret_cast<ur_queue_handle_t>(Queue);
3696+
ur_kernel_handle_t UrKernel = reinterpret_cast<ur_kernel_handle_t>(Kernel);
3697+
const ur_event_handle_t *UrEventsWaitList =
3698+
reinterpret_cast<const ur_event_handle_t *>(EventsWaitList);
3699+
3700+
ur_event_handle_t *UREvent = reinterpret_cast<ur_event_handle_t *>(OutEvent);
3701+
3702+
HANDLE_ERRORS(urEnqueueCooperativeKernelLaunchExp(
3703+
UrQueue, UrKernel, WorkDim, GlobalWorkOffset, GlobalWorkSize,
3704+
LocalWorkSize, NumEventsInWaitList, UrEventsWaitList, UREvent));
3705+
3706+
return PI_SUCCESS;
3707+
}
3708+
36723709
inline pi_result
36733710
piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, pi_bool BlockingWrite,
36743711
pi_image_offset Origin, pi_image_region Region,

0 commit comments

Comments
 (0)