Skip to content

Commit e339962

Browse files
jbrodmanbader
authored andcommitted
[SYCL][USM] Enable per-context USM behavior (#517)
* Use PI interfaces and avoid directly calling CL inside SYCL RT. * Update CLUSM test. clext test no longer relevant. * Fix findplatforms issue due to CL weirdness for return types. Add multictxt tests. Signed-off-by: James Brodman <[email protected]>
1 parent 2276a42 commit e339962

21 files changed

+631
-577
lines changed

sycl/include/CL/sycl/detail/clusm.hpp

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@ namespace usm {
2424

2525
class CLUSM {
2626
public:
27-
static bool Create(CLUSM *&pCLUSM);
28-
static void Delete(CLUSM *&pCLUSM);
29-
30-
void initExtensions(cl_platform_id platform);
27+
CLUSM() = default;
28+
~CLUSM() = default;
3129

3230
void *hostMemAlloc(cl_context context, cl_mem_properties_intel *properties,
3331
size_t size, cl_uint alignment, cl_int *errcode_ret);
@@ -54,18 +52,9 @@ class CLUSM {
5452
cl_int writeParamToMemory(size_t param_value_size, T param,
5553
size_t *param_value_size_ret, T *pointer) const;
5654

57-
bool useCLUSM() { return mEnableCLUSM; }
58-
59-
bool isInitialized() { return mInitialized; }
60-
6155
private:
62-
bool mEnableCLUSM = true;
63-
bool mInitialized = false;
6456
std::mutex mLock;
6557

66-
CLUSM() = default;
67-
~CLUSM() = default;
68-
6958
struct SUSMAllocInfo {
7059
SUSMAllocInfo() = default;
7160

@@ -106,25 +95,6 @@ class CLUSM {
10695
};
10796

10897
} // namespace usm
109-
110-
namespace cliext {
111-
bool initializeExtensions(cl_platform_id platform);
112-
} // namespace cliext
113-
11498
} // namespace detail
11599
} // namespace sycl
116100
} // namespace cl
117-
118-
__SYCL_EXPORTED extern cl::sycl::detail::usm::CLUSM *gCLUSM;
119-
inline cl::sycl::detail::usm::CLUSM *GetCLUSM() {
120-
if (gCLUSM == nullptr) {
121-
cl::sycl::detail::usm::CLUSM::Create(gCLUSM);
122-
}
123-
124-
cl::sycl::detail::usm::CLUSM *retVal = nullptr;
125-
if (cl::sycl::detail::pi::useBackend(
126-
cl::sycl::detail::pi::Backend::SYCL_BE_PI_OPENCL)) {
127-
retVal = gCLUSM;
128-
}
129-
return retVal;
130-
}

sycl/include/CL/sycl/detail/context_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <CL/sycl/detail/common.hpp>
1111
#include <CL/sycl/detail/os_util.hpp>
1212
#include <CL/sycl/detail/pi.hpp>
13+
#include <CL/sycl/detail/usm_dispatch.hpp>
1314
#include <CL/sycl/exception_list.hpp>
1415
#include <CL/sycl/info/info_desc.hpp>
1516
#include <CL/sycl/platform.hpp>
@@ -64,6 +65,7 @@ class context_impl {
6465
return m_CachedKernels;
6566
}
6667

68+
std::shared_ptr<usm::USMDispatcher> getUSMDispatch() const;
6769
private:
6870
async_handler m_AsyncHandler;
6971
vector_class<device> m_Devices;
@@ -73,6 +75,7 @@ class context_impl {
7375
bool m_HostContext;
7476
std::map<OSModuleHandle, RT::PiProgram> m_CachedPrograms;
7577
std::map<RT::PiProgram, std::map<string_class, RT::PiKernel>> m_CachedKernels;
78+
std::shared_ptr<usm::USMDispatcher> m_USMDispatch;
7679
};
7780

7881
} // namespace detail
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
//==-------------- usm_dispatch.hpp - SYCL USM Dispatch --------*- C++ -*---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// ===--------------------------------------------------------------------=== //
8+
#pragma once
9+
10+
#include <CL/sycl/detail/clusm.hpp>
11+
12+
#include <memory>
13+
14+
namespace cl {
15+
namespace sycl {
16+
namespace detail {
17+
namespace usm {
18+
19+
class USMDispatcher {
20+
public:
21+
USMDispatcher(cl_platform_id Platform);
22+
23+
void *hostMemAlloc(pi_context Context, cl_mem_properties_intel *Properties,
24+
size_t Size, pi_uint32 Alignment, pi_result *ErrcodeRet);
25+
void *deviceMemAlloc(pi_context Context, pi_device Device,
26+
cl_mem_properties_intel *Properties, size_t Size,
27+
pi_uint32 Alignment, pi_result *ErrcodeRet);
28+
void *sharedMemAlloc(pi_context Context, pi_device Device,
29+
cl_mem_properties_intel *Properties, size_t Size,
30+
pi_uint32 Alignment, pi_result *ErrcodeRet);
31+
pi_result memFree(pi_context Context, void *Ptr);
32+
pi_result setKernelArgMemPointer(pi_kernel Kernel, pi_uint32 ArgIndex,
33+
const void *ArgValue);
34+
void setKernelIndirectAccess(pi_kernel Kernel, pi_queue Queue);
35+
pi_result enqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value,
36+
size_t Count, pi_uint32 NumEventsInWaitList,
37+
const pi_event *EventWaitList, pi_event *Event);
38+
pi_result enqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DestPtr,
39+
const void *SrcPtr, size_t Size,
40+
pi_uint32 NumEventsInWaitList,
41+
const pi_event *EventWaitList, pi_event *Event);
42+
pi_result enqueueMigrateMem(pi_queue Queue, const void *Ptr, size_t Size,
43+
cl_mem_migration_flags Flags,
44+
pi_uint32 NumEventsInWaitList,
45+
const pi_event *EventWaitList, pi_event *Event);
46+
pi_result enqueueMemAdvise(pi_queue Queue, void *Ptr, size_t Size,
47+
cl_mem_advice_intel Advice,
48+
pi_uint32 NumEventsInWaitList,
49+
const pi_event *EventWaitList, pi_event *Event);
50+
pi_result getMemAllocInfo(pi_context Context, const void *Ptr,
51+
cl_mem_info_intel ParamName, size_t ParamValueSize,
52+
void *ParamValue, size_t *ParamValueSizeRet);
53+
54+
private:
55+
bool mEmulated = false;
56+
std::unique_ptr<CLUSM> mEmulator;
57+
58+
clHostMemAllocINTEL_fn pfn_clHostMemAllocINTEL = nullptr;
59+
clDeviceMemAllocINTEL_fn pfn_clDeviceMemAllocINTEL = nullptr;
60+
clSharedMemAllocINTEL_fn pfn_clSharedMemAllocINTEL = nullptr;
61+
clMemFreeINTEL_fn pfn_clMemFreeINTEL = nullptr;
62+
clGetMemAllocInfoINTEL_fn pfn_clGetMemAllocInfoINTEL = nullptr;
63+
clSetKernelArgMemPointerINTEL_fn pfn_clSetKernelArgMemPointerINTEL = nullptr;
64+
clEnqueueMemsetINTEL_fn pfn_clEnqueueMemsetINTEL = nullptr;
65+
clEnqueueMemcpyINTEL_fn pfn_clEnqueueMemcpyINTEL = nullptr;
66+
clEnqueueMigrateMemINTEL_fn pfn_clEnqueueMigrateMemINTEL = nullptr;
67+
clEnqueueMemAdviseINTEL_fn pfn_clEnqueueMemAdviseINTEL = nullptr;
68+
};
69+
70+
} // namespace usm
71+
} // namespace detail
72+
} // namespace sycl
73+
} // namespace cl

sycl/include/CL/sycl/detail/usm_impl.hpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,13 @@ namespace sycl {
1616
namespace detail {
1717
namespace usm {
1818

19-
void *alignedAlloc(size_t alignment, size_t bytes, const context *ctxt,
20-
const device *dev, cl::sycl::usm::alloc kind);
19+
void *alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt,
20+
const device &Dev, cl::sycl::usm::alloc Kind);
2121

22-
void free(void *ptr, const context *ctxt);
22+
void *alignedAlloc(size_t Alignment, size_t Bytes, const context &Ctxt,
23+
cl::sycl::usm::alloc Kind);
24+
25+
void free(void *Ptr, const context &Ctxt);
2326

2427
} // namespace usm
2528
} // namespace detail

sycl/include/CL/sycl/usm/usm_allocator.hpp

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ class usm_allocator {
3333
typedef usm_allocator<U, AllocKind, Alignment> other;
3434
};
3535

36-
usm_allocator() : mContext(nullptr), mDevice(nullptr) {}
37-
usm_allocator(const context *ctxt, const device *dev)
38-
: mContext(ctxt), mDevice(dev) {}
39-
usm_allocator(const usm_allocator &other)
40-
: mContext(other.mContext), mDevice(other.mDevice) {}
36+
usm_allocator() = delete;
37+
usm_allocator(const context &Ctxt, const device &Dev)
38+
: mContext(Ctxt), mDevice(Dev) {}
39+
usm_allocator(const usm_allocator &Other)
40+
: mContext(Other.mContext), mDevice(Other.mDevice) {}
4141

4242
// Construct an object
4343
// Note: AllocKind == alloc::device is not allowed
@@ -105,10 +105,21 @@ class usm_allocator {
105105
}
106106

107107
// Allocate memory
108+
template <
109+
usm::alloc AllocT = AllocKind,
110+
typename std::enable_if<AllocT == usm::alloc::host, int>::type = 0>
108111
pointer allocate(size_t Size) {
109-
if (!mContext && !mDevice) {
112+
auto Result = reinterpret_cast<pointer>(detail::usm::alignedAlloc(
113+
getAlignment(), Size * sizeof(value_type), mContext, AllocKind));
114+
if (!Result) {
110115
throw memory_allocation_error();
111116
}
117+
return Result;
118+
}
119+
120+
template <usm::alloc AllocT = AllocKind,
121+
typename std::enable_if<AllocT != usm::alloc::host, int>::type = 0>
122+
pointer allocate(size_t Size) {
112123
auto Result = reinterpret_cast<pointer>(
113124
detail::usm::alignedAlloc(getAlignment(), Size * sizeof(value_type),
114125
mContext, mDevice, AllocKind));
@@ -137,8 +148,8 @@ class usm_allocator {
137148
return Alignment;
138149
}
139150

140-
const context *mContext;
141-
const device *mDevice;
151+
const context mContext;
152+
const device mDevice;
142153
};
143154

144155
} // namespace sycl

sycl/source/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ add_library(sycl SHARED
4343
"detail/scheduler/graph_processor.cpp"
4444
"detail/scheduler/graph_builder.cpp"
4545
"detail/usm/clusm.cpp"
46-
"detail/usm/opencl_shim.cpp"
46+
"detail/usm/usm_dispatch.cpp"
4747
"detail/usm/usm_impl.cpp"
4848
"detail/util.cpp"
4949
"context.cpp"

sycl/source/detail/context_impl.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
4040
RT::piContextCreate(0, DeviceIds.size(), DeviceIds.data(), 0, 0, &Err),
4141
Err));
4242

43-
if (usm::CLUSM* clusm = GetCLUSM()) {
44-
cl_platform_id id = m_Platform.get();
45-
clusm->initExtensions(id);
46-
}
43+
m_USMDispatch.reset(new usm::USMDispatcher(m_Platform.get()));
4744
}
4845

4946
context_impl::context_impl(cl_context ClContext, async_handler AsyncHandler)
@@ -126,6 +123,10 @@ context_impl::get_info<info::context::devices>() const {
126123
RT::PiContext &context_impl::getHandleRef() { return m_Context; }
127124
const RT::PiContext &context_impl::getHandleRef() const { return m_Context; }
128125

126+
std::shared_ptr<usm::USMDispatcher> context_impl::getUSMDispatch() const {
127+
return m_USMDispatch;
128+
}
129+
129130
} // namespace detail
130131
} // namespace sycl
131132
} // namespace cl

sycl/source/detail/queue_impl.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <CL/sycl/detail/clusm.hpp>
1111
#include <CL/sycl/detail/pi.hpp>
1212
#include <CL/sycl/detail/queue_impl.hpp>
13+
#include <CL/sycl/detail/usm_dispatch.hpp>
1314
#include <CL/sycl/device.hpp>
1415

1516
namespace cl {
@@ -32,32 +33,31 @@ template <> device queue_impl::get_info<info::queue::device>() const {
3233
return get_device();
3334
}
3435

35-
// TODO: Update with PI interfaces
36-
event queue_impl::memset(void* ptr, int value, size_t count) {
37-
cl_event e;
38-
cl_int error;
39-
cl_command_queue q = pi::cast<cl_command_queue>(getHandleRef());
36+
event queue_impl::memset(void *Ptr, int Value, size_t Count) {
37+
context Context = get_context();
38+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
39+
getSyclObjImpl(Context)->getUSMDispatch();
40+
cl_event Event;
4041

41-
error = clEnqueueMemsetINTEL(q, ptr, value, count,
42-
/* sizeof waitlist */ 0, nullptr, &e);
42+
PI_CHECK(USMDispatch->enqueueMemset(getHandleRef(), Ptr, Value, Count,
43+
/* sizeof waitlist */ 0, nullptr,
44+
reinterpret_cast<pi_event *>(&Event)));
4345

44-
CHECK_OCL_CODE_THROW(error, runtime_error);
45-
46-
return event(e, get_context());
46+
return event(Event, Context);
4747
}
4848

49-
event queue_impl::memcpy(void* dest, const void* src, size_t count) {
50-
cl_event e;
51-
cl_int error;
52-
cl_command_queue q = pi::cast<cl_command_queue>(getHandleRef());
53-
54-
error = clEnqueueMemcpyINTEL(q,
55-
/* blocking */ false, dest, src, count,
56-
/* sizeof waitlist */ 0, nullptr, &e);
49+
event queue_impl::memcpy(void *Dest, const void *Src, size_t Count) {
50+
context Context = get_context();
51+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
52+
getSyclObjImpl(Context)->getUSMDispatch();
53+
cl_event Event;
5754

58-
CHECK_OCL_CODE_THROW(error, runtime_error);
55+
PI_CHECK(USMDispatch->enqueueMemcpy(getHandleRef(),
56+
/* blocking */ false, Dest, Src, Count,
57+
/* sizeof waitlist */ 0, nullptr,
58+
reinterpret_cast<pi_event *>(&Event)));
5959

60-
return event(e, get_context());
60+
return event(Event, Context);
6161
}
6262
} // namespace detail
6363
} // namespace sycl

sycl/source/detail/scheduler/commands.cpp

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -660,10 +660,12 @@ cl_int ExecCGCommand::enqueueImp() {
660660
case kernel_param_kind_t::kind_pointer: {
661661
// TODO: Change to PI
662662
usesUSM = true;
663+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
664+
getSyclObjImpl(Context)->getUSMDispatch();
663665
auto PtrToPtr = reinterpret_cast<intptr_t*>(Arg.MPtr);
664666
auto DerefPtr = reinterpret_cast<void*>(*PtrToPtr);
665-
auto theKernel = pi::cast<cl_kernel>(Kernel);
666-
CHECK_OCL_CODE(clSetKernelArgMemPointerINTEL(theKernel, Arg.MIndex, DerefPtr));
667+
pi::cast<RT::PiResult>(
668+
USMDispatch->setKernelArgMemPointer(Kernel, Arg.MIndex, DerefPtr));
667669
break;
668670
}
669671
default:
@@ -675,38 +677,10 @@ cl_int ExecCGCommand::enqueueImp() {
675677
detail::getSyclObjImpl(
676678
MQueue->get_device())->getHandleRef());
677679

678-
// TODO: Replace CL with PI
679-
auto clusm = GetCLUSM();
680-
if (usesUSM && clusm) {
681-
cl_bool t = CL_TRUE;
682-
auto theKernel = pi::cast<cl_kernel>(Kernel);
683-
// Enable USM Indirect Access for Kernels
684-
if (clusm->useCLUSM()) {
685-
CHECK_OCL_CODE(clusm->setKernelExecInfo(
686-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
687-
sizeof(cl_bool), &t));
688-
CHECK_OCL_CODE(clusm->setKernelExecInfo(
689-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
690-
sizeof(cl_bool), &t));
691-
CHECK_OCL_CODE(clusm->setKernelExecInfo(
692-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
693-
sizeof(cl_bool), &t));
694-
695-
// This passes all the allocations we've tracked as SVM Pointers
696-
CHECK_OCL_CODE(clusm->setKernelIndirectUSMExecInfo(
697-
pi::cast<cl_command_queue>(MQueue->getHandleRef()), theKernel));
698-
} else if (clusm->isInitialized()) {
699-
// Sanity check that nothing went wrong setting up clusm
700-
CHECK_OCL_CODE(clSetKernelExecInfo(
701-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
702-
sizeof(cl_bool), &t));
703-
CHECK_OCL_CODE(clSetKernelExecInfo(
704-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
705-
sizeof(cl_bool), &t));
706-
CHECK_OCL_CODE(clSetKernelExecInfo(
707-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
708-
sizeof(cl_bool), &t));
709-
}
680+
if (usesUSM) {
681+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
682+
getSyclObjImpl(Context)->getUSMDispatch();
683+
USMDispatch->setKernelIndirectAccess(Kernel, MQueue->getHandleRef());
710684
}
711685

712686
PI_CALL(RT::piEnqueueKernelLaunch(

0 commit comments

Comments
 (0)