Skip to content

Commit b00e08a

Browse files
committed
Rewrite USM to go through PI instead of straight to CL
Signed-off-by: James Brodman <[email protected]> Initial commit for rewriting USM to support PI and multiple contexts. Signed-off-by: James Brodman <[email protected]> Update CLUSM test. clext test no longer relevant. Signed-off-by: James Brodman <[email protected]>
1 parent 97b6396 commit b00e08a

File tree

15 files changed

+487
-533
lines changed

15 files changed

+487
-533
lines changed

sycl/include/CL/sycl/detail/clusm.hpp

Lines changed: 2 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,8 @@ namespace usm {
2424

2525
class CLUSM {
2626
public:
27-
static bool Create(CLUSM *&pCLUSM);
28-
static void Delete(CLUSM *&pCLUSM);
29-
30-
void initExtensions(cl_platform_id platform);
27+
CLUSM() = default;
28+
~CLUSM() = default;
3129

3230
void *hostMemAlloc(cl_context context, cl_mem_properties_intel *properties,
3331
size_t size, cl_uint alignment, cl_int *errcode_ret);
@@ -54,18 +52,9 @@ class CLUSM {
5452
cl_int writeParamToMemory(size_t param_value_size, T param,
5553
size_t *param_value_size_ret, T *pointer) const;
5654

57-
bool useCLUSM() { return mEnableCLUSM; }
58-
59-
bool isInitialized() { return mInitialized; }
60-
6155
private:
62-
bool mEnableCLUSM = true;
63-
bool mInitialized = false;
6456
std::mutex mLock;
6557

66-
CLUSM() = default;
67-
~CLUSM() = default;
68-
6958
struct SUSMAllocInfo {
7059
SUSMAllocInfo() = default;
7160

@@ -106,25 +95,6 @@ class CLUSM {
10695
};
10796

10897
} // namespace usm
109-
110-
namespace cliext {
111-
bool initializeExtensions(cl_platform_id platform);
112-
} // namespace cliext
113-
11498
} // namespace detail
11599
} // namespace sycl
116100
} // namespace cl
117-
118-
__SYCL_EXPORTED extern cl::sycl::detail::usm::CLUSM *gCLUSM;
119-
inline cl::sycl::detail::usm::CLUSM *GetCLUSM() {
120-
if (gCLUSM == nullptr) {
121-
cl::sycl::detail::usm::CLUSM::Create(gCLUSM);
122-
}
123-
124-
cl::sycl::detail::usm::CLUSM *retVal = nullptr;
125-
if (cl::sycl::detail::pi::useBackend(
126-
cl::sycl::detail::pi::Backend::SYCL_BE_PI_OPENCL)) {
127-
retVal = gCLUSM;
128-
}
129-
return retVal;
130-
}

sycl/include/CL/sycl/detail/context_impl.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <CL/sycl/detail/common.hpp>
1111
#include <CL/sycl/detail/os_util.hpp>
1212
#include <CL/sycl/detail/pi.hpp>
13+
#include <CL/sycl/detail/usm_dispatch.hpp>
1314
#include <CL/sycl/exception_list.hpp>
1415
#include <CL/sycl/info/info_desc.hpp>
1516
#include <CL/sycl/platform.hpp>
@@ -64,6 +65,7 @@ class context_impl {
6465
return m_CachedKernels;
6566
}
6667

68+
std::shared_ptr<usm::USMDispatcher> getUSMDispatch() const;
6769
private:
6870
async_handler m_AsyncHandler;
6971
vector_class<device> m_Devices;
@@ -73,6 +75,7 @@ class context_impl {
7375
bool m_HostContext;
7476
std::map<OSModuleHandle, RT::PiProgram> m_CachedPrograms;
7577
std::map<RT::PiProgram, std::map<string_class, RT::PiKernel>> m_CachedKernels;
78+
std::shared_ptr<usm::USMDispatcher> m_USMDispatch;
7679
};
7780

7881
} // namespace detail

sycl/include/CL/sycl/detail/pi.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
// done here, for efficiency and simplicity.
2929
//
3030
#include <CL/opencl.h>
31+
#include <CL/cl_usm_ext.h>
3132
#include <cstdint>
3233

3334
#ifdef __cplusplus
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
//==-------------- usm_dispatch.hpp - SYCL USM Dispatch --------*- C++ -*---==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
// ===--------------------------------------------------------------------=== //
8+
#pragma once
9+
10+
#include <CL/sycl/detail/clusm.hpp>
11+
12+
#include <memory>
13+
14+
namespace cl {
15+
namespace sycl {
16+
namespace detail {
17+
namespace usm {
18+
19+
class USMDispatcher {
20+
public:
21+
USMDispatcher(cl_platform_id Platform);
22+
23+
void *hostMemAlloc(pi_context Context, cl_mem_properties_intel *Properties,
24+
size_t Size, pi_uint32 Alignment, pi_result *ErrcodeRet);
25+
void *deviceMemAlloc(pi_context Context, pi_device Device,
26+
cl_mem_properties_intel *Properties, size_t Size,
27+
pi_uint32 Alignment, pi_result *ErrcodeRet);
28+
void *sharedMemAlloc(pi_context Context, pi_device Device,
29+
cl_mem_properties_intel *Properties, size_t Size,
30+
pi_uint32 Alignment, pi_result *ErrcodeRet);
31+
pi_result memFree(pi_context Context, void *Ptr);
32+
pi_result setKernelArgMemPointer(pi_kernel Kernel, pi_uint32 ArgIndex,
33+
const void *ArgValue);
34+
void setKernelIndirectAccess(pi_kernel Kernel, pi_queue Queue);
35+
pi_result enqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value,
36+
size_t Count, pi_uint32 NumEventsInWaitList,
37+
const pi_event *EventWaitList, pi_event *Event);
38+
pi_result enqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DestPtr,
39+
const void *SrcPtr, size_t Size,
40+
pi_uint32 NumEventsInWaitList,
41+
const pi_event *EventWaitList, pi_event *Event);
42+
pi_result enqueueMigrateMem(pi_queue Queue, const void *Ptr, size_t Size,
43+
cl_mem_migration_flags Flags,
44+
pi_uint32 NumEventsInWaitList,
45+
const pi_event *EventWaitList, pi_event *Event);
46+
pi_result enqueueMemAdvise(pi_queue Queue, void *Ptr, size_t Size,
47+
cl_mem_advice_intel Advice,
48+
pi_uint32 NumEventsInWaitList,
49+
const pi_event *EventWaitList, pi_event *Event);
50+
pi_result getMemAllocInfo(pi_context Context, const void *Ptr,
51+
cl_mem_info_intel ParamName, size_t ParamValueSize,
52+
void *ParamValue, size_t *ParamValueSizeRet);
53+
54+
private:
55+
bool mEmulated = false;
56+
std::unique_ptr<CLUSM> mEmulator;
57+
58+
clHostMemAllocINTEL_fn pfn_clHostMemAllocINTEL = nullptr;
59+
clDeviceMemAllocINTEL_fn pfn_clDeviceMemAllocINTEL = nullptr;
60+
clSharedMemAllocINTEL_fn pfn_clSharedMemAllocINTEL = nullptr;
61+
clMemFreeINTEL_fn pfn_clMemFreeINTEL = nullptr;
62+
clGetMemAllocInfoINTEL_fn pfn_clGetMemAllocInfoINTEL = nullptr;
63+
clSetKernelArgMemPointerINTEL_fn pfn_clSetKernelArgMemPointerINTEL = nullptr;
64+
clEnqueueMemsetINTEL_fn pfn_clEnqueueMemsetINTEL = nullptr;
65+
clEnqueueMemcpyINTEL_fn pfn_clEnqueueMemcpyINTEL = nullptr;
66+
clEnqueueMigrateMemINTEL_fn pfn_clEnqueueMigrateMemINTEL = nullptr;
67+
clEnqueueMemAdviseINTEL_fn pfn_clEnqueueMemAdviseINTEL = nullptr;
68+
};
69+
70+
} // namespace usm
71+
} // namespace detail
72+
} // namespace sycl
73+
} // namespace cl

sycl/source/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ add_library(sycl SHARED
4343
"detail/scheduler/graph_processor.cpp"
4444
"detail/scheduler/graph_builder.cpp"
4545
"detail/usm/clusm.cpp"
46-
"detail/usm/opencl_shim.cpp"
46+
"detail/usm/usm_dispatch.cpp"
4747
"detail/usm/usm_impl.cpp"
4848
"detail/util.cpp"
4949
"context.cpp"

sycl/source/detail/context_impl.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,7 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
4040
RT::piContextCreate(0, DeviceIds.size(), DeviceIds.data(), 0, 0, &Err),
4141
Err));
4242

43-
if (usm::CLUSM* clusm = GetCLUSM()) {
44-
cl_platform_id id = m_Platform.get();
45-
clusm->initExtensions(id);
46-
}
43+
m_USMDispatch.reset(new usm::USMDispatcher(m_Platform.get()));
4744
}
4845

4946
context_impl::context_impl(cl_context ClContext, async_handler AsyncHandler)
@@ -126,6 +123,10 @@ context_impl::get_info<info::context::devices>() const {
126123
RT::PiContext &context_impl::getHandleRef() { return m_Context; }
127124
const RT::PiContext &context_impl::getHandleRef() const { return m_Context; }
128125

126+
std::shared_ptr<usm::USMDispatcher> context_impl::getUSMDispatch() const {
127+
return m_USMDispatch;
128+
}
129+
129130
} // namespace detail
130131
} // namespace sycl
131132
} // namespace cl

sycl/source/detail/pi_opencl.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88
#include "CL/opencl.h"
9+
#include "CL/cl_usm_ext.h"
910
#include <CL/sycl/detail/pi.hpp>
1011
#include <cassert>
1112
#include <cstring>

sycl/source/detail/queue_impl.cpp

Lines changed: 20 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <CL/sycl/detail/clusm.hpp>
1111
#include <CL/sycl/detail/pi.hpp>
1212
#include <CL/sycl/detail/queue_impl.hpp>
13+
#include <CL/sycl/detail/usm_dispatch.hpp>
1314
#include <CL/sycl/device.hpp>
1415

1516
namespace cl {
@@ -32,32 +33,31 @@ template <> device queue_impl::get_info<info::queue::device>() const {
3233
return get_device();
3334
}
3435

35-
// TODO: Update with PI interfaces
36-
event queue_impl::memset(void* ptr, int value, size_t count) {
37-
cl_event e;
38-
cl_int error;
39-
cl_command_queue q = pi::cast<cl_command_queue>(getHandleRef());
36+
event queue_impl::memset(void *Ptr, int Value, size_t Count) {
37+
context Context = get_context();
38+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
39+
getSyclObjImpl(Context)->getUSMDispatch();
40+
cl_event Event;
4041

41-
error = clEnqueueMemsetINTEL(q, ptr, value, count,
42-
/* sizeof waitlist */ 0, nullptr, &e);
42+
PI_CHECK(USMDispatch->enqueueMemset(getHandleRef(), Ptr, Value, Count,
43+
/* sizeof waitlist */ 0, nullptr,
44+
reinterpret_cast<pi_event *>(&Event)));
4345

44-
CHECK_OCL_CODE_THROW(error, runtime_error);
45-
46-
return event(e, get_context());
46+
return event(Event, Context);
4747
}
4848

49-
event queue_impl::memcpy(void* dest, const void* src, size_t count) {
50-
cl_event e;
51-
cl_int error;
52-
cl_command_queue q = pi::cast<cl_command_queue>(getHandleRef());
53-
54-
error = clEnqueueMemcpyINTEL(q,
55-
/* blocking */ false, dest, src, count,
56-
/* sizeof waitlist */ 0, nullptr, &e);
49+
event queue_impl::memcpy(void *Dest, const void *Src, size_t Count) {
50+
context Context = get_context();
51+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
52+
getSyclObjImpl(Context)->getUSMDispatch();
53+
cl_event Event;
5754

58-
CHECK_OCL_CODE_THROW(error, runtime_error);
55+
PI_CHECK(USMDispatch->enqueueMemcpy(getHandleRef(),
56+
/* blocking */ false, Dest, Src, Count,
57+
/* sizeof waitlist */ 0, nullptr,
58+
reinterpret_cast<pi_event *>(&Event)));
5959

60-
return event(e, get_context());
60+
return event(Event, Context);
6161
}
6262
} // namespace detail
6363
} // namespace sycl

sycl/source/detail/scheduler/commands.cpp

Lines changed: 8 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -660,10 +660,12 @@ cl_int ExecCGCommand::enqueueImp() {
660660
case kernel_param_kind_t::kind_pointer: {
661661
// TODO: Change to PI
662662
usesUSM = true;
663+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
664+
getSyclObjImpl(Context)->getUSMDispatch();
663665
auto PtrToPtr = reinterpret_cast<intptr_t*>(Arg.MPtr);
664666
auto DerefPtr = reinterpret_cast<void*>(*PtrToPtr);
665-
auto theKernel = pi::cast<cl_kernel>(Kernel);
666-
CHECK_OCL_CODE(clSetKernelArgMemPointerINTEL(theKernel, Arg.MIndex, DerefPtr));
667+
PI_CALL(
668+
USMDispatch->setKernelArgMemPointer(Kernel, Arg.MIndex, DerefPtr));
667669
break;
668670
}
669671
default:
@@ -675,38 +677,10 @@ cl_int ExecCGCommand::enqueueImp() {
675677
detail::getSyclObjImpl(
676678
MQueue->get_device())->getHandleRef());
677679

678-
// TODO: Replace CL with PI
679-
auto clusm = GetCLUSM();
680-
if (usesUSM && clusm) {
681-
cl_bool t = CL_TRUE;
682-
auto theKernel = pi::cast<cl_kernel>(Kernel);
683-
// Enable USM Indirect Access for Kernels
684-
if (clusm->useCLUSM()) {
685-
CHECK_OCL_CODE(clusm->setKernelExecInfo(
686-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
687-
sizeof(cl_bool), &t));
688-
CHECK_OCL_CODE(clusm->setKernelExecInfo(
689-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
690-
sizeof(cl_bool), &t));
691-
CHECK_OCL_CODE(clusm->setKernelExecInfo(
692-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
693-
sizeof(cl_bool), &t));
694-
695-
// This passes all the allocations we've tracked as SVM Pointers
696-
CHECK_OCL_CODE(clusm->setKernelIndirectUSMExecInfo(
697-
pi::cast<cl_command_queue>(MQueue->getHandleRef()), theKernel));
698-
} else if (clusm->isInitialized()) {
699-
// Sanity check that nothing went wrong setting up clusm
700-
CHECK_OCL_CODE(clSetKernelExecInfo(
701-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_HOST_ACCESS_INTEL,
702-
sizeof(cl_bool), &t));
703-
CHECK_OCL_CODE(clSetKernelExecInfo(
704-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_DEVICE_ACCESS_INTEL,
705-
sizeof(cl_bool), &t));
706-
CHECK_OCL_CODE(clSetKernelExecInfo(
707-
theKernel, CL_KERNEL_EXEC_INFO_INDIRECT_SHARED_ACCESS_INTEL,
708-
sizeof(cl_bool), &t));
709-
}
680+
if (usesUSM) {
681+
std::shared_ptr<usm::USMDispatcher> USMDispatch =
682+
getSyclObjImpl(Context)->getUSMDispatch();
683+
USMDispatch->setKernelIndirectAccess(Kernel, MQueue->getHandleRef());
710684
}
711685

712686
PI_CALL(RT::piEnqueueKernelLaunch(

sycl/source/detail/usm/clusm.cpp

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,30 +24,6 @@ namespace sycl {
2424
namespace detail {
2525
namespace usm {
2626

27-
bool CLUSM::Create(CLUSM *&pCLUSM) {
28-
pCLUSM = new CLUSM();
29-
if (pCLUSM) {
30-
return true;
31-
}
32-
33-
return false;
34-
}
35-
36-
void CLUSM::Delete(CLUSM *&pCLUSM) {
37-
delete pCLUSM;
38-
pCLUSM = nullptr;
39-
}
40-
41-
void CLUSM::initExtensions(cl_platform_id platform) {
42-
// If OpenCL supports the USM Extension, don't enable CLUSM.
43-
std::lock_guard<std::mutex> guard(mLock);
44-
45-
if (!mInitialized) {
46-
mEnableCLUSM = !cliext::initializeExtensions(platform);
47-
mInitialized = true;
48-
}
49-
}
50-
5127
void *CLUSM::hostMemAlloc(cl_context context,
5228
cl_mem_properties_intel *properties, size_t size,
5329
cl_uint alignment, cl_int *errcode_ret) {

0 commit comments

Comments
 (0)