Skip to content

[SYCL][ESIMD] Support accessors for ESIMD kernels in SYCL RT. #1870

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion sycl/include/CL/__spirv/spirv_vars.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

#define __SPIRV_VAR_QUALIFIERS extern "C" const

#ifdef __SYCL_NVPTX__
#if defined(__SYCL_NVPTX__) || defined(__SYCL_EXPLICIT_SIMD__)

SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_x();
SYCL_EXTERNAL size_t __spirv_GlobalInvocationId_y();
Expand Down
44 changes: 42 additions & 2 deletions sycl/include/CL/sycl/accessor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,17 @@
/// accessor_common contains several helpers common for both accessor(1) and
/// accessor(3)

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace intel {
namespace gpu {
// Forward declare a "back-door" access class to support ESIMD.
class AccessorPrivateProxy;
} // namespace gpu
} // namespace intel
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {

Expand Down Expand Up @@ -419,6 +430,13 @@ class image_accessor

#endif

private:
friend class sycl::intel::gpu::AccessorPrivateProxy;

#if defined(__SYCL_DEVICE_ONLY__) && defined(__SYCL_EXPLICIT_SIMD__)
const OCLImageTy getNativeImageObj() const { return MImageObj; }
#endif // __SYCL_DEVICE_ONLY__ && __SYCL_EXPLICIT_SIMD__

public:
using value_type = DataT;
using reference = DataT &;
Expand Down Expand Up @@ -805,8 +823,27 @@ class accessor :

detail::AccessorImplDevice<AdjustedDim> impl;

ConcreteASPtrType MData;
#ifdef __SYCL_EXPLICIT_SIMD__
using OCLImage1dBufferTy =
typename detail::opencl_image1d_buffer_type<AccessMode>::type;
#endif // __SYCL_EXPLICIT_SIMD__

union {
ConcreteASPtrType MData;
#ifdef __SYCL_EXPLICIT_SIMD__
OCLImage1dBufferTy ImageBuffer;
#endif // __SYCL_EXPLICIT_SIMD__
};

#ifdef __SYCL_EXPLICIT_SIMD__
// TODO In ESIMD accessors usage is limited for now - access range, mem
// range and offset are not supported. The cl_mem object allocated for
// a global accessor is always wrapped into a 1d image buffer to enable
// surface index-based addressing.
void __init(OCLImage1dBufferTy ImgBuf) { ImageBuffer = ImgBuf; }

const OCLImage1dBufferTy getNativeImageObj() const { return ImageBuffer; }
#else
void __init(ConcreteASPtrType Ptr, range<AdjustedDim> AccessRange,
range<AdjustedDim> MemRange, id<AdjustedDim> Offset) {
MData = Ptr;
Expand All @@ -820,7 +857,7 @@ class accessor :
if (1 == AdjustedDim)
MData += Offset[0];
}

#endif // __SYCL_EXPLICIT_SIMD__
ConcreteASPtrType getQualifiedPtr() const { return MData; }

public:
Expand All @@ -843,6 +880,9 @@ class accessor :

#endif // __SYCL_DEVICE_ONLY__

private:
friend class sycl::intel::gpu::AccessorPrivateProxy;

public:
using value_type = DataT;
using reference = DataT &;
Expand Down
39 changes: 35 additions & 4 deletions sycl/include/CL/sycl/detail/accessor_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@
#include <CL/sycl/range.hpp>
#include <CL/sycl/stl.hpp>

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace intel {
namespace gpu {
// Forward declare a "back-door" access class to support ESIMD.
class AccessorPrivateProxy;
} // namespace gpu
} // namespace intel
} // namespace sycl
} // __SYCL_INLINE_NAMESPACE(cl)

__SYCL_INLINE_NAMESPACE(cl) {
namespace sycl {
namespace detail {
Expand Down Expand Up @@ -59,16 +70,29 @@ template <int Dims> class LocalAccessorBaseDevice {
}
};

// TODO ESIMD Currently all accessors are treated as ESIMD under corresponding
// compiler option enabling the macro below. Eventually ESIMD kernels and usual
// kernels must co-exist and there must be a mechanism for distinguishing usual
// and ESIMD accessors.
#ifndef __SYCL_EXPLICIT_SIMD__
constexpr bool IsESIMDAccInit = false;
#else
constexpr bool IsESIMDAccInit = true;
#endif // __SYCL_EXPLICIT_SIMD__

class __SYCL_EXPORT AccessorImplHost {
public:
AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange,
access::mode AccessMode, detail::SYCLMemObjI *SYCLMemObject,
int Dims, int ElemSize, int OffsetInBytes = 0,
bool IsSubBuffer = false)
bool IsSubBuffer = false, bool IsESIMDAcc = IsESIMDAccInit)
: MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange),
MAccessMode(AccessMode), MSYCLMemObj(SYCLMemObject), MDims(Dims),
MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes),
MIsSubBuffer(IsSubBuffer) {}
MIsSubBuffer(IsSubBuffer) {
MIsESIMDAcc =
IsESIMDAcc && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER);
}

~AccessorImplHost();

Expand All @@ -77,7 +101,7 @@ class __SYCL_EXPORT AccessorImplHost {
MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode),
MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims),
MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes),
MIsSubBuffer(Other.MIsSubBuffer) {}
MIsSubBuffer(Other.MIsSubBuffer), MIsESIMDAcc(Other.MIsESIMDAcc) {}

// The resize method provides a way to change the size of the
// allocated memory and corresponding properties for the accessor.
Expand Down Expand Up @@ -109,6 +133,9 @@ class __SYCL_EXPORT AccessorImplHost {
Command *MBlockedCmd = nullptr;

bool PerWI = false;

// Whether this accessor is ESIMD accessor with special memory allocation.
bool MIsESIMDAcc;
};

using AccessorImplPtr = shared_ptr_class<AccessorImplHost>;
Expand All @@ -121,7 +148,8 @@ class AccessorBaseHost {
bool IsSubBuffer = false) {
impl = shared_ptr_class<AccessorImplHost>(new AccessorImplHost(
Offset, AccessRange, MemoryRange, AccessMode, SYCLMemObject, Dims,
ElemSize, OffsetInBytes, IsSubBuffer));
ElemSize, OffsetInBytes, IsSubBuffer,
IsESIMDAccInit && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER)));
}

protected:
Expand All @@ -140,6 +168,9 @@ class AccessorBaseHost {
friend decltype(Obj::impl) getSyclObjImpl(const Obj &SyclObject);

AccessorImplPtr impl;

private:
friend class sycl::intel::gpu::AccessorPrivateProxy;
};

class __SYCL_EXPORT LocalAccessorImplHost {
Expand Down
24 changes: 24 additions & 0 deletions sycl/include/CL/sycl/detail/image_ocl_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,30 @@ inline int getSPIRVElementSize(int ImageChannelType, int ImageChannelOrder) {
}
}

#ifdef __SYCL_EXPLICIT_SIMD__
template <access::mode AccessMode> struct opencl_image1d_buffer_type;

// OpenCL types used only when compiling DPCPP ESIMD kernels
#define IMAGE_BUFFER_TY_DEFINE(AccessMode, AMSuffix) \
template <> struct opencl_image1d_buffer_type<access::mode::AccessMode> { \
using type = __ocl_image1d_buffer_##AMSuffix##_t; \
}

IMAGE_BUFFER_TY_DEFINE(read, ro);
IMAGE_BUFFER_TY_DEFINE(write, wo);
IMAGE_BUFFER_TY_DEFINE(discard_write, wo);
IMAGE_BUFFER_TY_DEFINE(read_write, rw);

template <> struct opencl_image1d_buffer_type<access::mode::atomic> {
// static_assert(false && "atomic access not supported for image1d
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably you can have this static assert in the "default" definition of opencl_image1d_buffer_type on line 184 which should be instantiated if there is no matching specialization.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does not work unfortunately, as there is instantiation in stream_impl.hpp - GlobalOffsetAccessorT.

// buffers");
// TODO this should be disabled; currently there is instantiation of this
// class happenning even if atomic access not used - using dummy type
// definition for now.
using type = unsigned int;
};
#endif // __SYCL_EXPLICIT_SIMD__

template <int Dimensions, access::mode AccessMode, access::target AccessTarget>
struct opencl_image_type;

Expand Down
8 changes: 8 additions & 0 deletions sycl/include/CL/sycl/detail/memory_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ class __SYCL_EXPORT MemoryManager {
std::vector<EventImplPtr> DepEvents,
RT::PiEvent &OutEvent);

// Allocates memory buffer wrapped into an image. MemObj must be a buffer,
// not an image. Used in ESIMD extension to enable surface index-based access.
static void *wrapIntoImageBuffer(ContextImplPtr TargetContext, void *MemBuf,
SYCLMemObjI *MemObj);

// Releases the image buffer created by wrapIntoImageBuffer.
static void releaseImageBuffer(ContextImplPtr TargetContext, void *ImageBuf);

// The following method creates OpenCL sub buffer for specified
// offset, range, and memory object.
static void *allocateMemSubBuffer(ContextImplPtr TargetContext,
Expand Down
6 changes: 6 additions & 0 deletions sycl/include/CL/sycl/detail/stl_type_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#pragma once

#include <CL/sycl/detail/defines.hpp>
#include <iterator>
#include <memory>
#include <type_traits>
Expand Down Expand Up @@ -35,6 +36,11 @@ using remove_reference_t = typename std::remove_reference<T>::type;

template <typename T> using add_pointer_t = typename std::add_pointer<T>::type;

template <typename T> using remove_cv_t = typename std::remove_cv<T>::type;

template <typename T>
using remove_reference_t = typename std::remove_reference<T>::type;

// C++17
template <bool V> using bool_constant = std::integral_constant<bool, V>;

Expand Down
1 change: 0 additions & 1 deletion sycl/source/detail/accessor_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,3 @@ void addHostAccessorAndWait(Requirement *Req) {
}
}
}

30 changes: 30 additions & 0 deletions sycl/source/detail/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
MemObj->releaseMem(TargetContext, MemAllocation);
}

void MemoryManager::releaseImageBuffer(ContextImplPtr TargetContext,
void *ImageBuf) {
auto PIObj = reinterpret_cast<pi_mem>(ImageBuf);
TargetContext->getPlugin().call<PiApiKind::piMemRelease>(PIObj);
}

void MemoryManager::releaseMemObj(ContextImplPtr TargetContext,
SYCLMemObjI *MemObj, void *MemAllocation,
void *UserPtr) {
Expand Down Expand Up @@ -75,6 +81,30 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj,
OutEvent);
}

// Creates an image1d buffer wrapper object around given memory object.
void *MemoryManager::wrapIntoImageBuffer(ContextImplPtr TargetContext,
void *MemBuf, SYCLMemObjI *MemObj) {
// Image format: 1 channel per pixel, each pixel 8 bit, Size pixels occupies
// Size bytes.
pi_image_format Format = {PI_IMAGE_CHANNEL_ORDER_R,
PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8};

// Image descriptor - request wrapper image1d creation.
pi_image_desc Desc = {};
Desc.image_type = PI_MEM_TYPE_IMAGE1D_BUFFER;
Desc.image_width = MemObj->getSize();
Desc.buffer = reinterpret_cast<pi_mem>(MemBuf);

// Create the image object.
const detail::plugin &Plugin = TargetContext->getPlugin();
pi_mem Res = nullptr;
pi_mem_flags Flags = 0;
// Do not ref count the context handle, as it is not captured by the call.
Plugin.call<PiApiKind::piMemImageCreate>(TargetContext->getHandleRef(), Flags,
&Format, &Desc, nullptr, &Res);
return Res;
}

void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr,
bool HostPtrReadOnly, size_t Size) {
// Can return user pointer directly if it points to writable memory.
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,14 @@ ProgramManager::build(ProgramPtr Program, const ContextImplPtr Context,
LinkDeviceLibs = false;
}

// TODO: this is a temporary workaround for GPU tests for ESIMD compiler.
// We do not link with other device libraries, because it may fail
// due to unrecognized SPIRV format of those libraries.
if (std::string(LinkOpts).find(std::string("-cmc")) != std::string::npos ||
std::string(LinkOpts).find(std::string("-vc-codegen")) !=
std::string::npos)
LinkDeviceLibs = false;

std::vector<RT::PiProgram> LinkPrograms;
if (LinkDeviceLibs) {
LinkPrograms = getDeviceLibPrograms(Context, Devices, CachedLibPrograms);
Expand Down
20 changes: 17 additions & 3 deletions sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,6 +751,14 @@ cl_int AllocaCommand::enqueueImp() {
detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(),
MInitFromUserData, HostPtr, std::move(EventImpls), Event);

// if this is ESIMD accessor, wrap the allocated device memory buffer into
// an image buffer object.
// TODO Address copying SYCL/ESIMD memory between contexts.
if (getRequirement()->MIsESIMDAcc)
ESIMDExt.MWrapperImage = MemoryManager::wrapIntoImageBuffer(
detail::getSyclObjImpl(MQueue->get_context()), MMemAllocation,
getSYCLMemObj());

return CL_SUCCESS;
}

Expand Down Expand Up @@ -937,12 +945,16 @@ cl_int ReleaseCommand::enqueueImp() {
RT::PiEvent &Event = MEvent->getHandleRef();
if (SkipRelease)
Command::waitForEvents(MQueue, EventImpls, Event);
else
else {
MemoryManager::release(detail::getSyclObjImpl(MQueue->get_context()),
MAllocaCmd->getSYCLMemObj(),
MAllocaCmd->getMemAllocation(),
std::move(EventImpls), Event);

// Release the wrapper object if present.
if (void *WrapperImage = MAllocaCmd->ESIMDExt.MWrapperImage)
MemoryManager::releaseImageBuffer(
detail::getSyclObjImpl(MQueue->get_context()), WrapperImage);
}
return CL_SUCCESS;
}

Expand Down Expand Up @@ -1638,7 +1650,9 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch(
case kernel_param_kind_t::kind_accessor: {
Requirement *Req = (Requirement *)(Arg.MPtr);
AllocaCommandBase *AllocaCmd = getAllocaForReq(Req);
RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation();
RT::PiMem MemArg = Req->MIsESIMDAcc
? (RT::PiMem)AllocaCmd->ESIMDExt.MWrapperImage
: (RT::PiMem)AllocaCmd->getMemAllocation();
if (Plugin.getBackend() == backend::opencl) {
Plugin.call<PiApiKind::piKernelSetArg>(Kernel, Arg.MIndex,
sizeof(RT::PiMem), &MemArg);
Expand Down
7 changes: 7 additions & 0 deletions sycl/source/detail/scheduler/commands.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,13 @@ class AllocaCommandBase : public Command {

void *MMemAllocation = nullptr;

// ESIMD-extension-specific fields.
struct {
// If this alloca corresponds to an ESIMD accessor, then this field holds
// an image buffer wrapping the memory allocation above.
void *MWrapperImage = nullptr;
} ESIMDExt;

/// Alloca command linked with current command.
/// Device and host alloca commands can be linked, so they may share the same
/// memory. Only one allocation from a pair can be accessed at a time. Alloca
Expand Down
4 changes: 3 additions & 1 deletion sycl/source/detail/scheduler/graph_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,9 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq(

const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange,
Req->MMemoryRange, access::mode::read_write,
Req->MSYCLMemObj, Req->MDims, Req->MElemSize);
Req->MSYCLMemObj, Req->MDims, Req->MElemSize,
0 /*ReMOffsetInBytes*/, false /*MIsSubBuffer*/,
Req->MIsESIMDAcc);
// Can reuse user data for the first allocation
const bool InitFromUserData = Record->MAllocaCommands.empty();

Expand Down
6 changes: 5 additions & 1 deletion sycl/source/handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind,
AccImpl->resize(MNDRDesc.GlobalSize.size());
}
MArgs.emplace_back(Kind, AccImpl, Size, Index + IndexShift);
if (!IsKernelCreatedFromSource) {

// TODO ESIMD currently does not suport offset, memory and access ranges -
// accessor::init for ESIMD-mode accessor has a single field, translated
// to a single kernel argument set above.
if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) {
// Dimensionality of the buffer is 1 when dimensionality of the
// accessor is 0.
const size_t SizeAccField =
Expand Down
2 changes: 2 additions & 0 deletions sycl/test/abi/sycl_symbols_linux.dump
Original file line number Diff line number Diff line change
Expand Up @@ -3736,6 +3736,8 @@ _ZN2cl4sycl6detail13MemoryManager7releaseESt10shared_ptrINS1_12context_implEEPNS
_ZN2cl4sycl6detail13MemoryManager8allocateESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEbPvSt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event
_ZN2cl4sycl6detail13MemoryManager8copy_usmEPKvSt10shared_ptrINS1_10queue_implEEmPvSt6vectorIP9_pi_eventSaISB_EERSB_
_ZN2cl4sycl6detail13MemoryManager8fill_usmEPvSt10shared_ptrINS1_10queue_implEEmiSt6vectorIP9_pi_eventSaIS9_EERS9_
_ZN2cl4sycl6detail13MemoryManager18releaseImageBufferESt10shared_ptrINS1_12context_implEEPv
_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE
_ZN2cl4sycl6detail14getBorderColorENS0_19image_channel_orderE
_ZN2cl4sycl6detail14host_half_impl4halfC1ERKf
_ZN2cl4sycl6detail14host_half_impl4halfC2ERKf
Expand Down
Loading