Skip to content

Commit d6f5b35

Browse files
steffenlarsenbader
andauthored
[SYCL] Implement device_global host-side memory operations (#8022)
This commit implements the copy and memcpy operations to and from device_global. If the device_global does not have device_image_scope the memory operation will be on the underlying USM memory, while if the operation is on a device_global with device_image_scope the runtime will try to find a suitable program in the program cache or build a new program using the image using it. --------- Signed-off-by: Steffen Larsen <[email protected]> Co-authored-by: Alexey Bader <[email protected]>
1 parent 928645a commit d6f5b35

21 files changed

+1618
-54
lines changed

sycl/include/sycl/detail/cg.hpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class CG {
7272
Copy2DUSM = 16,
7373
Fill2DUSM = 17,
7474
Memset2DUSM = 18,
75+
CopyToDeviceGlobal = 19,
76+
CopyFromDeviceGlobal = 20,
7577
};
7678

7779
CG(CGTYPE Type, std::vector<std::vector<char>> ArgsStorage,
@@ -490,6 +492,74 @@ class CGMemset2DUSM : public CG {
490492
char getValue() const { return MValue; }
491493
};
492494

495+
/// "Copy to device_global" command group class.
496+
class CGCopyToDeviceGlobal : public CG {
497+
void *MSrc;
498+
void *MDeviceGlobalPtr;
499+
bool MIsDeviceImageScoped;
500+
size_t MNumBytes;
501+
size_t MOffset;
502+
detail::OSModuleHandle MOSModuleHandle;
503+
504+
public:
505+
CGCopyToDeviceGlobal(
506+
void *Src, void *DeviceGlobalPtr, bool IsDeviceImageScoped,
507+
size_t NumBytes, size_t Offset,
508+
std::vector<std::vector<char>> ArgsStorage,
509+
std::vector<detail::AccessorImplPtr> AccStorage,
510+
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
511+
std::vector<AccessorImplHost *> Requirements,
512+
std::vector<detail::EventImplPtr> Events,
513+
detail::OSModuleHandle OSModuleHandle, detail::code_location loc = {})
514+
: CG(CopyToDeviceGlobal, std::move(ArgsStorage), std::move(AccStorage),
515+
std::move(SharedPtrStorage), std::move(Requirements),
516+
std::move(Events), std::move(loc)),
517+
MSrc(Src), MDeviceGlobalPtr(DeviceGlobalPtr),
518+
MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes),
519+
MOffset(Offset), MOSModuleHandle(OSModuleHandle) {}
520+
521+
void *getSrc() { return MSrc; }
522+
void *getDeviceGlobalPtr() { return MDeviceGlobalPtr; }
523+
bool isDeviceImageScoped() { return MIsDeviceImageScoped; }
524+
size_t getNumBytes() { return MNumBytes; }
525+
size_t getOffset() { return MOffset; }
526+
detail::OSModuleHandle getOSModuleHandle() { return MOSModuleHandle; }
527+
};
528+
529+
/// "Copy to device_global" command group class.
530+
class CGCopyFromDeviceGlobal : public CG {
531+
void *MDeviceGlobalPtr;
532+
void *MDest;
533+
bool MIsDeviceImageScoped;
534+
size_t MNumBytes;
535+
size_t MOffset;
536+
detail::OSModuleHandle MOSModuleHandle;
537+
538+
public:
539+
CGCopyFromDeviceGlobal(
540+
void *DeviceGlobalPtr, void *Dest, bool IsDeviceImageScoped,
541+
size_t NumBytes, size_t Offset,
542+
std::vector<std::vector<char>> ArgsStorage,
543+
std::vector<detail::AccessorImplPtr> AccStorage,
544+
std::vector<std::shared_ptr<const void>> SharedPtrStorage,
545+
std::vector<AccessorImplHost *> Requirements,
546+
std::vector<detail::EventImplPtr> Events,
547+
detail::OSModuleHandle OSModuleHandle, detail::code_location loc = {})
548+
: CG(CopyFromDeviceGlobal, std::move(ArgsStorage), std::move(AccStorage),
549+
std::move(SharedPtrStorage), std::move(Requirements),
550+
std::move(Events), std::move(loc)),
551+
MDeviceGlobalPtr(DeviceGlobalPtr), MDest(Dest),
552+
MIsDeviceImageScoped(IsDeviceImageScoped), MNumBytes(NumBytes),
553+
MOffset(Offset), MOSModuleHandle(OSModuleHandle) {}
554+
555+
void *getDeviceGlobalPtr() { return MDeviceGlobalPtr; }
556+
void *getDest() { return MDest; }
557+
bool isDeviceImageScoped() { return MIsDeviceImageScoped; }
558+
size_t getNumBytes() { return MNumBytes; }
559+
size_t getOffset() { return MOffset; }
560+
detail::OSModuleHandle getOSModuleHandle() { return MOSModuleHandle; }
561+
};
562+
493563
} // namespace detail
494564
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
495565
} // namespace sycl

sycl/include/sycl/handler.hpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include <sycl/detail/handler_proxy.hpp>
1919
#include <sycl/detail/os_util.hpp>
2020
#include <sycl/event.hpp>
21+
#include <sycl/ext/oneapi/device_global/device_global.hpp>
2122
#include <sycl/ext/oneapi/kernel_properties/properties.hpp>
2223
#include <sycl/ext/oneapi/properties/properties.hpp>
2324
#include <sycl/ext/oneapi/properties/property.hpp>
@@ -2576,6 +2577,91 @@ class __SYCL_EXPORT handler {
25762577
commonUSMFill2DFallbackKernel(Dest, DestPitch, Pattern, Width, Height);
25772578
}
25782579

2580+
/// Copies data from a USM memory region to a device_global.
2581+
/// Throws an exception if the copy operation intends to write outside the
2582+
/// memory range \param Dest, as specified through \param NumBytes and
2583+
/// \param DestOffset.
2584+
///
2585+
/// \param Dest is the destination device_glboal.
2586+
/// \param Src is a USM pointer to the source memory.
2587+
/// \param NumBytes is a number of bytes to copy.
2588+
/// \param DestOffset is the offset into \param Dest to copy to.
2589+
template <typename T, typename PropertyListT>
2590+
void memcpy(ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
2591+
const void *Src, size_t NumBytes = sizeof(T),
2592+
size_t DestOffset = 0) {
2593+
if (sizeof(T) < DestOffset + NumBytes)
2594+
throw sycl::exception(make_error_code(errc::invalid),
2595+
"Copy to device_global is out of bounds.");
2596+
2597+
constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
2598+
ext::oneapi::experimental::device_image_scope_key>();
2599+
memcpyToDeviceGlobal(&Dest, Src, IsDeviceImageScoped, NumBytes, DestOffset);
2600+
}
2601+
2602+
/// Copies data from a device_global to USM memory.
2603+
/// Throws an exception if the copy operation intends to read outside the
2604+
/// memory range \param Src, as specified through \param NumBytes and
2605+
/// \param SrcOffset.
2606+
///
2607+
/// \param Dest is a USM pointer to copy to.
2608+
/// \param Src is the source device_global.
2609+
/// \param NumBytes is a number of bytes to copy.
2610+
/// \param SrcOffset is the offset into \param Src to copy from.
2611+
template <typename T, typename PropertyListT>
2612+
void
2613+
memcpy(void *Dest,
2614+
const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
2615+
size_t NumBytes = sizeof(T), size_t SrcOffset = 0) {
2616+
if (sizeof(T) < SrcOffset + NumBytes)
2617+
throw sycl::exception(make_error_code(errc::invalid),
2618+
"Copy from device_global is out of bounds.");
2619+
2620+
constexpr bool IsDeviceImageScoped = PropertyListT::template has_property<
2621+
ext::oneapi::experimental::device_image_scope_key>();
2622+
memcpyFromDeviceGlobal(Dest, &Src, IsDeviceImageScoped, NumBytes,
2623+
SrcOffset);
2624+
}
2625+
2626+
/// Copies elements of type `std::remove_all_extents_t<T>` from a USM memory
2627+
/// region to a device_global.
2628+
/// Throws an exception if the copy operation intends to write outside the
2629+
/// memory range \param Dest, as specified through \param Count and
2630+
/// \param StartIndex.
2631+
///
2632+
/// \param Src is a USM pointer to the source memory.
2633+
/// \param Dest is the destination device_glboal.
2634+
/// \param Count is a number of elements to copy.
2635+
/// \param StartIndex is the index of the first element in Dest to copy to.
2636+
template <typename T, typename PropertyListT>
2637+
void copy(const std::remove_all_extents_t<T> *Src,
2638+
ext::oneapi::experimental::device_global<T, PropertyListT> &Dest,
2639+
size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
2640+
size_t StartIndex = 0) {
2641+
this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
2642+
StartIndex * sizeof(std::remove_all_extents_t<T>));
2643+
}
2644+
2645+
/// Copies elements of type `std::remove_all_extents_t<T>` from a
2646+
/// device_global to a USM memory region.
2647+
/// Throws an exception if the copy operation intends to write outside the
2648+
/// memory range \param Src, as specified through \param Count and
2649+
/// \param StartIndex.
2650+
///
2651+
/// \param Src is the source device_global.
2652+
/// \param Dest is a USM pointer to copy to.
2653+
/// \param Count is a number of elements to copy.
2654+
/// \param StartIndex is the index of the first element in Src to copy from.
2655+
template <typename T, typename PropertyListT>
2656+
void
2657+
copy(const ext::oneapi::experimental::device_global<T, PropertyListT> &Src,
2658+
std::remove_all_extents_t<T> *Dest,
2659+
size_t Count = sizeof(T) / sizeof(std::remove_all_extents_t<T>),
2660+
size_t StartIndex = 0) {
2661+
this->memcpy(Dest, Src, Count * sizeof(std::remove_all_extents_t<T>),
2662+
StartIndex * sizeof(std::remove_all_extents_t<T>));
2663+
}
2664+
25792665
private:
25802666
std::shared_ptr<detail::handler_impl> MImpl;
25812667
std::shared_ptr<detail::queue_impl> MQueue;
@@ -2774,6 +2860,16 @@ class __SYCL_EXPORT handler {
27742860
// Implementation of ext_oneapi_memset2d using command for native 2D memset.
27752861
void ext_oneapi_memset2d_impl(void *Dest, size_t DestPitch, int Value,
27762862
size_t Width, size_t Height);
2863+
2864+
// Implementation of memcpy to device_global.
2865+
void memcpyToDeviceGlobal(const void *DeviceGlobalPtr, const void *Src,
2866+
bool IsDeviceImageScoped, size_t NumBytes,
2867+
size_t Offset);
2868+
2869+
// Implementation of memcpy from device_global.
2870+
void memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr,
2871+
bool IsDeviceImageScoped, size_t NumBytes,
2872+
size_t Offset);
27772873
};
27782874
} // __SYCL_INLINE_VER_NAMESPACE(_V1)
27792875
} // namespace sycl

0 commit comments

Comments
 (0)