Skip to content

[SYCL][L0] Skip uninitialized buffer migration copy #7168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 38 additions & 10 deletions sycl/plugins/level_zero/pi_level_zero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3127,8 +3127,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,

// intel extensions for GPU information
case PI_DEVICE_INFO_DEVICE_ID:
return ReturnValue(
pi_uint32{Device->ZeDeviceProperties->deviceId});
return ReturnValue(pi_uint32{Device->ZeDeviceProperties->deviceId});
case PI_DEVICE_INFO_PCI_ADDRESS: {
if (getenv("ZES_ENABLE_SYSMAN") == nullptr) {
zePrint("Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n");
Expand Down Expand Up @@ -3958,8 +3957,12 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
(Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0) {

// We don't yet know which device needs this buffer, so make the first
// device in the context be the master, and hold the initial valid
// allocation.
char *ZeHandleDst;
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only));
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only,
Context->Devices[0]));
if (Buffer->OnHost) {
// Do a host to host copy.
// For an imported HostPtr the copy is unneeded.
Expand Down Expand Up @@ -4331,7 +4334,7 @@ pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
// represent the buffer in this context) copy the data to a newly
// created device allocation.
char *ZeHandleDst;
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only));
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only, Device));

// zeCommandListAppendMemoryCopy must not be called from simultaneous
// threads with the same command list handle, so we need exclusive lock.
Expand Down Expand Up @@ -8757,9 +8760,16 @@ size_t _pi_buffer::getAlignment() const {
pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
pi_device Device) {

// NOTE: There might be no valid allocation at all yet and we get
// here from piEnqueueKernelLaunch that would be doing the buffer
// initialization. In this case the Device is not null as kernel
// launch is always on a specific device.
if (!Device)
Device = LastDeviceWithValidAllocation;
PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE);
// If the device is still not selected then use the first one in
// the context of the buffer.
if (!Device)
Device = Context->Devices[0];

auto &Allocation = Allocations[Device];

Expand Down Expand Up @@ -8814,6 +8824,9 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
// devices in the context have the same root.
PI_CALL(getZeHandle(ZeHandle, AccessMode, Context->SingleRootDevice));
Allocation.ReleaseAction = allocation_t::keep;
Allocation.ZeHandle = ZeHandle;
Allocation.Valid = true;
return PI_SUCCESS;
} else { // Create device allocation
if (enableBufferPooling()) {
Allocation.ReleaseAction = allocation_t::free;
Expand All @@ -8836,13 +8849,27 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
if (Device == LastDeviceWithValidAllocation)
die("getZeHandle: last used allocation is not valid");

Allocation.Valid = true;
// For write-only access the allocation contents is not going to be used.
// So don't do anything to make it "valid".
if (AccessMode != _pi_mem::write_only) {
char *ZeHandleSrc;
bool NeedCopy = AccessMode != _pi_mem::write_only;
// It's also possible that the buffer doesn't have a valid allocation
// yet presumably when it is passed to a kernel that will perform
// it's intialization.
if (NeedCopy && !LastDeviceWithValidAllocation) {
NeedCopy = false;
}
char *ZeHandleSrc = nullptr;
if (NeedCopy) {
PI_CALL(getZeHandle(ZeHandleSrc, _pi_mem::read_only,
LastDeviceWithValidAllocation));
// It's possible with the single root-device contexts that
// the buffer is represented by the single root-device
// allocation and then skip the copy to itself.
if (ZeHandleSrc == ZeHandle)
NeedCopy = false;
Comment on lines +8868 to +8869
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note, that in the case that I was fixing (single root-device context with multiple sub-devices in it) this alone is probably enough, but I thought that handling LastDeviceWithValidAllocation == nullptr is also a good idea and it avoids many copies by itself even without this check

}

if (NeedCopy) {
// Copy valid buffer data to this allocation.
// TODO: see if we should better use peer's device allocation used
// directly, if that capability is reported with zeDeviceCanAccessPeer,
Expand All @@ -8851,7 +8878,7 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
//
// zeCommandListAppendMemoryCopy must not be called from simultaneous
// threads with the same command list handle, so we need exclusive lock.
ze_bool_t P2P;
ze_bool_t P2P = false;
ZE_CALL(
zeDeviceCanAccessPeer,
(Device->ZeDevice, LastDeviceWithValidAllocation->ZeDevice, &P2P));
Expand Down Expand Up @@ -8895,9 +8922,10 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
Size, nullptr, 0, nullptr));
}
}
Allocation.Valid = true;
LastDeviceWithValidAllocation = Device;
}

LastDeviceWithValidAllocation = Device;
// Invalidate other allocations that would become not valid if
// this access is not read-only.
if (AccessMode != _pi_mem::read_only) {
Expand Down
8 changes: 2 additions & 6 deletions sycl/plugins/level_zero/pi_level_zero.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1131,12 +1131,8 @@ struct _pi_buffer final : _pi_mem {
}
}

// Make first device in the context be the master. Mark that
// allocation (yet to be made) having "valid" data. And real
// allocation and initialization should follow the buffer
// construction with a "write_only" access copy.
LastDeviceWithValidAllocation = Context->Devices[0];
Allocations[LastDeviceWithValidAllocation].Valid = true;
// This initialization does not end up with any valid allocation yet.
LastDeviceWithValidAllocation = nullptr;
}

// Sub-buffer constructor
Expand Down