Skip to content

Commit b0c3404

Browse files
[SYCL][L0] Skip uninitialized buffer migration copy (#7168)
Signed-off-by: Sergey V Maslov <[email protected]>
1 parent 0417651 commit b0c3404

File tree

2 files changed

+40
-16
lines changed

2 files changed

+40
-16
lines changed

sycl/plugins/level_zero/pi_level_zero.cpp

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3127,8 +3127,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
31273127

31283128
// intel extensions for GPU information
31293129
case PI_DEVICE_INFO_DEVICE_ID:
3130-
return ReturnValue(
3131-
pi_uint32{Device->ZeDeviceProperties->deviceId});
3130+
return ReturnValue(pi_uint32{Device->ZeDeviceProperties->deviceId});
31323131
case PI_DEVICE_INFO_PCI_ADDRESS: {
31333132
if (getenv("ZES_ENABLE_SYSMAN") == nullptr) {
31343133
zePrint("Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n");
@@ -3958,8 +3957,12 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
39583957
if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
39593958
(Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0) {
39603959

3960+
// We don't yet know which device needs this buffer, so make the first
3961+
// device in the context be the master, and hold the initial valid
3962+
// allocation.
39613963
char *ZeHandleDst;
3962-
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only));
3964+
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only,
3965+
Context->Devices[0]));
39633966
if (Buffer->OnHost) {
39643967
// Do a host to host copy.
39653968
// For an imported HostPtr the copy is unneeded.
@@ -4331,7 +4334,7 @@ pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
43314334
// represent the buffer in this context) copy the data to a newly
43324335
// created device allocation.
43334336
char *ZeHandleDst;
4334-
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only));
4337+
PI_CALL(Buffer->getZeHandle(ZeHandleDst, _pi_mem::write_only, Device));
43354338

43364339
// zeCommandListAppendMemoryCopy must not be called from simultaneous
43374340
// threads with the same command list handle, so we need exclusive lock.
@@ -8757,9 +8760,16 @@ size_t _pi_buffer::getAlignment() const {
87578760
pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
87588761
pi_device Device) {
87598762

8763+
// NOTE: There might be no valid allocation at all yet and we get
8764+
// here from piEnqueueKernelLaunch that would be doing the buffer
8765+
// initialization. In this case the Device is not null as kernel
8766+
// launch is always on a specific device.
87608767
if (!Device)
87618768
Device = LastDeviceWithValidAllocation;
8762-
PI_ASSERT(Device, PI_ERROR_INVALID_DEVICE);
8769+
// If the device is still not selected then use the first one in
8770+
// the context of the buffer.
8771+
if (!Device)
8772+
Device = Context->Devices[0];
87638773

87648774
auto &Allocation = Allocations[Device];
87658775

@@ -8814,6 +8824,9 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88148824
// devices in the context have the same root.
88158825
PI_CALL(getZeHandle(ZeHandle, AccessMode, Context->SingleRootDevice));
88168826
Allocation.ReleaseAction = allocation_t::keep;
8827+
Allocation.ZeHandle = ZeHandle;
8828+
Allocation.Valid = true;
8829+
return PI_SUCCESS;
88178830
} else { // Create device allocation
88188831
if (enableBufferPooling()) {
88198832
Allocation.ReleaseAction = allocation_t::free;
@@ -8836,13 +8849,27 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88368849
if (Device == LastDeviceWithValidAllocation)
88378850
die("getZeHandle: last used allocation is not valid");
88388851

8839-
Allocation.Valid = true;
88408852
// For write-only access the allocation contents is not going to be used.
88418853
// So don't do anything to make it "valid".
8842-
if (AccessMode != _pi_mem::write_only) {
8843-
char *ZeHandleSrc;
8854+
bool NeedCopy = AccessMode != _pi_mem::write_only;
8855+
// It's also possible that the buffer doesn't have a valid allocation
8856+
// yet presumably when it is passed to a kernel that will perform
8857+
// it's intialization.
8858+
if (NeedCopy && !LastDeviceWithValidAllocation) {
8859+
NeedCopy = false;
8860+
}
8861+
char *ZeHandleSrc = nullptr;
8862+
if (NeedCopy) {
88448863
PI_CALL(getZeHandle(ZeHandleSrc, _pi_mem::read_only,
88458864
LastDeviceWithValidAllocation));
8865+
// It's possible with the single root-device contexts that
8866+
// the buffer is represented by the single root-device
8867+
// allocation and then skip the copy to itself.
8868+
if (ZeHandleSrc == ZeHandle)
8869+
NeedCopy = false;
8870+
}
8871+
8872+
if (NeedCopy) {
88468873
// Copy valid buffer data to this allocation.
88478874
// TODO: see if we should better use peer's device allocation used
88488875
// directly, if that capability is reported with zeDeviceCanAccessPeer,
@@ -8851,7 +8878,7 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88518878
//
88528879
// zeCommandListAppendMemoryCopy must not be called from simultaneous
88538880
// threads with the same command list handle, so we need exclusive lock.
8854-
ze_bool_t P2P;
8881+
ze_bool_t P2P = false;
88558882
ZE_CALL(
88568883
zeDeviceCanAccessPeer,
88578884
(Device->ZeDevice, LastDeviceWithValidAllocation->ZeDevice, &P2P));
@@ -8895,9 +8922,10 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
88958922
Size, nullptr, 0, nullptr));
88968923
}
88978924
}
8925+
Allocation.Valid = true;
8926+
LastDeviceWithValidAllocation = Device;
88988927
}
88998928

8900-
LastDeviceWithValidAllocation = Device;
89018929
// Invalidate other allocations that would become not valid if
89028930
// this access is not read-only.
89038931
if (AccessMode != _pi_mem::read_only) {

sycl/plugins/level_zero/pi_level_zero.hpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1131,12 +1131,8 @@ struct _pi_buffer final : _pi_mem {
11311131
}
11321132
}
11331133

1134-
// Make first device in the context be the master. Mark that
1135-
// allocation (yet to be made) having "valid" data. And real
1136-
// allocation and initialization should follow the buffer
1137-
// construction with a "write_only" access copy.
1138-
LastDeviceWithValidAllocation = Context->Devices[0];
1139-
Allocations[LastDeviceWithValidAllocation].Valid = true;
1134+
// This initialization does not end up with any valid allocation yet.
1135+
LastDeviceWithValidAllocation = nullptr;
11401136
}
11411137

11421138
// Sub-buffer constructor

0 commit comments

Comments
 (0)