@@ -3127,8 +3127,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
3127
3127
3128
3128
// intel extensions for GPU information
3129
3129
case PI_DEVICE_INFO_DEVICE_ID:
3130
- return ReturnValue (
3131
- pi_uint32{Device->ZeDeviceProperties ->deviceId });
3130
+ return ReturnValue (pi_uint32{Device->ZeDeviceProperties ->deviceId });
3132
3131
case PI_DEVICE_INFO_PCI_ADDRESS: {
3133
3132
if (getenv (" ZES_ENABLE_SYSMAN" ) == nullptr ) {
3134
3133
zePrint (" Set SYCL_ENABLE_PCI=1 to obtain PCI data.\n " );
@@ -3958,8 +3957,12 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
3958
3957
if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
3959
3958
(Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0 ) {
3960
3959
3960
+ // We don't yet know which device needs this buffer, so make the first
3961
+ // device in the context be the master, and hold the initial valid
3962
+ // allocation.
3961
3963
char *ZeHandleDst;
3962
- PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only));
3964
+ PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only,
3965
+ Context->Devices [0 ]));
3963
3966
if (Buffer->OnHost ) {
3964
3967
// Do a host to host copy.
3965
3968
// For an imported HostPtr the copy is unneeded.
@@ -4331,7 +4334,7 @@ pi_result piextMemCreateWithNativeHandle(pi_native_handle NativeHandle,
4331
4334
// represent the buffer in this context) copy the data to a newly
4332
4335
// created device allocation.
4333
4336
char *ZeHandleDst;
4334
- PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only));
4337
+ PI_CALL (Buffer->getZeHandle (ZeHandleDst, _pi_mem::write_only, Device ));
4335
4338
4336
4339
// zeCommandListAppendMemoryCopy must not be called from simultaneous
4337
4340
// threads with the same command list handle, so we need exclusive lock.
@@ -8757,9 +8760,16 @@ size_t _pi_buffer::getAlignment() const {
8757
8760
pi_result _pi_buffer::getZeHandle (char *&ZeHandle, access_mode_t AccessMode,
8758
8761
pi_device Device) {
8759
8762
8763
+ // NOTE: There might be no valid allocation at all yet and we get
8764
+ // here from piEnqueueKernelLaunch that would be doing the buffer
8765
+ // initialization. In this case the Device is not null as kernel
8766
+ // launch is always on a specific device.
8760
8767
if (!Device)
8761
8768
Device = LastDeviceWithValidAllocation;
8762
- PI_ASSERT (Device, PI_ERROR_INVALID_DEVICE);
8769
+ // If the device is still not selected then use the first one in
8770
+ // the context of the buffer.
8771
+ if (!Device)
8772
+ Device = Context->Devices [0 ];
8763
8773
8764
8774
auto &Allocation = Allocations[Device];
8765
8775
@@ -8814,6 +8824,9 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
8814
8824
// devices in the context have the same root.
8815
8825
PI_CALL (getZeHandle (ZeHandle, AccessMode, Context->SingleRootDevice ));
8816
8826
Allocation.ReleaseAction = allocation_t ::keep;
8827
+ Allocation.ZeHandle = ZeHandle;
8828
+ Allocation.Valid = true ;
8829
+ return PI_SUCCESS;
8817
8830
} else { // Create device allocation
8818
8831
if (enableBufferPooling ()) {
8819
8832
Allocation.ReleaseAction = allocation_t ::free;
@@ -8836,13 +8849,27 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
8836
8849
if (Device == LastDeviceWithValidAllocation)
8837
8850
die (" getZeHandle: last used allocation is not valid" );
8838
8851
8839
- Allocation.Valid = true ;
8840
8852
// For write-only access the allocation contents is not going to be used.
8841
8853
// So don't do anything to make it "valid".
8842
- if (AccessMode != _pi_mem::write_only) {
8843
- char *ZeHandleSrc;
8854
+ bool NeedCopy = AccessMode != _pi_mem::write_only;
8855
+ // It's also possible that the buffer doesn't have a valid allocation
8856
+ // yet presumably when it is passed to a kernel that will perform
8857
+ // its initialization.
8858
+ if (NeedCopy && !LastDeviceWithValidAllocation) {
8859
+ NeedCopy = false ;
8860
+ }
8861
+ char *ZeHandleSrc = nullptr ;
8862
+ if (NeedCopy) {
8844
8863
PI_CALL (getZeHandle (ZeHandleSrc, _pi_mem::read_only,
8845
8864
LastDeviceWithValidAllocation));
8865
+ // It's possible with the single root-device contexts that
8866
+ // the buffer is represented by the single root-device
8867
+ // allocation; in that case skip the copy to itself.
8868
+ if (ZeHandleSrc == ZeHandle)
8869
+ NeedCopy = false ;
8870
+ }
8871
+
8872
+ if (NeedCopy) {
8846
8873
// Copy valid buffer data to this allocation.
8847
8874
// TODO: see if we should better use peer's device allocation used
8848
8875
// directly, if that capability is reported with zeDeviceCanAccessPeer,
@@ -8851,7 +8878,7 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
8851
8878
//
8852
8879
// zeCommandListAppendMemoryCopy must not be called from simultaneous
8853
8880
// threads with the same command list handle, so we need exclusive lock.
8854
- ze_bool_t P2P;
8881
+ ze_bool_t P2P = false ;
8855
8882
ZE_CALL (
8856
8883
zeDeviceCanAccessPeer,
8857
8884
(Device->ZeDevice , LastDeviceWithValidAllocation->ZeDevice , &P2P));
@@ -8895,9 +8922,10 @@ pi_result _pi_buffer::getZeHandle(char *&ZeHandle, access_mode_t AccessMode,
8895
8922
Size, nullptr , 0 , nullptr ));
8896
8923
}
8897
8924
}
8925
+ Allocation.Valid = true ;
8926
+ LastDeviceWithValidAllocation = Device;
8898
8927
}
8899
8928
8900
- LastDeviceWithValidAllocation = Device;
8901
8929
// Invalidate other allocations that would become not valid if
8902
8930
// this access is not read-only.
8903
8931
if (AccessMode != _pi_mem::read_only) {
0 commit comments