@@ -3091,20 +3091,40 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
3091
3091
3092
3092
pi_result Result;
3093
3093
if (DeviceIsIntegrated) {
3094
- Result = piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment);
3094
+ if (enableBufferPooling ())
3095
+ Result = piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment);
3096
+ else {
3097
+ ze_host_mem_alloc_desc_t ZeDesc = {};
3098
+ ZeDesc.flags = 0 ;
3099
+ ZE_CALL (zeMemAllocHost, (Context->ZeContext , &ZeDesc, Size, 1 , &Ptr));
3100
+ }
3095
3101
} else if (Context->SingleRootDevice ) {
3096
3102
// If we have a single discrete device or all devices in the context are
3097
3103
// sub-devices of the same device then we can allocate on device
3098
- Result = piextUSMDeviceAlloc (&Ptr, Context, Context->SingleRootDevice ,
3099
- nullptr , Size, Alignment);
3104
+ if (enableBufferPooling ())
3105
+ Result = piextUSMDeviceAlloc (&Ptr, Context, Context->SingleRootDevice ,
3106
+ nullptr , Size, Alignment);
3107
+ else {
3108
+ ze_device_mem_alloc_desc_t ZeDesc = {};
3109
+ ZeDesc.flags = 0 ;
3110
+ ZeDesc.ordinal = 0 ;
3111
+ ZE_CALL (zeMemAllocDevice, (Context->ZeContext , &ZeDesc, Size, 1 ,
3112
+ Context->SingleRootDevice ->ZeDevice , &Ptr));
3113
+ }
3100
3114
} else {
3101
3115
// Context with several gpu cards. Temporarily use host allocation because
3102
3116
// it is accessible by all devices. But it is not good in terms of
3103
3117
// performance.
3104
3118
// TODO: We need to either allow remote access to device memory using IPC,
3105
3119
// or do explicit memory transfers from one device to another using host
3106
3120
// resources as backing buffers to allow those transfers.
3107
- Result = piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment);
3121
+ if (enableBufferPooling ())
3122
+ Result = piextUSMHostAlloc (&Ptr, Context, nullptr , Size, Alignment);
3123
+ else {
3124
+ ze_host_mem_alloc_desc_t ZeDesc = {};
3125
+ ZeDesc.flags = 0 ;
3126
+ ZE_CALL (zeMemAllocHost, (Context->ZeContext , &ZeDesc, Size, 1 , &Ptr));
3127
+ }
3108
3128
}
3109
3129
3110
3130
if (Result != PI_SUCCESS)
@@ -3179,7 +3199,11 @@ pi_result piMemRelease(pi_mem Mem) {
3179
3199
} else {
3180
3200
auto Buf = static_cast <_pi_buffer *>(Mem);
3181
3201
if (!Buf->isSubBuffer ()) {
3182
- PI_CALL (piextUSMFree (Mem->Context , Mem->getZeHandle ()));
3202
+ if (enableBufferPooling ()) {
3203
+ PI_CALL (piextUSMFree (Mem->Context , Mem->getZeHandle ()));
3204
+ } else {
3205
+ ZE_CALL (zeMemFree, (Mem->Context ->ZeContext , Mem->getZeHandle ()));
3206
+ }
3183
3207
}
3184
3208
}
3185
3209
delete Mem;
@@ -6489,6 +6513,18 @@ pi_result USMHostMemoryAlloc::allocateImpl(void **ResultPtr, size_t Size,
6489
6513
return USMHostAllocImpl (ResultPtr, Context, nullptr , Size, Alignment);
6490
6514
}
6491
6515
6516
+ SystemMemory::MemType USMSharedMemoryAlloc::getMemTypeImpl () {
6517
+ return SystemMemory::Shared;
6518
+ }
6519
+
6520
+ SystemMemory::MemType USMDeviceMemoryAlloc::getMemTypeImpl () {
6521
+ return SystemMemory::Device;
6522
+ }
6523
+
6524
+ SystemMemory::MemType USMHostMemoryAlloc::getMemTypeImpl () {
6525
+ return SystemMemory::Host;
6526
+ }
6527
+
6492
6528
void *USMMemoryAllocBase::allocate (size_t Size) {
6493
6529
void *Ptr = nullptr ;
6494
6530
@@ -6517,6 +6553,10 @@ void USMMemoryAllocBase::deallocate(void *Ptr) {
6517
6553
}
6518
6554
}
6519
6555
6556
+ SystemMemory::MemType USMMemoryAllocBase::getMemType () {
6557
+ return getMemTypeImpl ();
6558
+ }
6559
+
6520
6560
pi_result piextUSMDeviceAlloc (void **ResultPtr, pi_context Context,
6521
6561
pi_device Device,
6522
6562
pi_usm_mem_properties *Properties, size_t Size,
0 commit comments