@@ -2079,6 +2079,10 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2079
2079
void *Ptr;
2080
2080
ze_device_handle_t ZeDevice = Context->Devices [0 ]->ZeDevice ;
2081
2081
2082
+ ze_device_mem_alloc_desc_t ZeDeviceMemDesc = {};
2083
+ ZeDeviceMemDesc.flags = 0 ;
2084
+ ZeDeviceMemDesc.ordinal = 0 ;
2085
+
2082
2086
// We treat integrated devices (physical memory shared with the CPU)
2083
2087
// differently from discrete devices (those with distinct memories).
2084
2088
// For integrated devices, allocating the buffer in host shared memory
@@ -2088,36 +2092,36 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2088
2092
Context->Devices [0 ]->ZeDeviceProperties .flags &
2089
2093
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED;
2090
2094
2095
+ bool AllocHostPtr = Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC;
2096
+
2091
2097
if (DeviceIsIntegrated) {
2092
2098
ze_host_mem_alloc_desc_t ZeDesc = {};
2093
2099
ZeDesc.flags = 0 ;
2094
2100
2095
2101
ZE_CALL (zeMemAllocHost (Context->ZeContext , &ZeDesc, Size, 1 , &Ptr));
2096
2102
2097
- } else {
2098
- ze_device_mem_alloc_desc_t ZeDesc = {};
2099
- ZeDesc.flags = 0 ;
2100
- ZeDesc.ordinal = 0 ;
2103
+ } else if (AllocHostPtr){
2104
+ // Level Zero (L0) currently does not support allocating pinned
2105
+ // host memory, so for the PI_MEM_FLAGS_HOST_PTR_ALLOC flag the buffer
2106
+ // is allocated from host-accessible memory instead.
2107
+ ze_host_mem_alloc_desc_t ZeHostMemDesc = {};
2108
+ ZeHostMemDesc.flags = 0 ;
2109
+
2110
+ ZE_CALL (zeMemAllocShared (Context->ZeContext , &ZeDeviceMemDesc,
2111
+ &ZeHostMemDesc, Size,
2112
+ 1 , // TODO: alignment
2113
+ nullptr , // not bound to any device
2114
+ &Ptr));
2101
2115
2116
+ } else {
2102
2117
ZE_CALL (
2103
- zeMemAllocDevice (Context->ZeContext , &ZeDesc, Size, 1 , ZeDevice, &Ptr));
2118
+ zeMemAllocDevice (Context->ZeContext , &ZeDeviceMemDesc, Size, 1 ,
2119
+ ZeDevice, &Ptr));
2104
2120
}
2121
+
2105
2122
if (HostPtr) {
2106
- // Currently zeMemAllocHost() does not support allocation of pinned
2107
- // host memory. So for PI_MEM_FLAGS_HOST_PTR_ALLOC flag, it allocates
2108
- // pageable host memory.
2109
- if ((Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) != 0 ) {
2110
- ze_host_mem_alloc_desc_t ZeHostDesc = {};
2111
- ZeHostDesc.flags = 0 ;
2112
-
2113
- ZE_CALL (zeMemAllocHost (Context->ZeContext , &ZeHostDesc, Size,
2114
- 1 , // TODO: alignment
2115
- &HostPtr));
2116
- ZE_CALL (zeCommandListAppendMemoryCopy (Context->ZeCommandListInit , Ptr,
2117
- HostPtr, Size, nullptr , 0 ,
2118
- nullptr ));
2119
- } else if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
2120
- (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0 ) {
2123
+ if ((Flags & PI_MEM_FLAGS_HOST_PTR_USE) != 0 ||
2124
+ (Flags & PI_MEM_FLAGS_HOST_PTR_COPY) != 0 ) {
2121
2125
// Initialize the buffer with user data
2122
2126
if (DeviceIsIntegrated) {
2123
2127
// Do a host to host copy
@@ -2129,14 +2133,16 @@ pi_result piMemBufferCreate(pi_context Context, pi_mem_flags Flags, size_t Size,
2129
2133
HostPtr, Size, nullptr , 0 ,
2130
2134
nullptr ));
2131
2135
}
2136
+ } else if ((Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) != 0 ) {
2137
+ // Nothing more to do.
2132
2138
} else if (Flags == 0 || (Flags == PI_MEM_FLAGS_ACCESS_RW)) {
2133
2139
// Nothing more to do.
2134
2140
} else {
2135
2141
die (" piMemBufferCreate: not implemented" );
2136
2142
}
2137
2143
}
2138
2144
2139
- auto HostPtrOrNull = (Flags & PI_MEM_FLAGS_HOST_PTR_ALLOC) ||
2145
+ auto HostPtrOrNull =
2140
2146
(Flags & PI_MEM_FLAGS_HOST_PTR_USE) ? pi_cast<char *>(HostPtr) : nullptr ;
2141
2147
try {
2142
2148
*RetMem = new _pi_buffer (
0 commit comments