Skip to content

Commit de12ff1

Browse files
committed
Merge branch 'main' into buffer-copy-host-ptr
2 parents 5df33b3 + cabf128 commit de12ff1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+377
-139
lines changed

.github/workflows/benchmarks_compute.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ on:
1212
default: 'level_zero'
1313
options:
1414
- level_zero
15+
- level_zero_v2
1516
unit:
1617
description: Test unit (cpu/gpu)
1718
type: choice

include/ur_api.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2062,15 +2062,15 @@ typedef struct ur_device_native_properties_t {
20622062
/// - ::UR_RESULT_ERROR_DEVICE_LOST
20632063
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
20642064
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
2065-
/// + `NULL == hPlatform`
2065+
/// + `NULL == hAdapter`
20662066
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
20672067
/// + `NULL == phDevice`
20682068
/// - ::UR_RESULT_ERROR_UNSUPPORTED_FEATURE
20692069
/// + If the adapter has no underlying equivalent handle.
20702070
UR_APIEXPORT ur_result_t UR_APICALL
20712071
urDeviceCreateWithNativeHandle(
20722072
ur_native_handle_t hNativeDevice, ///< [in][nocheck] the native handle of the device.
2073-
ur_platform_handle_t hPlatform, ///< [in] handle of the platform instance
2073+
ur_adapter_handle_t hAdapter, ///< [in] handle of the adapter to which `hNativeDevice` belongs
20742074
const ur_device_native_properties_t *pProperties, ///< [in][optional] pointer to native device properties struct.
20752075
ur_device_handle_t *phDevice ///< [out] pointer to the handle of the device object created.
20762076
);
@@ -4785,6 +4785,7 @@ urKernelSetArgValue(
47854785
size_t argSize, ///< [in] size of argument type
47864786
const ur_kernel_arg_value_properties_t *pProperties, ///< [in][optional] pointer to value properties.
47874787
const void *pArgValue ///< [in] argument value represented as matching arg type.
4788+
///< The data pointed to will be copied and therefore can be reused on return.
47884789
);
47894790

47904791
///////////////////////////////////////////////////////////////////////////////
@@ -11972,7 +11973,7 @@ typedef struct ur_device_get_native_handle_params_t {
1197211973
/// allowing the callback the ability to modify the parameter's value
1197311974
typedef struct ur_device_create_with_native_handle_params_t {
1197411975
ur_native_handle_t *phNativeDevice;
11975-
ur_platform_handle_t *phPlatform;
11976+
ur_adapter_handle_t *phAdapter;
1197611977
const ur_device_native_properties_t **ppProperties;
1197711978
ur_device_handle_t **pphDevice;
1197811979
} ur_device_create_with_native_handle_params_t;

include/ur_ddi.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2373,7 +2373,7 @@ typedef ur_result_t(UR_APICALL *ur_pfnDeviceGetNativeHandle_t)(
23732373
/// @brief Function-pointer for urDeviceCreateWithNativeHandle
23742374
typedef ur_result_t(UR_APICALL *ur_pfnDeviceCreateWithNativeHandle_t)(
23752375
ur_native_handle_t,
2376-
ur_platform_handle_t,
2376+
ur_adapter_handle_t,
23772377
const ur_device_native_properties_t *,
23782378
ur_device_handle_t *);
23792379

include/ur_print.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17357,10 +17357,10 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
1735717357
*(params->phNativeDevice)));
1735817358

1735917359
os << ", ";
17360-
os << ".hPlatform = ";
17360+
os << ".hAdapter = ";
1736117361

1736217362
ur::details::printPtr(os,
17363-
*(params->phPlatform));
17363+
*(params->phAdapter));
1736417364

1736517365
os << ", ";
1736617366
os << ".pProperties = ";

scripts/benchmarks/benches/compute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class ComputeBench:
1515
def __init__(self, directory):
1616
self.directory = directory
1717
self.built = False
18-
self.adapter_short_name = {'level_zero' : 'L0'}
18+
self.adapter_short_name = {'level_zero' : 'L0', "level_zero_v2" : 'L0_V2'}
1919
return
2020

2121
def setup(self):

scripts/core/device.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -820,9 +820,9 @@ params:
820820
- type: $x_native_handle_t
821821
name: hNativeDevice
822822
desc: "[in][nocheck] the native handle of the device."
823-
- type: $x_platform_handle_t
824-
name: hPlatform
825-
desc: "[in] handle of the platform instance"
823+
- type: $x_adapter_handle_t
824+
name: hAdapter
825+
desc: "[in] handle of the adapter to which `hNativeDevice` belongs"
826826
- type: const $x_device_native_properties_t*
827827
name: pProperties
828828
desc: "[in][optional] pointer to native device properties struct."

scripts/core/kernel.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ params:
6464
desc: "[in][optional] pointer to value properties."
6565
- type: "const void*"
6666
name: pArgValue
67-
desc: "[in] argument value represented as matching arg type."
67+
desc: |
68+
[in] argument value represented as matching arg type.
69+
The data pointed to will be copied and therefore can be reused on return.
6870
returns:
6971
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX
7072
- $X_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_SIZE

source/adapters/cuda/device.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1185,27 +1185,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
11851185
/// \return TBD
11861186

11871187
UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
1188-
ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform,
1189-
const ur_device_native_properties_t *pProperties,
1188+
ur_native_handle_t hNativeDevice,
1189+
[[maybe_unused]] ur_adapter_handle_t hAdapter,
1190+
[[maybe_unused]] const ur_device_native_properties_t *pProperties,
11901191
ur_device_handle_t *phDevice) {
1191-
std::ignore = pProperties;
1192-
11931192
CUdevice CuDevice = static_cast<CUdevice>(hNativeDevice);
11941193

11951194
auto IsDevice = [=](std::unique_ptr<ur_device_handle_t_> &Dev) {
11961195
return Dev->get() == CuDevice;
11971196
};
11981197

1199-
// If a platform is provided just check if the device is in it
1200-
if (hPlatform) {
1201-
auto SearchRes = std::find_if(begin(hPlatform->Devices),
1202-
end(hPlatform->Devices), IsDevice);
1203-
if (SearchRes != end(hPlatform->Devices)) {
1204-
*phDevice = SearchRes->get();
1205-
return UR_RESULT_SUCCESS;
1206-
}
1207-
}
1208-
12091198
// Get list of platforms
12101199
uint32_t NumPlatforms = 0;
12111200
ur_adapter_handle_t AdapterHandle = &adapter;

source/adapters/cuda/image.cpp

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -455,32 +455,40 @@ UR_APIEXPORT ur_result_t UR_APICALL urBindlessImagesImageAllocateExp(
455455

456456
// Allocate a cuArray
457457
if (pImageDesc->numMipLevel == 1) {
458-
CUarray ImageArray;
458+
CUarray ImageArray{};
459459

460460
try {
461461
UR_CHECK_ERROR(cuArray3DCreate(&ImageArray, &array_desc));
462462
*phImageMem = (ur_exp_image_mem_native_handle_t)ImageArray;
463463
} catch (ur_result_t Err) {
464-
cuArrayDestroy(ImageArray);
464+
if (ImageArray != CUarray{}) {
465+
UR_CHECK_ERROR(cuArrayDestroy(ImageArray));
466+
}
465467
return Err;
466468
} catch (...) {
467-
cuArrayDestroy(ImageArray);
469+
if (ImageArray != CUarray{}) {
470+
UR_CHECK_ERROR(cuArrayDestroy(ImageArray));
471+
}
468472
return UR_RESULT_ERROR_UNKNOWN;
469473
}
470474
} else // Allocate a cuMipmappedArray
471475
{
472-
CUmipmappedArray mip_array;
476+
CUmipmappedArray mip_array{};
473477
array_desc.Flags = CUDA_ARRAY3D_SURFACE_LDST;
474478

475479
try {
476480
UR_CHECK_ERROR(cuMipmappedArrayCreate(&mip_array, &array_desc,
477481
pImageDesc->numMipLevel));
478482
*phImageMem = (ur_exp_image_mem_native_handle_t)mip_array;
479483
} catch (ur_result_t Err) {
480-
cuMipmappedArrayDestroy(mip_array);
484+
if (mip_array) {
485+
UR_CHECK_ERROR(cuMipmappedArrayDestroy(mip_array));
486+
}
481487
return Err;
482488
} catch (...) {
483-
cuMipmappedArrayDestroy(mip_array);
489+
if (mip_array) {
490+
UR_CHECK_ERROR(cuMipmappedArrayDestroy(mip_array));
491+
}
484492
return UR_RESULT_ERROR_UNKNOWN;
485493
}
486494
}

source/adapters/cuda/memory.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
439439
UR_CHECK_ERROR(cuMemAlloc(&DevPtr, Buffer.Size));
440440
}
441441
} else {
442-
CUarray ImageArray;
442+
CUarray ImageArray{};
443443
CUsurfObject Surface;
444444
try {
445445
auto &Image = std::get<SurfaceMem>(Mem->Mem);
@@ -465,12 +465,12 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
465465
UR_CHECK_ERROR(cuSurfObjectCreate(&Surface, &ImageResDesc));
466466
Image.SurfObjs[DeviceIdx] = Surface;
467467
} catch (ur_result_t Err) {
468-
if (ImageArray) {
468+
if (ImageArray != CUarray{}) {
469469
UR_CHECK_ERROR(cuArrayDestroy(ImageArray));
470470
}
471471
return Err;
472472
} catch (...) {
473-
if (ImageArray) {
473+
if (ImageArray != CUarray{}) {
474474
UR_CHECK_ERROR(cuArrayDestroy(ImageArray));
475475
}
476476
return UR_RESULT_ERROR_UNKNOWN;

source/adapters/cuda/queue.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ CUstream ur_queue_handle_t_::getNextComputeStream(uint32_t *StreamToken) {
4545
// change NumComputeStreams after that
4646
if (NumComputeStreams < ComputeStreams.size()) {
4747
UR_CHECK_ERROR(cuStreamCreateWithPriority(
48-
&ComputeStreams[NumComputeStreams++], Flags, Priority));
48+
&ComputeStreams[NumComputeStreams], Flags, Priority));
49+
++NumComputeStreams;
4950
}
5051
}
5152
Token = ComputeStreamIndex++;
@@ -110,7 +111,8 @@ CUstream ur_queue_handle_t_::getNextTransferStream() {
110111
// change NumTransferStreams after that
111112
if (NumTransferStreams < TransferStreams.size()) {
112113
UR_CHECK_ERROR(cuStreamCreateWithPriority(
113-
&TransferStreams[NumTransferStreams++], Flags, Priority));
114+
&TransferStreams[NumTransferStreams], Flags, Priority));
115+
++NumTransferStreams;
114116
}
115117
}
116118
uint32_t StreamI = TransferStreamIndex++ % TransferStreams.size();

source/adapters/hip/device.cpp

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -988,7 +988,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
988988
}
989989

990990
UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
991-
ur_native_handle_t hNativeDevice, ur_platform_handle_t hPlatform,
991+
ur_native_handle_t hNativeDevice,
992+
[[maybe_unused]] ur_adapter_handle_t hAdapter,
992993
[[maybe_unused]] const ur_device_native_properties_t *pProperties,
993994
ur_device_handle_t *phDevice) {
994995
// We can't cast between ur_native_handle_t and hipDevice_t, so memcpy the
@@ -1000,16 +1001,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
10001001
return Dev->get() == HIPDevice;
10011002
};
10021003

1003-
// If a platform is provided just check if the device is in it
1004-
if (hPlatform) {
1005-
auto SearchRes = std::find_if(begin(hPlatform->Devices),
1006-
end(hPlatform->Devices), IsDevice);
1007-
if (SearchRes != end(hPlatform->Devices)) {
1008-
*phDevice = SearchRes->get();
1009-
return UR_RESULT_SUCCESS;
1010-
}
1011-
}
1012-
10131004
// Get list of platforms
10141005
uint32_t NumPlatforms = 0;
10151006
ur_adapter_handle_t AdapterHandle = &adapter;

source/adapters/hip/memory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -498,7 +498,7 @@ ur_result_t allocateMemObjOnDeviceIfNeeded(ur_mem_handle_t Mem,
498498
UR_CHECK_ERROR(hipMalloc(&DevPtr, Buffer.Size));
499499
}
500500
} else {
501-
hipArray *ImageArray;
501+
hipArray *ImageArray{};
502502
hipSurfaceObject_t Surface;
503503
try {
504504
auto &Image = std::get<SurfaceMem>(Mem->Mem);

source/adapters/level_zero/adapter.cpp

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -43,15 +43,31 @@ ur_result_t initPlatforms(PlatformVec &platforms) noexcept try {
4343
}
4444

4545
std::vector<ze_driver_handle_t> ZeDrivers;
46+
std::vector<ze_device_handle_t> ZeDevices;
4647
ZeDrivers.resize(ZeDriverCount);
4748

4849
ZE2UR_CALL(zeDriverGet, (&ZeDriverCount, ZeDrivers.data()));
4950
for (uint32_t I = 0; I < ZeDriverCount; ++I) {
50-
auto platform = std::make_unique<ur_platform_handle_t_>(ZeDrivers[I]);
51-
UR_CALL(platform->initialize());
52-
53-
// Save a copy in the cache for future uses.
54-
platforms.push_back(std::move(platform));
51+
ze_device_properties_t device_properties{};
52+
device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
53+
uint32_t ZeDeviceCount = 0;
54+
ZE2UR_CALL(zeDeviceGet, (ZeDrivers[I], &ZeDeviceCount, nullptr));
55+
ZeDevices.resize(ZeDeviceCount);
56+
ZE2UR_CALL(zeDeviceGet, (ZeDrivers[I], &ZeDeviceCount, ZeDevices.data()));
57+
// Check if this driver has GPU Devices
58+
for (uint32_t D = 0; D < ZeDeviceCount; ++D) {
59+
ZE2UR_CALL(zeDeviceGetProperties, (ZeDevices[D], &device_properties));
60+
61+
if (ZE_DEVICE_TYPE_GPU == device_properties.type) {
62+
// If this Driver is a GPU, save it as a usable platform.
63+
auto platform = std::make_unique<ur_platform_handle_t_>(ZeDrivers[I]);
64+
UR_CALL(platform->initialize());
65+
66+
// Save a copy in the cache for future uses.
67+
platforms.push_back(std::move(platform));
68+
break;
69+
}
70+
}
5571
}
5672
return UR_RESULT_SUCCESS;
5773
} catch (...) {
@@ -105,8 +121,16 @@ ur_adapter_handle_t_::ur_adapter_handle_t_()
105121
// We must only initialize the driver once, even if urPlatformGet() is
106122
// called multiple times. Declaring the return value as "static" ensures
107123
// it's only called once.
108-
GlobalAdapter->ZeResult =
109-
ZE_CALL_NOCHECK(zeInit, (ZE_INIT_FLAG_GPU_ONLY));
124+
125+
// Init with all flags set to enable for all driver types to be init in
126+
// the application.
127+
ze_init_flags_t L0InitFlags = ZE_INIT_FLAG_GPU_ONLY;
128+
if (UrL0InitAllDrivers) {
129+
L0InitFlags |= ZE_INIT_FLAG_VPU_ONLY;
130+
}
131+
logger::debug("\nzeInit with flags value of {}\n",
132+
static_cast<int>(L0InitFlags));
133+
GlobalAdapter->ZeResult = ZE_CALL_NOCHECK(zeInit, (L0InitFlags));
110134
}
111135
assert(GlobalAdapter->ZeResult !=
112136
std::nullopt); // verify that level-zero is initialized

source/adapters/level_zero/common.hpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,15 @@ const int UrL0LeaksDebug = [] {
207207
return std::atoi(UrRet);
208208
}();
209209

210+
// Enable for UR L0 Adapter to Init all L0 Drivers on the system with filtering
211+
// in place for only currently used Drivers.
212+
const int UrL0InitAllDrivers = [] {
213+
const char *UrRet = std::getenv("UR_L0_INIT_ALL_DRIVERS");
214+
if (!UrRet)
215+
return 0;
216+
return std::atoi(UrRet);
217+
}();
218+
210219
// Controls Level Zero calls serialization to w/a Level Zero driver being not MT
211220
// ready. Recognized values (can be used as a bit mask):
212221
enum {

source/adapters/level_zero/device.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1600,14 +1600,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetNativeHandle(
16001600

16011601
UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
16021602
ur_native_handle_t NativeDevice, ///< [in] the native handle of the device.
1603-
ur_platform_handle_t Platform, ///< [in] handle of the platform instance
1604-
const ur_device_native_properties_t
1603+
[[maybe_unused]] ur_adapter_handle_t
1604+
Adapter, ///< [in] handle of the platform instance
1605+
[[maybe_unused]] const ur_device_native_properties_t
16051606
*Properties, ///< [in][optional] pointer to native device properties
16061607
///< struct.
16071608
ur_device_handle_t
16081609
*Device ///< [out] pointer to the handle of the device object created.
16091610
) {
1610-
std::ignore = Properties;
16111611
auto ZeDevice = ur_cast<ze_device_handle_t>(NativeDevice);
16121612

16131613
// The SYCL spec requires that the set of devices must remain fixed for the
@@ -1620,12 +1620,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
16201620
if (const auto *platforms = GlobalAdapter->PlatformCache->get_value()) {
16211621
for (const auto &p : *platforms) {
16221622
Dev = p->getDeviceFromNativeHandle(ZeDevice);
1623-
if (Dev) {
1624-
// Check that the input Platform, if was given, matches the found one.
1625-
UR_ASSERT(!Platform || Platform == p.get(),
1626-
UR_RESULT_ERROR_INVALID_PLATFORM);
1627-
break;
1628-
}
16291623
}
16301624
} else {
16311625
return GlobalAdapter->PlatformCache->get_error();

source/adapters/level_zero/device.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ struct ur_device_handle_t_ : _ur_object {
6161
ur_device_handle_t_(ze_device_handle_t Device, ur_platform_handle_t Plt,
6262
ur_device_handle_t ParentDevice = nullptr)
6363
: ZeDevice{Device}, Platform{Plt}, RootDevice{ParentDevice},
64-
ZeDeviceProperties{}, ZeDeviceComputeProperties{} {
64+
ZeDeviceProperties{}, ZeDeviceComputeProperties{}, Id(std::nullopt) {
6565
// NOTE: one must additionally call initialize() to complete
6666
// UR device creation.
6767
}
@@ -229,5 +229,5 @@ struct ur_device_handle_t_ : _ur_object {
229229
ZeOffsetToImageHandleMap;
230230

231231
// unique ephemeral identifer of the device in the adapter
232-
DeviceId Id;
232+
std::optional<DeviceId> Id;
233233
};

source/adapters/level_zero/event.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,9 @@ ur_queue_handle_legacy_t_::enqueueEventsWaitWithBarrier( ///< [in] handle of the
228228

229229
// For in-order queue and wait-list which is empty or has events from
230230
// the same queue just use the last command event as the barrier event.
231-
if (Queue->isInOrderQueue() &&
231+
// This optimization is disabled when profiling is enabled to ensure
232+
// accurate profiling values & the overhead that profiling incurs.
233+
if (Queue->isInOrderQueue() && !Queue->isProfilingEnabled() &&
232234
WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList,
233235
EventWaitList) &&
234236
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {

0 commit comments

Comments
 (0)