Skip to content

Commit 27aae7a

Browse files
authored
[UR] CTS tests for command-buffer native append (#17494)
The merged PR #16871 introduced new UR entry-points `urCommandBufferAppendNativeCommandExp` and `urCommandBufferGetNativeHandleExp`, however there was no UR CTS coverage for `urCommandBufferAppendNativeCommandExp` included, as verification was done via SYCL E2E testing. This PR adds UR CTS testing for `urCommandBufferAppendNativeCommandExp` for all of the L0, CUDA, HIP, and OpenCL adapters. Resolves #17448
1 parent 33e7f73 commit 27aae7a

File tree

9 files changed

+648
-18
lines changed

9 files changed

+648
-18
lines changed

unified-runtime/source/adapters/opencl/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,11 @@ else()
8181
set(OpenCLICDLoaderLibrary OpenCL::OpenCL)
8282
endif()
8383

84+
# Make an interface library for use within the project.
85+
add_library(OpenCLICDLoader INTERFACE)
86+
target_link_libraries(OpenCLICDLoader INTERFACE "${OpenCLICDLoaderLibrary}")
87+
target_include_directories(OpenCLICDLoader INTERFACE ${OpenCLIncludeDirectory})
88+
8489
message(STATUS "OpenCL Include Directory: ${OpenCLIncludeDirectory}")
8590
message(STATUS "OpenCL ICD Loader Library: ${OpenCLICDLoaderLibrary}")
8691

unified-runtime/test/conformance/exp_command_buffer/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ add_conformance_test_with_kernels_environment(exp_command_buffer
1111
event_sync.cpp
1212
kernel_event_sync.cpp
1313
invalid.cpp
14-
native.cpp
1514
update/buffer_fill_kernel_update.cpp
1615
update/invalid_update.cpp
1716
update/kernel_handle_update.cpp
@@ -23,3 +22,5 @@ add_conformance_test_with_kernels_environment(exp_command_buffer
2322
update/kernel_event_sync.cpp
2423
update/local_memory_update.cpp
2524
)
25+
26+
add_subdirectory(native-command)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
3+
# See LICENSE.TXT
4+
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
6+
if (UR_BUILD_ADAPTER_CUDA)
7+
target_sources(test-exp_command_buffer PRIVATE "append_native_cuda.cpp")
8+
target_link_libraries(test-exp_command_buffer PRIVATE cudadrv)
9+
endif()
10+
11+
if (UR_BUILD_ADAPTER_HIP)
12+
target_sources(test-exp_command_buffer PRIVATE "append_native_hip.cpp")
13+
target_link_libraries(test-exp_command_buffer PRIVATE rocmdrv)
14+
endif()
15+
16+
if (UR_BUILD_ADAPTER_L0 OR UR_BUILD_ADAPTER_L0_v2)
17+
target_sources(test-exp_command_buffer PRIVATE "append_native_L0.cpp")
18+
target_link_libraries(test-exp_command_buffer PRIVATE LevelZeroLoader)
19+
endif()
20+
21+
if (UR_BUILD_ADAPTER_OPENCL)
22+
target_sources(test-exp_command_buffer PRIVATE "append_native_opencl.cpp")
23+
target_compile_definitions(test-exp_command_buffer PRIVATE CL_TARGET_OPENCL_VERSION=300)
24+
target_link_libraries(test-exp_command_buffer PRIVATE OpenCLICDLoader)
25+
endif()
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
3+
// Exceptions. See LICENSE.TXT
4+
//
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include "common.h"
8+
#include <level_zero/ze_api.h>
9+
10+
// Test using Level-Zero command-lists to add commands to a native
11+
// Level-Zero command-buffer.
12+
struct urL0CommandBufferNativeAppendTest
13+
: uur::command_buffer::urCommandBufferNativeAppendTest {
14+
void SetUp() override {
15+
UUR_RETURN_ON_FATAL_FAILURE(
16+
uur::command_buffer::urCommandBufferNativeAppendTest::SetUp());
17+
if (backend != UR_PLATFORM_BACKEND_LEVEL_ZERO) {
18+
GTEST_SKIP() << "Native append test is only supported on L0.";
19+
}
20+
21+
// Initializing the Level Zero driver is required if this test is linked
22+
// statically with the Level Zero loader; it is not initialized otherwise.
23+
zeInit(ZE_INIT_FLAG_GPU_ONLY);
24+
25+
// L0 doesn't support executing command-lists inside of other
26+
// command-lists, so the device is expected to report no sub-graph support.
27+
ur_bool_t subgraph_support{};
28+
EXPECT_SUCCESS(urDeviceGetInfo(
29+
device, UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP,
30+
sizeof(ur_bool_t), &subgraph_support, nullptr));
31+
EXPECT_FALSE(subgraph_support);
32+
}
33+
};
34+
35+
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urL0CommandBufferNativeAppendTest);
36+
37+
namespace {
38+
struct InteropData {
39+
ur_exp_command_buffer_handle_t command_buffer;
40+
void *src;
41+
void *dst;
42+
};
43+
44+
// Native command-buffer command is a single USM device pointer copy command
45+
void interop_func(void *data) {
46+
InteropData *func_data = reinterpret_cast<InteropData *>(data);
47+
ASSERT_NE(nullptr, func_data);
48+
49+
ze_command_list_handle_t native_graph{};
50+
ASSERT_SUCCESS(urCommandBufferGetNativeHandleExp(
51+
func_data->command_buffer, (ur_native_handle_t *)&native_graph));
52+
ASSERT_NE(ze_command_list_handle_t{}, native_graph);
53+
54+
const auto copy_size =
55+
uur::command_buffer::urCommandBufferNativeAppendTest::allocation_size;
56+
auto res = zeCommandListAppendMemoryCopy(native_graph, func_data->dst,
57+
func_data->src, copy_size, nullptr,
58+
0, nullptr);
59+
ASSERT_EQ(res, ZE_RESULT_SUCCESS);
60+
}
61+
} // end anonymous namespace
62+
63+
// Test command-buffer with a single native command, which when enqueued has an
64+
// eager UR command as a predecessor and eager UR command as a successor.
65+
TEST_P(urL0CommandBufferNativeAppendTest, Success) {
66+
InteropData data{command_buffer, src_device_ptr, dst_device_ptr};
67+
ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
68+
command_buffer, &interop_func, &data, nullptr, 0, nullptr, nullptr));
69+
ASSERT_SUCCESS(urCommandBufferFinalizeExp(command_buffer));
70+
71+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, src_device_ptr, sizeof(val), &val,
72+
allocation_size, 0, nullptr, nullptr));
73+
74+
ASSERT_SUCCESS(
75+
urEnqueueCommandBufferExp(queue, command_buffer, 0, nullptr, nullptr));
76+
77+
ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, host_vec.data(),
78+
dst_device_ptr, allocation_size, 0, nullptr,
79+
nullptr));
80+
// The copy above is non-blocking; fail the test if the queue cannot drain.
81+
ASSERT_SUCCESS(urQueueFinish(queue));
82+
for (auto &i : host_vec) {
83+
ASSERT_EQ(i, val);
84+
}
85+
}
86+
87+
// Test command-buffer native command with other command-buffer commands as
88+
// predecessors and successors
89+
TEST_P(urL0CommandBufferNativeAppendTest, Dependencies) {
90+
ur_exp_command_buffer_sync_point_t sync_point_1;
91+
ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp(
92+
command_buffer, src_device_ptr, &val, sizeof(val), allocation_size, 0,
93+
nullptr, 0, nullptr, &sync_point_1, nullptr, nullptr));
94+
95+
InteropData data{command_buffer, src_device_ptr, dst_device_ptr};
96+
ur_exp_command_buffer_sync_point_t sync_point_2;
97+
ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
98+
command_buffer, &interop_func, &data, nullptr, 1, &sync_point_1,
99+
&sync_point_2));
100+
101+
ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
102+
command_buffer, host_vec.data(), dst_device_ptr, allocation_size, 1,
103+
&sync_point_2, 0, nullptr, nullptr, nullptr, nullptr));
104+
105+
ASSERT_SUCCESS(urCommandBufferFinalizeExp(command_buffer));
106+
ASSERT_SUCCESS(
107+
urEnqueueCommandBufferExp(queue, command_buffer, 0, nullptr, nullptr));
108+
// Host results are read below, so a failed queue drain must fail the test.
109+
ASSERT_SUCCESS(urQueueFinish(queue));
110+
for (auto &i : host_vec) {
111+
ASSERT_EQ(i, val);
112+
}
113+
}
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM
3+
// Exceptions. See LICENSE.TXT
4+
//
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#include "common.h"
8+
#include <cuda.h>
9+
10+
// Test using CUDA-Graph to add commands to a native CUDA command-buffer.
11+
struct urCudaCommandBufferNativeAppendTest
12+
: uur::command_buffer::urCommandBufferNativeAppendTest {
13+
void SetUp() override {
14+
UUR_RETURN_ON_FATAL_FAILURE(
15+
uur::command_buffer::urCommandBufferNativeAppendTest::SetUp());
16+
if (backend != UR_PLATFORM_BACKEND_CUDA) {
17+
GTEST_SKIP() << "Native append test is only supported on CUDA.";
18+
}
19+
20+
// CUDA-Graph supports adding sub-graph nodes to a parent graph
21+
ur_bool_t subgraph_support = false;
22+
EXPECT_SUCCESS(urDeviceGetInfo(
23+
device, UR_DEVICE_INFO_COMMAND_BUFFER_SUBGRAPH_SUPPORT_EXP,
24+
sizeof(ur_bool_t), &subgraph_support, nullptr));
25+
EXPECT_TRUE(subgraph_support);
26+
// Create a non-updatable graph as a child graph. The result is asserted so a
27+
// failed creation stops set-up rather than leaving child_cmd_buf null.
28+
ur_exp_command_buffer_desc_t desc{
29+
UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC /*stype*/, nullptr /*pnext*/,
30+
false /* updatable */, false /* in-order */, false /* profilable*/
31+
};
32+
ASSERT_SUCCESS(
33+
urCommandBufferCreateExp(context, device, &desc, &child_cmd_buf));
34+
}
35+
36+
void TearDown() override {
37+
if (child_cmd_buf) {
38+
EXPECT_SUCCESS(urCommandBufferReleaseExp(child_cmd_buf));
39+
}
40+
UUR_RETURN_ON_FATAL_FAILURE(
41+
uur::command_buffer::urCommandBufferNativeAppendTest::TearDown());
42+
}
43+
44+
ur_exp_command_buffer_handle_t child_cmd_buf = nullptr;
45+
};
46+
47+
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urCudaCommandBufferNativeAppendTest);
48+
49+
namespace {
50+
struct InteropData {
51+
ur_exp_command_buffer_handle_t command_buffer;
52+
ur_context_handle_t context;
53+
void *src;
54+
void *dst;
55+
};
56+
57+
// Native command-buffer command is a single USM device pointer copy command
58+
void interop_func(void *data) {
59+
InteropData *func_data = reinterpret_cast<InteropData *>(data);
60+
ASSERT_NE(nullptr, func_data);
61+
62+
CUgraph native_graph{};
63+
ASSERT_SUCCESS(urCommandBufferGetNativeHandleExp(
64+
func_data->command_buffer, (ur_native_handle_t *)&native_graph));
65+
ASSERT_NE(CUgraph{}, native_graph);
66+
67+
CUcontext native_context{};
68+
ASSERT_SUCCESS(urContextGetNativeHandle(
69+
func_data->context, (ur_native_handle_t *)&native_context));
70+
ASSERT_NE(CUcontext{}, native_context);
71+
72+
CUDA_MEMCPY3D params{};
73+
params.srcMemoryType = CU_MEMORYTYPE_DEVICE;
74+
params.srcDevice = (CUdeviceptr)func_data->src;
75+
params.srcHost = nullptr;
76+
params.dstMemoryType = CU_MEMORYTYPE_DEVICE;
77+
params.dstDevice = (CUdeviceptr)func_data->dst;
78+
params.dstHost = nullptr;
79+
params.WidthInBytes =
80+
uur::command_buffer::urCommandBufferNativeAppendTest::allocation_size;
81+
params.Height = 1;
82+
params.Depth = 1;
83+
84+
CUgraphNode node;
85+
auto res = cuGraphAddMemcpyNode(&node, native_graph, nullptr, 0, &params,
86+
native_context);
87+
ASSERT_EQ(res, CUDA_SUCCESS);
88+
}
89+
} // end anonymous namespace
90+
91+
// Test command-buffer with a single native command, which when enqueued has an
92+
// eager UR command as a predecessor and eager UR command as a successor.
93+
TEST_P(urCudaCommandBufferNativeAppendTest, Success) {
94+
InteropData data{child_cmd_buf, context, src_device_ptr, dst_device_ptr};
95+
ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
96+
command_buffer, &interop_func, &data, child_cmd_buf, 0, nullptr,
97+
nullptr));
98+
ASSERT_SUCCESS(urCommandBufferFinalizeExp(command_buffer));
99+
100+
ASSERT_SUCCESS(urEnqueueUSMFill(queue, src_device_ptr, sizeof(val), &val,
101+
allocation_size, 0, nullptr, nullptr));
102+
ASSERT_SUCCESS(
103+
urEnqueueCommandBufferExp(queue, command_buffer, 0, nullptr, nullptr));
104+
105+
ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, true, host_vec.data(),
106+
dst_device_ptr, allocation_size, 0, nullptr,
107+
nullptr));
108+
109+
for (auto &i : host_vec) {
110+
ASSERT_EQ(i, val);
111+
}
112+
}
113+
114+
// Test command-buffer native command with other command-buffer commands as
115+
// predecessors and successors
116+
TEST_P(urCudaCommandBufferNativeAppendTest, Dependencies) {
117+
ur_exp_command_buffer_sync_point_t sync_point_1;
118+
ASSERT_SUCCESS(urCommandBufferAppendUSMFillExp(
119+
command_buffer, src_device_ptr, &val, sizeof(val), allocation_size, 0,
120+
nullptr, 0, nullptr, &sync_point_1, nullptr, nullptr));
121+
122+
InteropData data{child_cmd_buf, context, src_device_ptr, dst_device_ptr};
123+
ur_exp_command_buffer_sync_point_t sync_point_2;
124+
ASSERT_SUCCESS(urCommandBufferAppendNativeCommandExp(
125+
command_buffer, &interop_func, &data, child_cmd_buf, 1, &sync_point_1,
126+
&sync_point_2));
127+
128+
ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
129+
command_buffer, host_vec.data(), dst_device_ptr, allocation_size, 1,
130+
&sync_point_2, 0, nullptr, nullptr, nullptr, nullptr));
131+
132+
ASSERT_SUCCESS(urCommandBufferFinalizeExp(command_buffer));
133+
134+
ASSERT_SUCCESS(
135+
urEnqueueCommandBufferExp(queue, command_buffer, 0, nullptr, nullptr));
136+
// Host results are read below, so a failed queue drain must fail the test.
137+
ASSERT_SUCCESS(urQueueFinish(queue));
138+
for (auto &i : host_vec) {
139+
ASSERT_EQ(i, val);
140+
}
141+
}

0 commit comments

Comments
 (0)