Skip to content

Commit 29a2502

Browse files
[UR] fix: synchronize resubmission of the same command buffer (#18566)
Fixes #18610
1 parent 7ab3e81 commit 29a2502

File tree

3 files changed

+129
-42
lines changed

3 files changed

+129
-42
lines changed

unified-runtime/source/adapters/level_zero/v2/queue_immediate_in_order.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1026,9 +1026,14 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBufferExp(
10261026
ur_event_handle_t executionEvent =
10271027
hCommandBuffer->getExecutionEventUnlocked();
10281028

1029+
if (executionEvent != nullptr) {
1030+
ZE2UR_CALL(zeEventHostSynchronize,
1031+
(executionEvent->getZeEvent(), UINT64_MAX));
1032+
}
1033+
10291034
UR_CALL(enqueueGenericCommandListsExp(
10301035
1, &commandBufferCommandList, phEvent, numEventsInWaitList,
1031-
phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, executionEvent));
1036+
phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, nullptr));
10321037
UR_CALL(hCommandBuffer->registerExecutionEventUnlocked(*phEvent));
10331038
if (internalEvent != nullptr) {
10341039
internalEvent->release();

unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp

Lines changed: 96 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -17,25 +17,19 @@
1717
// enqueue_update.cpp test for a test verifying the order of submissions, as
1818
// the input/output to the kernels can be modified between the submissions.
1919
struct urEnqueueCommandBufferExpTest
20-
: uur::command_buffer::urCommandBufferExpExecutionTest {
20+
: uur::command_buffer::urCommandBufferExpExecutionTestWithParam<
21+
ur_queue_flags_t> {
2122
virtual void SetUp() override {
2223
program_name = "increment";
23-
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp());
24+
UUR_RETURN_ON_FATAL_FAILURE(
25+
urCommandBufferExpExecutionTestWithParam::SetUp());
2426

25-
// https://github.com/intel/llvm/issues/18610
26-
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});
27-
28-
// Create an in-order queue
27+
// Create an in-order or out-of-order queue, depending on the passed parameter
28+
ur_queue_flags_t queue_type = std::get<1>(GetParam());
2929
ur_queue_properties_t queue_properties = {
30-
UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0};
31-
ASSERT_SUCCESS(
32-
urQueueCreate(context, device, &queue_properties, &in_order_queue));
33-
34-
// Create an out-of-order queue
35-
queue_properties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
36-
ASSERT_SUCCESS(
37-
urQueueCreate(context, device, &queue_properties, &out_of_order_queue));
38-
ASSERT_NE(out_of_order_queue, nullptr);
30+
UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, queue_type};
31+
ASSERT_SUCCESS(urQueueCreate(context, device, &queue_properties,
32+
&in_or_out_of_order_queue));
3933

4034
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
4135
allocation_size, &device_ptr));
@@ -45,6 +39,20 @@ struct urEnqueueCommandBufferExpTest
4539
ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptr, sizeof(zero_pattern),
4640
&zero_pattern, allocation_size, 0, nullptr,
4741
nullptr));
42+
43+
for (int i = 0; i < num_copy_buffers; i++) {
44+
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
45+
buffer_size * sizeof(int32_t),
46+
(void **)&(dst_buffers[i])));
47+
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
48+
buffer_size * sizeof(int32_t),
49+
(void **)&(src_buffers[i])));
50+
51+
ASSERT_SUCCESS(urEnqueueUSMFill(
52+
queue, src_buffers[i], sizeof(zero_pattern), &zero_pattern,
53+
buffer_size * sizeof(int32_t), 0, nullptr, nullptr));
54+
}
55+
4856
ASSERT_SUCCESS(urQueueFinish(queue));
4957

5058
// Create command-buffer with a single kernel that does "Ptr[i] += 1;"
@@ -53,53 +61,101 @@ struct urEnqueueCommandBufferExpTest
5361
cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size,
5462
nullptr, 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr,
5563
nullptr));
64+
65+
// Schedule memory copying in order to prolong the buffer execution
66+
for (int i = 0; i < num_copy_buffers; i++) {
67+
ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
68+
cmd_buf_handle, dst_buffers[i], src_buffers[i],
69+
buffer_size * sizeof(int32_t), 0, nullptr, 0, nullptr, nullptr,
70+
nullptr, nullptr));
71+
}
72+
5673
ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle));
5774
}
5875

5976
virtual void TearDown() override {
60-
if (in_order_queue) {
61-
EXPECT_SUCCESS(urQueueRelease(in_order_queue));
62-
}
63-
64-
if (out_of_order_queue) {
65-
EXPECT_SUCCESS(urQueueRelease(out_of_order_queue));
77+
if (in_or_out_of_order_queue) {
78+
EXPECT_SUCCESS(urQueueRelease(in_or_out_of_order_queue));
6679
}
6780

6881
if (device_ptr) {
6982
EXPECT_SUCCESS(urUSMFree(context, device_ptr));
7083
}
7184

72-
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::TearDown());
85+
for (int i = 0; i < num_copy_buffers; i++) {
86+
if (dst_buffers[i]) {
87+
EXPECT_SUCCESS(urUSMFree(context, dst_buffers[i]));
88+
}
89+
90+
if (src_buffers[i]) {
91+
EXPECT_SUCCESS(urUSMFree(context, src_buffers[i]));
92+
}
93+
}
94+
95+
UUR_RETURN_ON_FATAL_FAILURE(
96+
urCommandBufferExpExecutionTestWithParam::TearDown());
7397
}
7498

75-
ur_queue_handle_t in_order_queue = nullptr;
76-
ur_queue_handle_t out_of_order_queue = nullptr;
99+
ur_queue_handle_t in_or_out_of_order_queue = nullptr;
77100

78101
static constexpr size_t global_size = 16;
79102
static constexpr size_t global_offset = 0;
80103
static constexpr size_t n_dimensions = 1;
81104
static constexpr size_t allocation_size = sizeof(uint32_t) * global_size;
82105
void *device_ptr = nullptr;
106+
107+
static constexpr int num_copy_buffers = 10;
108+
static constexpr int buffer_size = 512;
109+
int32_t *dst_buffers[num_copy_buffers] = {};
110+
int32_t *src_buffers[num_copy_buffers] = {};
83111
};
84112

85-
UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueCommandBufferExpTest);
113+
std::string deviceTestWithQueueTypePrinter(
114+
const ::testing::TestParamInfo<
115+
std::tuple<uur::DeviceTuple, ur_queue_flags_t>> &info) {
116+
auto device = std::get<0>(info.param).device;
117+
auto queue_type = std::get<1>(info.param);
118+
119+
std::stringstream ss;
120+
121+
switch (queue_type) {
122+
case 0:
123+
ss << "InOrderQueue";
124+
break;
125+
126+
case UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE:
127+
ss << "OutOfOrderQueue";
128+
break;
129+
130+
default:
131+
ss << "UnspecifiedQueueType" << queue_type;
132+
}
133+
134+
return uur::GetPlatformAndDeviceName(device) + "__" +
135+
uur::GTestSanitizeString(ss.str());
136+
}
137+
138+
UUR_DEVICE_TEST_SUITE_WITH_PARAM(
139+
urEnqueueCommandBufferExpTest,
140+
testing::Values(0, UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE),
141+
deviceTestWithQueueTypePrinter);
86142

87143
// Tests that the same command-buffer submitted across two different
88144
// queues has an implicit dependency on first submission
89145
TEST_P(urEnqueueCommandBufferExpTest, SerializeAcrossQueues) {
90146
// Execute command-buffer to first in-order queue (created by parent
91-
// urQueueTest fixture)
147+
// urQueueTestWithParam fixture)
92148
ASSERT_SUCCESS(
93149
urEnqueueCommandBufferExp(queue, cmd_buf_handle, 0, nullptr, nullptr));
94150

95151
// Execute command-buffer to second queue (in-order or out-of-order, depending on the test parameter), should have implicit
96152
// dependency on first submission.
97-
ASSERT_SUCCESS(urEnqueueCommandBufferExp(in_order_queue, cmd_buf_handle, 0,
98-
nullptr, nullptr));
153+
ASSERT_SUCCESS(urEnqueueCommandBufferExp(
154+
in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr));
99155

100156
// Wait for both submissions to complete
101157
ASSERT_SUCCESS(urQueueFlush(queue));
102-
ASSERT_SUCCESS(urQueueFinish(in_order_queue));
158+
ASSERT_SUCCESS(urQueueFinish(in_or_out_of_order_queue));
103159

104160
std::vector<uint32_t> Output(global_size);
105161
ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, Output.data(), device_ptr,
@@ -113,24 +169,23 @@ TEST_P(urEnqueueCommandBufferExpTest, SerializeAcrossQueues) {
113169
}
114170
}
115171

116-
// Tests that submitting a command-buffer twice to an out-of-order queue
117-
// relying on implicit serialization semantics for dependencies.
118-
TEST_P(urEnqueueCommandBufferExpTest, SerializeOutofOrderQueue) {
119-
ASSERT_SUCCESS(urEnqueueCommandBufferExp(out_of_order_queue, cmd_buf_handle,
120-
0, nullptr, nullptr));
121-
ASSERT_SUCCESS(urEnqueueCommandBufferExp(out_of_order_queue, cmd_buf_handle,
122-
0, nullptr, nullptr));
172+
TEST_P(urEnqueueCommandBufferExpTest, SerializeInOrOutOfOrderQueue) {
173+
const int iterations = 5;
174+
for (int i = 0; i < iterations; i++) {
175+
ASSERT_SUCCESS(urEnqueueCommandBufferExp(
176+
in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr));
177+
}
123178

124179
// Wait for all submissions to complete
125-
ASSERT_SUCCESS(urQueueFinish(out_of_order_queue));
180+
ASSERT_SUCCESS(urQueueFinish(in_or_out_of_order_queue));
126181

127182
std::vector<uint32_t> Output(global_size);
128-
ASSERT_SUCCESS(urEnqueueUSMMemcpy(out_of_order_queue, true, Output.data(),
129-
device_ptr, allocation_size, 0, nullptr,
130-
nullptr));
183+
ASSERT_SUCCESS(urEnqueueUSMMemcpy(in_or_out_of_order_queue, true,
184+
Output.data(), device_ptr, allocation_size,
185+
0, nullptr, nullptr));
131186

132187
// Verify
133-
const uint32_t reference = 2;
188+
const uint32_t reference = iterations;
134189
for (size_t i = 0; i < global_size; i++) {
135190
ASSERT_EQ(reference, Output[i]);
136191
}

unified-runtime/test/conformance/exp_command_buffer/fixtures.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,33 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
114114
ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
115115
};
116116

117+
template <class T>
118+
struct urCommandBufferExpExecutionTestWithParam
119+
: urKernelExecutionTestWithParam<T> {
120+
void SetUp() override {
121+
UUR_RETURN_ON_FATAL_FAILURE(
122+
uur::urKernelExecutionTestWithParam<T>::SetUp());
123+
124+
UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(this->device));
125+
126+
ur_exp_command_buffer_desc_t desc{UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC,
127+
nullptr, false, false, false};
128+
ASSERT_SUCCESS(urCommandBufferCreateExp(this->context, this->device, &desc,
129+
&cmd_buf_handle));
130+
ASSERT_NE(cmd_buf_handle, nullptr);
131+
}
132+
133+
void TearDown() override {
134+
if (cmd_buf_handle) {
135+
EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
136+
}
137+
UUR_RETURN_ON_FATAL_FAILURE(
138+
uur::urKernelExecutionTestWithParam<T>::TearDown());
139+
}
140+
141+
ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
142+
};
143+
117144
struct urUpdatableCommandBufferExpTest : uur::urQueueTest {
118145
void SetUp() override {
119146
UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp());

0 commit comments

Comments
 (0)