Skip to content

[UR] fix: synchronize resubmission of the same command buffer #18566

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1026,9 +1026,14 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBufferExp(
ur_event_handle_t executionEvent =
hCommandBuffer->getExecutionEventUnlocked();

if (executionEvent != nullptr) {
ZE2UR_CALL(zeEventHostSynchronize,
(executionEvent->getZeEvent(), UINT64_MAX));
}

UR_CALL(enqueueGenericCommandListsExp(
1, &commandBufferCommandList, phEvent, numEventsInWaitList,
phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, executionEvent));
phEventWaitList, UR_COMMAND_ENQUEUE_COMMAND_BUFFER_EXP, nullptr));
UR_CALL(hCommandBuffer->registerExecutionEventUnlocked(*phEvent));
if (internalEvent != nullptr) {
internalEvent->release();
Expand Down
137 changes: 96 additions & 41 deletions unified-runtime/test/conformance/exp_command_buffer/enqueue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,19 @@
// enqueue_update.cpp test for a test verifying the order of submissions, as
// the input/output to the kernels can be modified between the submissions.
struct urEnqueueCommandBufferExpTest
: uur::command_buffer::urCommandBufferExpExecutionTest {
: uur::command_buffer::urCommandBufferExpExecutionTestWithParam<
ur_queue_flags_t> {
virtual void SetUp() override {
program_name = "increment";
UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::SetUp());
UUR_RETURN_ON_FATAL_FAILURE(
urCommandBufferExpExecutionTestWithParam::SetUp());

// https://github.com/intel/llvm/issues/18610
UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{});

// Create an in-order queue
// Create an in-order or out-of-order queue, depending on the passed parameter
ur_queue_flags_t queue_type = std::get<1>(GetParam());
ur_queue_properties_t queue_properties = {
UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, 0};
ASSERT_SUCCESS(
urQueueCreate(context, device, &queue_properties, &in_order_queue));

// Create an out-of-order queue
queue_properties.flags = UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
ASSERT_SUCCESS(
urQueueCreate(context, device, &queue_properties, &out_of_order_queue));
ASSERT_NE(out_of_order_queue, nullptr);
UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, nullptr, queue_type};
ASSERT_SUCCESS(urQueueCreate(context, device, &queue_properties,
&in_or_out_of_order_queue));

ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
allocation_size, &device_ptr));
Expand All @@ -45,6 +39,20 @@ struct urEnqueueCommandBufferExpTest
ASSERT_SUCCESS(urEnqueueUSMFill(queue, device_ptr, sizeof(zero_pattern),
&zero_pattern, allocation_size, 0, nullptr,
nullptr));

for (int i = 0; i < num_copy_buffers; i++) {
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
buffer_size * sizeof(int32_t),
(void **)&(dst_buffers[i])));
ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr,
buffer_size * sizeof(int32_t),
(void **)&(src_buffers[i])));

ASSERT_SUCCESS(urEnqueueUSMFill(
queue, src_buffers[i], sizeof(zero_pattern), &zero_pattern,
buffer_size * sizeof(int32_t), 0, nullptr, nullptr));
}

ASSERT_SUCCESS(urQueueFinish(queue));

// Create command-buffer with a single kernel that does "Ptr[i] += 1;"
Expand All @@ -53,53 +61,101 @@ struct urEnqueueCommandBufferExpTest
cmd_buf_handle, kernel, n_dimensions, &global_offset, &global_size,
nullptr, 0, nullptr, 0, nullptr, 0, nullptr, nullptr, nullptr,
nullptr));

// Schedule memory copying in order to prolong the buffer execution
for (int i = 0; i < num_copy_buffers; i++) {
ASSERT_SUCCESS(urCommandBufferAppendUSMMemcpyExp(
cmd_buf_handle, dst_buffers[i], src_buffers[i],
buffer_size * sizeof(int32_t), 0, nullptr, 0, nullptr, nullptr,
nullptr, nullptr));
}

ASSERT_SUCCESS(urCommandBufferFinalizeExp(cmd_buf_handle));
}

virtual void TearDown() override {
if (in_order_queue) {
EXPECT_SUCCESS(urQueueRelease(in_order_queue));
}

if (out_of_order_queue) {
EXPECT_SUCCESS(urQueueRelease(out_of_order_queue));
if (in_or_out_of_order_queue) {
EXPECT_SUCCESS(urQueueRelease(in_or_out_of_order_queue));
}

if (device_ptr) {
EXPECT_SUCCESS(urUSMFree(context, device_ptr));
}

UUR_RETURN_ON_FATAL_FAILURE(urCommandBufferExpExecutionTest::TearDown());
for (int i = 0; i < num_copy_buffers; i++) {
if (dst_buffers[i]) {
EXPECT_SUCCESS(urUSMFree(context, dst_buffers[i]));
}

if (src_buffers[i]) {
EXPECT_SUCCESS(urUSMFree(context, src_buffers[i]));
}
}

UUR_RETURN_ON_FATAL_FAILURE(
urCommandBufferExpExecutionTestWithParam::TearDown());
}

ur_queue_handle_t in_order_queue = nullptr;
ur_queue_handle_t out_of_order_queue = nullptr;
ur_queue_handle_t in_or_out_of_order_queue = nullptr;

static constexpr size_t global_size = 16;
static constexpr size_t global_offset = 0;
static constexpr size_t n_dimensions = 1;
static constexpr size_t allocation_size = sizeof(uint32_t) * global_size;
void *device_ptr = nullptr;

static constexpr int num_copy_buffers = 10;
static constexpr int buffer_size = 512;
int32_t *dst_buffers[num_copy_buffers] = {};
int32_t *src_buffers[num_copy_buffers] = {};
};

UUR_INSTANTIATE_DEVICE_TEST_SUITE(urEnqueueCommandBufferExpTest);
// Produces the gtest parameter name for a (device, queue flags) test instance.
//
// The name is "<platform and device name>__<queue label>", where the queue
// label is "InOrderQueue" for flags == 0, "OutOfOrderQueue" for
// UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE, and otherwise
// "UnspecifiedQueueType" followed by the raw flags value. The label is passed
// through uur::GTestSanitizeString before being appended.
std::string deviceTestWithQueueTypePrinter(
    const ::testing::TestParamInfo<
        std::tuple<uur::DeviceTuple, ur_queue_flags_t>> &info) {
  auto target_device = std::get<0>(info.param).device;
  auto queue_flags = std::get<1>(info.param);

  std::stringstream label;
  if (queue_flags == 0) {
    label << "InOrderQueue";
  } else if (queue_flags == UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
    label << "OutOfOrderQueue";
  } else {
    // Any other flag combination: keep the raw value visible in the name.
    label << "UnspecifiedQueueType" << queue_flags;
  }

  return uur::GetPlatformAndDeviceName(target_device) + "__" +
         uur::GTestSanitizeString(label.str());
}

UUR_DEVICE_TEST_SUITE_WITH_PARAM(
urEnqueueCommandBufferExpTest,
testing::Values(0, UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE),
deviceTestWithQueueTypePrinter);

// Tests that the same command-buffer submitted across different queues
// has an implicit dependency on the first submission
TEST_P(urEnqueueCommandBufferExpTest, SerializeAcrossQueues) {
// Execute command-buffer to first in-order queue (created by parent
// urQueueTest fixture)
// urQueueTestWithParam fixture)
ASSERT_SUCCESS(
urEnqueueCommandBufferExp(queue, cmd_buf_handle, 0, nullptr, nullptr));

// Execute command-buffer to second in-order queue, should have implicit
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Execute command-buffer to second in-order queue, should have implicit
// Execute command-buffer to second queue, should have implicit

// dependency on first submission.
ASSERT_SUCCESS(urEnqueueCommandBufferExp(in_order_queue, cmd_buf_handle, 0,
nullptr, nullptr));
ASSERT_SUCCESS(urEnqueueCommandBufferExp(
in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr));

// Wait for both submissions to complete
ASSERT_SUCCESS(urQueueFlush(queue));
ASSERT_SUCCESS(urQueueFinish(in_order_queue));
ASSERT_SUCCESS(urQueueFinish(in_or_out_of_order_queue));

std::vector<uint32_t> Output(global_size);
ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, false, Output.data(), device_ptr,
Expand All @@ -113,24 +169,23 @@ TEST_P(urEnqueueCommandBufferExpTest, SerializeAcrossQueues) {
}
}

// Tests that submitting a command-buffer multiple times to the same queue
// relies on implicit serialization semantics for dependencies.
TEST_P(urEnqueueCommandBufferExpTest, SerializeOutofOrderQueue) {
ASSERT_SUCCESS(urEnqueueCommandBufferExp(out_of_order_queue, cmd_buf_handle,
0, nullptr, nullptr));
ASSERT_SUCCESS(urEnqueueCommandBufferExp(out_of_order_queue, cmd_buf_handle,
0, nullptr, nullptr));
TEST_P(urEnqueueCommandBufferExpTest, SerializeInOrOutOfOrderQueue) {
const int iterations = 5;
for (int i = 0; i < iterations; i++) {
ASSERT_SUCCESS(urEnqueueCommandBufferExp(
in_or_out_of_order_queue, cmd_buf_handle, 0, nullptr, nullptr));
}

// Wait for all submissions to complete
ASSERT_SUCCESS(urQueueFinish(out_of_order_queue));
ASSERT_SUCCESS(urQueueFinish(in_or_out_of_order_queue));

std::vector<uint32_t> Output(global_size);
ASSERT_SUCCESS(urEnqueueUSMMemcpy(out_of_order_queue, true, Output.data(),
device_ptr, allocation_size, 0, nullptr,
nullptr));
ASSERT_SUCCESS(urEnqueueUSMMemcpy(in_or_out_of_order_queue, true,
Output.data(), device_ptr, allocation_size,
0, nullptr, nullptr));

// Verify
const uint32_t reference = 2;
const uint32_t reference = iterations;
for (size_t i = 0; i < global_size; i++) {
ASSERT_EQ(reference, Output[i]);
}
Expand Down
27 changes: 27 additions & 0 deletions unified-runtime/test/conformance/exp_command_buffer/fixtures.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,33 @@ struct urCommandBufferExpExecutionTest : uur::urKernelExecutionTest {
ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
};

// Parameterized test fixture that owns a command-buffer.
//
// Builds on urKernelExecutionTestWithParam<T>: SetUp() runs the base set-up,
// checks command-buffer support for the device, and creates `cmd_buf_handle`;
// TearDown() releases the handle (if one was created) before running the base
// tear-down.
template <class T>
struct urCommandBufferExpExecutionTestWithParam
: urKernelExecutionTestWithParam<T> {
// Creates the command-buffer used by derived tests. Each step is wrapped in a
// macro that stops set-up on failure, so later steps only run when the
// earlier ones succeeded.
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(
uur::urKernelExecutionTestWithParam<T>::SetUp());

// NOTE(review): presumably skips/fails the test when the device lacks
// command-buffer support — confirm against checkCommandBufferSupport.
UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(this->device));

// NOTE(review): the three `false` values presumably leave all optional
// command-buffer features disabled — confirm the field order in
// ur_exp_command_buffer_desc_t.
ur_exp_command_buffer_desc_t desc{UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC,
nullptr, false, false, false};
ASSERT_SUCCESS(urCommandBufferCreateExp(this->context, this->device, &desc,
&cmd_buf_handle));
ASSERT_NE(cmd_buf_handle, nullptr);
}

// Releases the command-buffer, then tears down the base fixture. The handle
// is checked first because SetUp() may have returned before creating it.
void TearDown() override {
if (cmd_buf_handle) {
EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle));
}
UUR_RETURN_ON_FATAL_FAILURE(
uur::urKernelExecutionTestWithParam<T>::TearDown());
}

// Command-buffer created in SetUp(); nullptr until creation succeeds.
ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr;
};

struct urUpdatableCommandBufferExpTest : uur::urQueueTest {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp());
Expand Down