Skip to content

[Offload] Allow setting null arguments in olLaunchKernel #141958

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions offload/liboffload/API/Kernel.td
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,21 @@ def : Function {
let name = "olLaunchKernel";
let desc = "Enqueue a kernel launch with the specified size and parameters.";
let details = [
"If a queue is not specified, kernel execution happens synchronously"
"If a queue is not specified, kernel execution happens synchronously",
"ArgumentsData may be set to NULL (to indicate no parameters)"
];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>,
Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
let returns = [
Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>,
Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
];
}
5 changes: 5 additions & 0 deletions offload/unittests/OffloadAPI/device_code/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ macro(add_offload_test_device_code test_filename test_name)
add_custom_command(OUTPUT ${BIN_PATH}
COMMAND
${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
${ARGN}
-march=${LIBOMPTARGET_DEP_CUDA_ARCH}
--cuda-path=${CUDA_ROOT}
${SRC_PATH} -o ${BIN_PATH}
Expand All @@ -21,6 +22,7 @@ macro(add_offload_test_device_code test_filename test_name)
add_custom_command(OUTPUT ${BIN_PATH}
COMMAND
${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
${ARGN}
-mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
${SRC_PATH} -o ${BIN_PATH}
DEPENDS ${SRC_PATH}
Expand Down Expand Up @@ -61,6 +63,9 @@ endif()

add_offload_test_device_code(foo.c foo)
add_offload_test_device_code(bar.c bar)
# By default, amdhsa will add a number of "hidden" arguments to the kernel defintion
# O3 disables this, and results in a kernel function with actually no arguments as seen by liboffload
add_offload_test_device_code(noargs.c noargs -O3)

add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})

Expand Down
3 changes: 3 additions & 0 deletions offload/unittests/OffloadAPI/device_code/noargs.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#include <gpuintrin.h>

__gpu_kernel void noargs() { (void)0; }
27 changes: 23 additions & 4 deletions offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
#include <OffloadAPI.h>
#include <gtest/gtest.h>

struct olLaunchKernelTest : OffloadQueueTest {
void SetUp() override {
struct LaunchKernelTestBase : OffloadQueueTest {
void SetUpKernel(const char *kernel) {
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assumes that the kernel name and binary name are the same, guess it's fine for now but probably something we want much more complicated tests for.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think any test for which that doesn't hold will probably want to inherit directly from OffloadQueueTest and do its own binary handling logic.

ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
DeviceBin->getBufferSize(), &Program));
ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
ASSERT_SUCCESS(olGetKernel(Program, kernel, &Kernel));
LaunchArgs.Dimensions = 1;
LaunchArgs.GroupSizeX = 64;
LaunchArgs.GroupSizeY = 1;
Expand All @@ -43,8 +43,20 @@ struct olLaunchKernelTest : OffloadQueueTest {
ol_kernel_launch_size_args_t LaunchArgs{};
};

struct olLaunchKernelTest : LaunchKernelTestBase {
void SetUp() override {
RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("foo"));
}
};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelTest);

struct olLaunchKernelNoArgsTest : LaunchKernelTestBase {
void SetUp() override {
RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("noargs"));
}
};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelNoArgsTest);

TEST_P(olLaunchKernelTest, Success) {
void *Mem;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
Expand All @@ -66,6 +78,13 @@ TEST_P(olLaunchKernelTest, Success) {
ASSERT_SUCCESS(olMemFree(Mem));
}

TEST_P(olLaunchKernelNoArgsTest, Success) {
ASSERT_SUCCESS(
olLaunchKernel(Queue, Device, Kernel, nullptr, 0, &LaunchArgs, nullptr));

ASSERT_SUCCESS(olWaitQueue(Queue));
}

TEST_P(olLaunchKernelTest, SuccessSynchronous) {
void *Mem;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED,
Expand Down
Loading