[Offload] Allow setting null arguments in olLaunchKernel #141958

RossBrunton · 2025-05-29T14:59:26Z

No description provided.

llvmbot · 2025-05-29T15:00:02Z

@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-offload

Author: Ross Brunton (RossBrunton)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/141958.diff

6 Files Affected:

(modified) offload/liboffload/API/Kernel.td (+4-2)
(modified) offload/liboffload/include/generated/OffloadAPI.h (+4-2)
(modified) offload/liboffload/include/generated/OffloadEntryPoints.inc (+6-5)
(modified) offload/unittests/OffloadAPI/device_code/CMakeLists.txt (+1)
(added) offload/unittests/OffloadAPI/device_code/noargs.c (+5)
(modified) offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp (+23-4)

diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 247f9c1bf5b6a..45e3d8112791c 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -43,19 +43,21 @@ def : Function {
     let name = "olLaunchKernel";
     let desc = "Enqueue a kernel launch with the specified size and parameters.";
     let details = [
-        "If a queue is not specified, kernel execution happens synchronously"
+        "If a queue is not specified, kernel execution happens synchronously",
+        "ArgumentsData may be set to NULL (to indicate no parameters)"
     ];
     let params = [
         Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
         Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
         Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
-        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
+        Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>,
         Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
         Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
         Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
     ];
     let returns = [
         Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
+        Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>,
         Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
     ];
 }
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index a1d7540519e32..d918a51e8ffd7 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -692,6 +692,7 @@ typedef struct ol_kernel_launch_size_args_t {
 ///
 /// @details
 ///    - If a queue is not specified, kernel execution happens synchronously
+///    - ArgumentsData may be set to NULL (to indicate no parameters)
 ///
 /// @returns
 ///     - ::OL_RESULT_SUCCESS
@@ -699,13 +700,14 @@ typedef struct ol_kernel_launch_size_args_t {
 ///     - ::OL_ERRC_DEVICE_LOST
 ///     - ::OL_ERRC_INVALID_ARGUMENT
 ///         + `Queue == NULL && EventOut != NULL`
+///     - ::OL_ERRC_INVALID_ARGUMENT
+///         + `ArgumentsSize > 0 && ArgumentsData == NULL`
 ///     - ::OL_ERRC_INVALID_DEVICE
 ///         + If Queue is non-null but does not belong to Device
 ///     - ::OL_ERRC_INVALID_NULL_HANDLE
 ///         + `NULL == Device`
 ///         + `NULL == Kernel`
 ///     - ::OL_ERRC_INVALID_NULL_POINTER
-///         + `NULL == ArgumentsData`
 ///         + `NULL == LaunchSizeArgs`
 OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
     // [in][optional] handle of the queue
@@ -714,7 +716,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
     ol_device_handle_t Device,
     // [in] handle of the kernel
     ol_kernel_handle_t Kernel,
-    // [in] pointer to the kernel argument struct
+    // [in][optional] pointer to the kernel argument struct
     const void *ArgumentsData,
     // [in] size of the kernel argument struct
     size_t ArgumentsSize,
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 9feebeea09ec3..d629e14a6bb55 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -838,6 +838,12 @@ olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
           "validation failure: Queue == NULL && EventOut != NULL");
     }
 
+    if (ArgumentsSize > 0 && ArgumentsData == NULL) {
+      return createOffloadError(
+          error::ErrorCode::INVALID_ARGUMENT,
+          "validation failure: ArgumentsSize > 0 && ArgumentsData == NULL");
+    }
+
     if (NULL == Device) {
       return createOffloadError(error::ErrorCode::INVALID_NULL_HANDLE,
                                 "validation failure: NULL == Device");
@@ -848,11 +854,6 @@ olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
                                 "validation failure: NULL == Kernel");
     }
 
-    if (NULL == ArgumentsData) {
-      return createOffloadError(error::ErrorCode::INVALID_NULL_POINTER,
-                                "validation failure: NULL == ArgumentsData");
-    }
-
     if (NULL == LaunchSizeArgs) {
       return createOffloadError(error::ErrorCode::INVALID_NULL_POINTER,
                                 "validation failure: NULL == LaunchSizeArgs");
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 5814943e4aaa9..91a9ca24786a3 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -61,6 +61,7 @@ endif()
 
 add_offload_test_device_code(foo.c foo)
 add_offload_test_device_code(bar.c bar)
+add_offload_test_device_code(noargs.c noargs)
 
 add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
 
diff --git a/offload/unittests/OffloadAPI/device_code/noargs.c b/offload/unittests/OffloadAPI/device_code/noargs.c
new file mode 100644
index 0000000000000..a46c9469b13cd
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/noargs.c
@@ -0,0 +1,5 @@
+#include <gpuintrin.h>
+
+__gpu_kernel void noargs() {
+  (void)0;
+}
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 20462e22fd73f..3b6a25139c6cf 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -10,14 +10,14 @@
 #include <OffloadAPI.h>
 #include <gtest/gtest.h>
 
-struct olLaunchKernelTest : OffloadQueueTest {
-  void SetUp() override {
+struct LaunchKernelTestBase : OffloadQueueTest {
+  void SetUpKernel(const char *kernel) {
     RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
-    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin));
     ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
     ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
                                    DeviceBin->getBufferSize(), &Program));
-    ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
+    ASSERT_SUCCESS(olGetKernel(Program, kernel, &Kernel));
     LaunchArgs.Dimensions = 1;
     LaunchArgs.GroupSizeX = 64;
     LaunchArgs.GroupSizeY = 1;
@@ -43,8 +43,20 @@ struct olLaunchKernelTest : OffloadQueueTest {
   ol_kernel_launch_size_args_t LaunchArgs{};
 };
 
+struct olLaunchKernelTest : LaunchKernelTestBase {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("foo"));
+  }
+};
 OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelTest);
 
+struct olLaunchKernelNoArgsTest : LaunchKernelTestBase {
+  void SetUp() override {
+    RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("noargs"));
+  }
+};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelNoArgsTest);
+
 TEST_P(olLaunchKernelTest, Success) {
   void *Mem;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, 64, &Mem));
@@ -65,6 +77,13 @@ TEST_P(olLaunchKernelTest, Success) {
   ASSERT_SUCCESS(olMemFree(Mem));
 }
 
+TEST_P(olLaunchKernelNoArgsTest, Success) {
+  ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, nullptr, 0,
+                                &LaunchArgs, nullptr));
+
+  ASSERT_SUCCESS(olWaitQueue(Queue));
+}
+
 TEST_P(olLaunchKernelTest, SuccessSynchronous) {
   void *Mem;
   ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, 64, &Mem));

jhuber6 · 2025-05-29T15:01:32Z

offload/liboffload/include/generated/OffloadEntryPoints.inc

+    if (ArgumentsSize > 0 && ArgumentsData == NULL) {
+      return createOffloadError(


I'd imagine the size of zero is the important bit, and then the pointer value just becomes a no-op since it's not touched.

What do you mean? This check is autogenerated from the spec.

I think I means the && ArgumentsData == NULL is redundant

I don't think it is? It should be an error condition if a size is specified and ArgumentsData hasn't been.

Is there anything either of you want changing about the error condition here?

github-actions · 2025-05-29T15:01:56Z

✅ With the latest revision this PR passed the C/C++ code formatter.

RossBrunton · 2025-05-30T09:20:26Z

I've pushed up an additional change to this PR; the thing I was debugging originally was that the amdgpu driver would throw an error if a function was being called with no arguments at all.

However, more digging has revealed that even if you build a kernel with no arguments at the C level, when compiled at O0 a number of "hidden" arguments are added. The new change compiles the noargs.cpp test with O3 and fixes amdgpu to pass.

Not sure if the fixes to amdgpu are worth moving into their own MR or not though.

Also, I checked: kernarg_address in hsa_kernel_dispatch_packet_t may be NULL.

jhuber6 · 2025-05-30T12:45:46Z

I've pushed up an additional change to this PR; the thing I was debugging originally was that the amdgpu driver would throw an error if a function was being called with no arguments at all.

However, more digging has revealed that even if you build a kernel with no arguments at the C level, when compiled at O0 a number of "hidden" arguments are added. The new change compiles the noargs.cpp test with O3 and fixes amdgpu to pass.

Not sure if the fixes to amdgpu are worth moving into their own MR or not though.

Also, I checked: kernarg_address in hsa_kernel_dispatch_packet_t may be NULL.

Oh yeah, I was going to fix that. There's alignment issues as well.

arsenm · 2025-05-30T14:06:33Z

However, more digging has revealed that even if you build a kernel with no arguments at the C level, when compiled at O0 a number of "hidden" arguments are added.

More precisely they are there to begin with and optimized out if unused

jhuber6

I'm going to make a separate fix for the AMDGPU stuff. free(malloc(0)) is explicitly defined by the standard to work so that is what we should do. If the size is zero a nullptr argument is perfectly valid.

jhuber6 · 2025-05-30T21:30:44Z

#142199 should ideally fix this for AMDGPU, then this should be a small change + test.

RossBrunton · 2025-06-02T09:47:23Z

@jhuber6 Thanks for looking into this. I've rebased on top of your change and, with another small amdgpu modification, it looks like the test is passing.

Specifically, if we don't specify KernelArgs, we now skip the schedReleaseBuffer callback action entirely.

jhuber6 · 2025-06-02T11:36:41Z

offload/plugins-nextgen/amdgpu/src/rtl.cpp

-    if (auto Err = Slots[Curr].schedReleaseBuffer(KernelArgs, MemoryManager))
-      return Err;
+    // Setup the post action to release the kernel args buffer, if it exists
+    if (KernelArgs) {


Do you know why this is necessary? The release buffer action just calls free, free(nullptr) is valid.

The releaseBufferAction has an assert:

assert(Args->Buffer && "Invalid buffer");

I could just remove the assert, but since the action is just a no-op, I figured it'd be better to just skip pushing it to the queue entirely.

Fixed it by removing the assert, hopefully it works now. I'm honestly not too concerned with saving the latency of writing a pointer and then doing a no-op.

Just removed all the AMD-specific fixes and rebased on main and yep, looks like it's passing now.

jhuber6 · 2025-06-02T14:56:57Z

offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp

    RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
-    ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
+    ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin));


This assumes that the kernel name and binary name are the same, guess it's fine for now but probably something we want much more complicated tests for.

I think any test for which that doesn't hold will probably want to inherit directly from OffloadQueueTest and do its own binary handling logic.

llvmbot added the offload label May 29, 2025

jhuber6 reviewed May 29, 2025

View reviewed changes

RossBrunton force-pushed the optionalargs branch from 773a436 to 68b5a76 Compare May 29, 2025 15:10

llvmbot added the backend:AMDGPU label May 30, 2025

RossBrunton changed the title ~~[Offload] Allow setting null arguments in olLaunchKernel~~ [Offload] Allow setting null arguments in olLaunchKernel and fix amdgpu handling for empty kernel arguments May 30, 2025

jhuber6 reviewed May 30, 2025

View reviewed changes

RossBrunton force-pushed the optionalargs branch from e022df8 to 6be750b Compare June 2, 2025 09:43

jhuber6 reviewed Jun 2, 2025

View reviewed changes

RossBrunton force-pushed the optionalargs branch from 6be750b to fc3f6c4 Compare June 2, 2025 14:55

RossBrunton changed the title ~~[Offload] Allow setting null arguments in olLaunchKernel and fix amdgpu handling for empty kernel arguments~~ [Offload] Allow setting null arguments in olLaunchKernel Jun 2, 2025

jhuber6 approved these changes Jun 2, 2025

View reviewed changes

RossBrunton added 2 commits June 6, 2025 13:00

[Offload] Allow setting null arguments in olLaunchKernel

d180dfb

Set opts for noargs.c

70ba0e6

RossBrunton force-pushed the optionalargs branch from fc3f6c4 to 70ba0e6 Compare June 6, 2025 12:04

jhuber6 merged commit 269c29a into llvm:main Jun 6, 2025
5 of 7 checks passed

rorth pushed a commit to rorth/llvm-project that referenced this pull request Jun 11, 2025

[Offload] Allow setting null arguments in olLaunchKernel (llvm#141958)

2379f93

DhruvSrivastavaX pushed a commit to DhruvSrivastavaX/lldb-for-aix that referenced this pull request Jun 12, 2025

[Offload] Allow setting null arguments in olLaunchKernel (llvm#141958)

66f672e

tomtor pushed a commit to tomtor/llvm-project that referenced this pull request Jun 14, 2025

[Offload] Allow setting null arguments in olLaunchKernel (llvm#141958)

e9881d3

		if (ArgumentsSize > 0 && ArgumentsData == NULL) {
		return createOffloadError(

[Offload] Allow setting null arguments in olLaunchKernel #141958

[Offload] Allow setting null arguments in olLaunchKernel #141958

Uh oh!

Conversation

RossBrunton commented May 29, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented May 29, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

github-actions bot commented May 29, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

RossBrunton commented May 30, 2025

Uh oh!

jhuber6 commented May 30, 2025

Uh oh!

arsenm commented May 30, 2025

Uh oh!

jhuber6 left a comment

Choose a reason for hiding this comment

Uh oh!

jhuber6 commented May 30, 2025

Uh oh!

RossBrunton commented Jun 2, 2025

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

RossBrunton commented May 29, 2025 •

edited

Loading

llvmbot commented May 29, 2025 •

edited

Loading

github-actions bot commented May 29, 2025 •

edited

Loading