-
Notifications
You must be signed in to change notification settings - Fork 14.3k
[Offload] Allow setting null arguments in olLaunchKernel #141958
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-offload Author: Ross Brunton (RossBrunton) ChangesFull diff: https://github.com/llvm/llvm-project/pull/141958.diff 6 Files Affected:
diff --git a/offload/liboffload/API/Kernel.td b/offload/liboffload/API/Kernel.td
index 247f9c1bf5b6a..45e3d8112791c 100644
--- a/offload/liboffload/API/Kernel.td
+++ b/offload/liboffload/API/Kernel.td
@@ -43,19 +43,21 @@ def : Function {
let name = "olLaunchKernel";
let desc = "Enqueue a kernel launch with the specified size and parameters.";
let details = [
- "If a queue is not specified, kernel execution happens synchronously"
+ "If a queue is not specified, kernel execution happens synchronously",
+ "ArgumentsData may be set to NULL (to indicate no parameters)"
];
let params = [
Param<"ol_queue_handle_t", "Queue", "handle of the queue", PARAM_IN_OPTIONAL>,
Param<"ol_device_handle_t", "Device", "handle of the device to execute on", PARAM_IN>,
Param<"ol_kernel_handle_t", "Kernel", "handle of the kernel", PARAM_IN>,
- Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN>,
+ Param<"const void*", "ArgumentsData", "pointer to the kernel argument struct", PARAM_IN_OPTIONAL>,
Param<"size_t", "ArgumentsSize", "size of the kernel argument struct", PARAM_IN>,
Param<"const ol_kernel_launch_size_args_t*", "LaunchSizeArgs", "pointer to the struct containing launch size parameters", PARAM_IN>,
Param<"ol_event_handle_t*", "EventOut", "optional recorded event for the enqueued operation", PARAM_OUT_OPTIONAL>
];
let returns = [
Return<"OL_ERRC_INVALID_ARGUMENT", ["`Queue == NULL && EventOut != NULL`"]>,
+ Return<"OL_ERRC_INVALID_ARGUMENT", ["`ArgumentsSize > 0 && ArgumentsData == NULL`"]>,
Return<"OL_ERRC_INVALID_DEVICE", ["If Queue is non-null but does not belong to Device"]>,
];
}
diff --git a/offload/liboffload/include/generated/OffloadAPI.h b/offload/liboffload/include/generated/OffloadAPI.h
index a1d7540519e32..d918a51e8ffd7 100644
--- a/offload/liboffload/include/generated/OffloadAPI.h
+++ b/offload/liboffload/include/generated/OffloadAPI.h
@@ -692,6 +692,7 @@ typedef struct ol_kernel_launch_size_args_t {
///
/// @details
/// - If a queue is not specified, kernel execution happens synchronously
+/// - ArgumentsData may be set to NULL (to indicate no parameters)
///
/// @returns
/// - ::OL_RESULT_SUCCESS
@@ -699,13 +700,14 @@ typedef struct ol_kernel_launch_size_args_t {
/// - ::OL_ERRC_DEVICE_LOST
/// - ::OL_ERRC_INVALID_ARGUMENT
/// + `Queue == NULL && EventOut != NULL`
+/// - ::OL_ERRC_INVALID_ARGUMENT
+/// + `ArgumentsSize > 0 && ArgumentsData == NULL`
/// - ::OL_ERRC_INVALID_DEVICE
/// + If Queue is non-null but does not belong to Device
/// - ::OL_ERRC_INVALID_NULL_HANDLE
/// + `NULL == Device`
/// + `NULL == Kernel`
/// - ::OL_ERRC_INVALID_NULL_POINTER
-/// + `NULL == ArgumentsData`
/// + `NULL == LaunchSizeArgs`
OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
// [in][optional] handle of the queue
@@ -714,7 +716,7 @@ OL_APIEXPORT ol_result_t OL_APICALL olLaunchKernel(
ol_device_handle_t Device,
// [in] handle of the kernel
ol_kernel_handle_t Kernel,
- // [in] pointer to the kernel argument struct
+ // [in][optional] pointer to the kernel argument struct
const void *ArgumentsData,
// [in] size of the kernel argument struct
size_t ArgumentsSize,
diff --git a/offload/liboffload/include/generated/OffloadEntryPoints.inc b/offload/liboffload/include/generated/OffloadEntryPoints.inc
index 9feebeea09ec3..d629e14a6bb55 100644
--- a/offload/liboffload/include/generated/OffloadEntryPoints.inc
+++ b/offload/liboffload/include/generated/OffloadEntryPoints.inc
@@ -838,6 +838,12 @@ olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
"validation failure: Queue == NULL && EventOut != NULL");
}
+ if (ArgumentsSize > 0 && ArgumentsData == NULL) {
+ return createOffloadError(
+ error::ErrorCode::INVALID_ARGUMENT,
+ "validation failure: ArgumentsSize > 0 && ArgumentsData == NULL");
+ }
+
if (NULL == Device) {
return createOffloadError(error::ErrorCode::INVALID_NULL_HANDLE,
"validation failure: NULL == Device");
@@ -848,11 +854,6 @@ olLaunchKernel_val(ol_queue_handle_t Queue, ol_device_handle_t Device,
"validation failure: NULL == Kernel");
}
- if (NULL == ArgumentsData) {
- return createOffloadError(error::ErrorCode::INVALID_NULL_POINTER,
- "validation failure: NULL == ArgumentsData");
- }
-
if (NULL == LaunchSizeArgs) {
return createOffloadError(error::ErrorCode::INVALID_NULL_POINTER,
"validation failure: NULL == LaunchSizeArgs");
diff --git a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
index 5814943e4aaa9..91a9ca24786a3 100644
--- a/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
+++ b/offload/unittests/OffloadAPI/device_code/CMakeLists.txt
@@ -61,6 +61,7 @@ endif()
add_offload_test_device_code(foo.c foo)
add_offload_test_device_code(bar.c bar)
+add_offload_test_device_code(noargs.c noargs)
add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
diff --git a/offload/unittests/OffloadAPI/device_code/noargs.c b/offload/unittests/OffloadAPI/device_code/noargs.c
new file mode 100644
index 0000000000000..a46c9469b13cd
--- /dev/null
+++ b/offload/unittests/OffloadAPI/device_code/noargs.c
@@ -0,0 +1,5 @@
+#include <gpuintrin.h>
+
+__gpu_kernel void noargs() {
+ (void)0;
+}
diff --git a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
index 20462e22fd73f..3b6a25139c6cf 100644
--- a/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
+++ b/offload/unittests/OffloadAPI/kernel/olLaunchKernel.cpp
@@ -10,14 +10,14 @@
#include <OffloadAPI.h>
#include <gtest/gtest.h>
-struct olLaunchKernelTest : OffloadQueueTest {
- void SetUp() override {
+struct LaunchKernelTestBase : OffloadQueueTest {
+ void SetUpKernel(const char *kernel) {
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp());
- ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin));
+ ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin));
ASSERT_GE(DeviceBin->getBufferSize(), 0lu);
ASSERT_SUCCESS(olCreateProgram(Device, DeviceBin->getBufferStart(),
DeviceBin->getBufferSize(), &Program));
- ASSERT_SUCCESS(olGetKernel(Program, "foo", &Kernel));
+ ASSERT_SUCCESS(olGetKernel(Program, kernel, &Kernel));
LaunchArgs.Dimensions = 1;
LaunchArgs.GroupSizeX = 64;
LaunchArgs.GroupSizeY = 1;
@@ -43,8 +43,20 @@ struct olLaunchKernelTest : OffloadQueueTest {
ol_kernel_launch_size_args_t LaunchArgs{};
};
+struct olLaunchKernelTest : LaunchKernelTestBase {
+ void SetUp() override {
+ RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("foo"));
+ }
+};
OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelTest);
+struct olLaunchKernelNoArgsTest : LaunchKernelTestBase {
+ void SetUp() override {
+ RETURN_ON_FATAL_FAILURE(LaunchKernelTestBase::SetUpKernel("noargs"));
+ }
+};
+OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olLaunchKernelNoArgsTest);
+
TEST_P(olLaunchKernelTest, Success) {
void *Mem;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, 64, &Mem));
@@ -65,6 +77,13 @@ TEST_P(olLaunchKernelTest, Success) {
ASSERT_SUCCESS(olMemFree(Mem));
}
+TEST_P(olLaunchKernelNoArgsTest, Success) {
+ ASSERT_SUCCESS(olLaunchKernel(Queue, Device, Kernel, nullptr, 0,
+ &LaunchArgs, nullptr));
+
+ ASSERT_SUCCESS(olWaitQueue(Queue));
+}
+
TEST_P(olLaunchKernelTest, SuccessSynchronous) {
void *Mem;
ASSERT_SUCCESS(olMemAlloc(Device, OL_ALLOC_TYPE_MANAGED, 64, &Mem));
|
if (ArgumentsSize > 0 && ArgumentsData == NULL) { | ||
return createOffloadError( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd imagine the size of zero is the important bit, and then the pointer value just becomes a no-op since it's not touched.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What do you mean? This check is autogenerated from the spec.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I means the && ArgumentsData == NULL is redundant
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think it is? It should be an error condition if a size is specified and ArgumentsData hasn't been.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there anything either of you want changing about the error condition here?
✅ With the latest revision this PR passed the C/C++ code formatter. |
I've pushed up an additional change to this PR; the thing I was debugging originally was that the amdgpu driver would throw an error if a function was being called with no arguments at all. However, more digging has revealed that even if you build a kernel with no arguments at the C level, when compiled at O0 a number of "hidden" arguments are added. The new change compiles the noargs.cpp test with O3 and fixes amdgpu to pass. Not sure if the fixes to amdgpu are worth moving into their own MR or not though. Also, I checked: |
Oh yeah, I was going to fix that. There's alignment issues as well. |
More precisely they are there to begin with and optimized out if unused |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to make a separate fix for the AMDGPU stuff. free(malloc(0))
is explicitly defined by the standard to work so that is what we should do. If the size is zero a nullptr argument is perfectly valid.
#142199 should ideally fix this for AMDGPU, then this should be a small change + test. |
@jhuber6 Thanks for looking into this. I've rebased on top of your change and, with another small amdgpu modification, it looks like the test is passing. Specifically, if we don't specify |
if (auto Err = Slots[Curr].schedReleaseBuffer(KernelArgs, MemoryManager)) | ||
return Err; | ||
// Setup the post action to release the kernel args buffer, if it exists | ||
if (KernelArgs) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you know why this is necessary? The release buffer action just calls free, free(nullptr)
is valid.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The releaseBufferAction has an assert:
assert(Args->Buffer && "Invalid buffer");
I could just remove the assert, but since the action is just a no-op, I figured it'd be better to just skip pushing it to the queue entirely.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed it by removing the assert, hopefully it works now. I'm honestly not too concerned with saving the latency of writing a pointer and then doing a no-op.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just removed all the AMD-specific fixes and rebased on main and yep, looks like it's passing now.
RETURN_ON_FATAL_FAILURE(OffloadQueueTest::SetUp()); | ||
ASSERT_TRUE(TestEnvironment::loadDeviceBinary("foo", Device, DeviceBin)); | ||
ASSERT_TRUE(TestEnvironment::loadDeviceBinary(kernel, Device, DeviceBin)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This assumes that the kernel name and binary name are the same, guess it's fine for now but probably something we want much more complicated tests for.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think any test for which that doesn't hold will probably want to inherit directly from OffloadQueueTest and do its own binary handling logic.
No description provided.