Skip to content

Commit e022df8

Browse files
committed
Fix for AMD
1 parent 68b5a76 commit e022df8

File tree

2 files changed

+11
-5
lines changed

2 files changed

+11
-5
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,8 +1261,9 @@ struct AMDGPUStreamTy {
12611261
auto [Curr, InputSignal] = consume(OutputSignal);
12621262

12631263
// Set up the post action to release the kernel args buffer.
1264-
if (auto Err = Slots[Curr].schedReleaseBuffer(KernelArgs, MemoryManager))
1265-
return Err;
1264+
if (KernelArgs)
1265+
if (auto Err = Slots[Curr].schedReleaseBuffer(KernelArgs, MemoryManager))
1266+
return Err;
12661267

12671268
// If we are running an RPC server we want to wake up the server thread
12681269
// whenever there is a kernel running and let it sleep otherwise.
@@ -3375,8 +3376,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
33753376
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager();
33763377

33773378
void *AllArgs = nullptr;
3378-
if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
3379-
return Err;
3379+
if (ArgsSize)
3380+
if (auto Err = ArgsMemoryManager.allocate(ArgsSize, &AllArgs))
3381+
return Err;
33803382

33813383
// Account for user requested dynamic shared memory.
33823384
uint32_t GroupSize = getGroupSize();

offload/unittests/OffloadAPI/device_code/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ macro(add_offload_test_device_code test_filename test_name)
77
add_custom_command(OUTPUT ${BIN_PATH}
88
COMMAND
99
${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
10+
${ARGN}
1011
-march=${LIBOMPTARGET_DEP_CUDA_ARCH}
1112
--cuda-path=${CUDA_ROOT}
1213
${SRC_PATH} -o ${BIN_PATH}
@@ -21,6 +22,7 @@ macro(add_offload_test_device_code test_filename test_name)
2122
add_custom_command(OUTPUT ${BIN_PATH}
2223
COMMAND
2324
${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
25+
${ARGN}
2426
-mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
2527
${SRC_PATH} -o ${BIN_PATH}
2628
DEPENDS ${SRC_PATH}
@@ -61,7 +63,9 @@ endif()
6163

6264
add_offload_test_device_code(foo.c foo)
6365
add_offload_test_device_code(bar.c bar)
64-
add_offload_test_device_code(noargs.c noargs)
66+
# By default, amdhsa will add a number of "hidden" arguments to the kernel definition
67+
# -O3 disables this, resulting in a kernel function that actually has no arguments as seen by liboffload
68+
add_offload_test_device_code(noargs.c noargs -O3)
6569

6670
add_custom_target(OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS})
6771

0 commit comments

Comments
 (0)