File tree Expand file tree Collapse file tree 2 files changed +11
-5
lines changed
plugins-nextgen/amdgpu/src
unittests/OffloadAPI/device_code Expand file tree Collapse file tree 2 files changed +11
-5
lines changed Original file line number Diff line number Diff line change @@ -1261,8 +1261,9 @@ struct AMDGPUStreamTy {
1261
1261
auto [Curr, InputSignal] = consume (OutputSignal);
1262
1262
1263
1263
// Setup the post action to release the kernel args buffer.
1264
- if (auto Err = Slots[Curr].schedReleaseBuffer (KernelArgs, MemoryManager))
1265
- return Err;
1264
+ if (KernelArgs)
1265
+ if (auto Err = Slots[Curr].schedReleaseBuffer (KernelArgs, MemoryManager))
1266
+ return Err;
1266
1267
1267
1268
// If we are running an RPC server we want to wake up the server thread
1268
1269
// whenever there is a kernel running and let it sleep otherwise.
@@ -3375,8 +3376,9 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
3375
3376
AMDGPUMemoryManagerTy &ArgsMemoryManager = HostDevice.getArgsMemoryManager ();
3376
3377
3377
3378
void *AllArgs = nullptr ;
3378
- if (auto Err = ArgsMemoryManager.allocate (ArgsSize, &AllArgs))
3379
- return Err;
3379
+ if (ArgsSize)
3380
+ if (auto Err = ArgsMemoryManager.allocate (ArgsSize, &AllArgs))
3381
+ return Err;
3380
3382
3381
3383
// Account for user requested dynamic shared memory.
3382
3384
uint32_t GroupSize = getGroupSize ();
Original file line number Diff line number Diff line change @@ -7,6 +7,7 @@ macro(add_offload_test_device_code test_filename test_name)
7
7
add_custom_command (OUTPUT ${BIN_PATH}
8
8
COMMAND
9
9
${CMAKE_C_COMPILER} --target=nvptx64-nvidia-cuda
10
+ ${ARGN}
10
11
-march=${LIBOMPTARGET_DEP_CUDA_ARCH}
11
12
--cuda-path=${CUDA_ROOT}
12
13
${SRC_PATH} -o ${BIN_PATH}
@@ -21,6 +22,7 @@ macro(add_offload_test_device_code test_filename test_name)
21
22
add_custom_command (OUTPUT ${BIN_PATH}
22
23
COMMAND
23
24
${CMAKE_C_COMPILER} --target=amdgcn-amd-amdhsa -nogpulib
25
+ ${ARGN}
24
26
-mcpu=${LIBOMPTARGET_DEP_AMDGPU_ARCH}
25
27
${SRC_PATH} -o ${BIN_PATH}
26
28
DEPENDS ${SRC_PATH}
@@ -61,7 +63,9 @@ endif()
61
63
62
64
add_offload_test_device_code (foo.c foo )
63
65
add_offload_test_device_code (bar.c bar )
64
- add_offload_test_device_code (noargs.c noargs )
66
+ # By default, amdhsa will add a number of "hidden" arguments to the kernel definition.
67
+ # -O3 disables this, resulting in a kernel function that truly has no arguments as seen by liboffload.
68
+ add_offload_test_device_code (noargs.c noargs -O3 )
65
69
66
70
add_custom_target (OffloadUnitTestsDeviceBins DEPENDS ${BIN_PATHS} )
67
71
You can’t perform that action at this time.
0 commit comments