Skip to content

Commit f2120cd

Browse files
authored
[Offload][AMDGPU] Impose more restrictions for implicit kernel arguments (#95211)
COV3 is no longer supported, so we can use the ArgsSize read from the kernel to determine how many argument bytes are needed and whether implicit kernel arguments are used.
1 parent c46a6e6 commit f2120cd

File tree

1 file changed

+15
-12
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+15
-12
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3267,9 +3267,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
32673267
uint32_t NumThreads, uint64_t NumBlocks,
32683268
KernelArgsTy &KernelArgs, void *Args,
32693269
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
3270-
const uint32_t KernelArgsSize = KernelArgs.NumArgs * sizeof(void *);
3270+
const uint32_t LaunchParamsSize = KernelArgs.NumArgs * sizeof(void *);
32713271

3272-
if (ArgsSize < KernelArgsSize)
3272+
if (ArgsSize != LaunchParamsSize &&
3273+
ArgsSize != LaunchParamsSize + getImplicitArgsSize())
32733274
return Plugin::error("Mismatch of kernel arguments size");
32743275

32753276
AMDGPUPluginTy &AMDGPUPlugin =
@@ -3292,20 +3293,21 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
32923293
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
32933294
return Err;
32943295

3295-
// Initialize implicit arguments.
3296-
utils::AMDGPUImplicitArgsTy *ImplArgs =
3297-
reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
3298-
advanceVoidPtr(AllArgs, KernelArgsSize));
3296+
utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
3297+
if (ArgsSize == LaunchParamsSize + getImplicitArgsSize()) {
3298+
// Initialize implicit arguments.
3299+
ImplArgs = reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
3300+
advanceVoidPtr(AllArgs, LaunchParamsSize));
32993301

3300-
// Initialize the implicit arguments to zero.
3301-
std::memset(ImplArgs, 0, ImplicitArgsSize);
3302+
// Initialize the implicit arguments to zero.
3303+
std::memset(ImplArgs, 0, getImplicitArgsSize());
3304+
}
33023305

33033306
// Copy the explicit arguments.
33043307
// TODO: We should expose the args memory manager alloc to the common part as
33053308
// alternative to copying them twice.
3306-
if (KernelArgs.NumArgs)
3307-
std::memcpy(AllArgs, *static_cast<void **>(Args),
3308-
sizeof(void *) * KernelArgs.NumArgs);
3309+
if (LaunchParamsSize)
3310+
std::memcpy(AllArgs, *static_cast<void **>(Args), LaunchParamsSize);
33093311

33103312
AMDGPUDeviceTy &AMDGPUDevice = static_cast<AMDGPUDeviceTy &>(GenericDevice);
33113313

@@ -3318,7 +3320,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
33183320
Stream->setRPCServer(GenericDevice.getRPCServer());
33193321

33203322
// Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
3321-
if (getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
3323+
if (ImplArgs &&
3324+
getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
33223325
ImplArgs->BlockCountX = NumBlocks;
33233326
ImplArgs->BlockCountY = 1;
33243327
ImplArgs->BlockCountZ = 1;

0 commit comments

Comments
 (0)