Skip to content

[Offload][AMDGPU] Impose more restrictions for implicit kernel arguments #95211

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 15 additions & 12 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3267,9 +3267,10 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
uint32_t NumThreads, uint64_t NumBlocks,
KernelArgsTy &KernelArgs, void *Args,
AsyncInfoWrapperTy &AsyncInfoWrapper) const {
const uint32_t KernelArgsSize = KernelArgs.NumArgs * sizeof(void *);
const uint32_t LaunchParamsSize = KernelArgs.NumArgs * sizeof(void *);

if (ArgsSize < KernelArgsSize)
if (ArgsSize != LaunchParamsSize &&
ArgsSize != LaunchParamsSize + getImplicitArgsSize())
return Plugin::error("Mismatch of kernel arguments size");

AMDGPUPluginTy &AMDGPUPlugin =
Expand All @@ -3292,20 +3293,21 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
if (auto Err = GenericDevice.getDeviceStackSize(StackSize))
return Err;

// Initialize implicit arguments.
utils::AMDGPUImplicitArgsTy *ImplArgs =
reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
advanceVoidPtr(AllArgs, KernelArgsSize));
utils::AMDGPUImplicitArgsTy *ImplArgs = nullptr;
if (ArgsSize == LaunchParamsSize + getImplicitArgsSize()) {
// Initialize implicit arguments.
ImplArgs = reinterpret_cast<utils::AMDGPUImplicitArgsTy *>(
advanceVoidPtr(AllArgs, LaunchParamsSize));

// Initialize the implicit arguments to zero.
std::memset(ImplArgs, 0, ImplicitArgsSize);
// Initialize the implicit arguments to zero.
std::memset(ImplArgs, 0, getImplicitArgsSize());
}

// Copy the explicit arguments.
// TODO: We should expose the args memory manager alloc to the common part as
// alternative to copying them twice.
if (KernelArgs.NumArgs)
std::memcpy(AllArgs, *static_cast<void **>(Args),
sizeof(void *) * KernelArgs.NumArgs);
if (LaunchParamsSize)
std::memcpy(AllArgs, *static_cast<void **>(Args), LaunchParamsSize);

AMDGPUDeviceTy &AMDGPUDevice = static_cast<AMDGPUDeviceTy &>(GenericDevice);

Expand All @@ -3318,7 +3320,8 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
Stream->setRPCServer(GenericDevice.getRPCServer());

// Only COV5 implicit arguments need to be set; COV4 implicit arguments are not used.
if (getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
if (ImplArgs &&
getImplicitArgsSize() == sizeof(utils::AMDGPUImplicitArgsTy)) {
ImplArgs->BlockCountX = NumBlocks;
ImplArgs->BlockCountY = 1;
ImplArgs->BlockCountZ = 1;
Expand Down
Loading