Skip to content

Commit 25bf4e2

Browse files
authored
[Offload] Remove handling for COV4 binaries from offload/ (#131033)
Summary: We moved from COV4 to COV5 a long time ago, and the remaining COV4 handling blocks simplifying some front-end code, so we should be able to drop it now.
1 parent ff8aa30 commit 25bf4e2

File tree

3 files changed

+15
-27
lines changed

3 files changed

+15
-27
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
576576
/// Get the HSA kernel object representing the kernel function.
577577
uint64_t getKernelObject() const { return KernelObject; }
578578

579-
/// Get the size of implicitargs based on the code object version
580-
/// @return 56 for cov4 and 256 for cov5
579+
/// Get the size of implicitargs based on the code object version.
581580
uint32_t getImplicitArgsSize() const { return ImplicitArgsSize; }
582581

583582
/// Indicates whether or not we need to set up our own private segment size.
@@ -3386,20 +3385,17 @@ Error AMDGPUKernelTy::launchImpl(GenericDeviceTy &GenericDevice,
33863385
if (auto Err = AMDGPUDevice.getStream(AsyncInfoWrapper, Stream))
33873386
return Err;
33883387

3389-
// Only COV5 implicitargs needs to be set. COV4 implicitargs are not used.
3390-
if (ImplArgs &&
3391-
getImplicitArgsSize() == sizeof(hsa_utils::AMDGPUImplicitArgsTy)) {
3392-
ImplArgs->BlockCountX = NumBlocks[0];
3393-
ImplArgs->BlockCountY = NumBlocks[1];
3394-
ImplArgs->BlockCountZ = NumBlocks[2];
3395-
ImplArgs->GroupSizeX = NumThreads[0];
3396-
ImplArgs->GroupSizeY = NumThreads[1];
3397-
ImplArgs->GroupSizeZ = NumThreads[2];
3398-
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
3399-
? 3
3400-
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
3401-
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
3402-
}
3388+
// Set the COV5+ implicit arguments to the appropriate values.
3389+
ImplArgs->BlockCountX = NumBlocks[0];
3390+
ImplArgs->BlockCountY = NumBlocks[1];
3391+
ImplArgs->BlockCountZ = NumBlocks[2];
3392+
ImplArgs->GroupSizeX = NumThreads[0];
3393+
ImplArgs->GroupSizeY = NumThreads[1];
3394+
ImplArgs->GroupSizeZ = NumThreads[2];
3395+
ImplArgs->GridDims = NumBlocks[2] * NumThreads[2] > 1
3396+
? 3
3397+
: 1 + (NumBlocks[1] * NumThreads[1] != 1);
3398+
ImplArgs->DynamicLdsSize = KernelArgs.DynCGroupMem;
34033399

34043400
// Push the kernel launch into the stream.
34053401
return Stream->pushKernelLaunch(*this, AllArgs, NumThreads, NumBlocks,

offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,17 +40,10 @@ struct AMDGPUImplicitArgsTy {
4040
uint8_t Unused2[132]; // 132 byte offset.
4141
};
4242

43-
// Dummy struct for COV4 implicitargs.
44-
struct AMDGPUImplicitArgsTyCOV4 {
45-
uint8_t Unused[56];
46-
};
47-
4843
/// Returns the size in bytes of the implicit arguments of AMDGPU kernels.
4944
/// `Version` is the ELF ABI version, e.g. COV5.
5045
inline uint32_t getImplicitArgsSize(uint16_t Version) {
51-
return Version < ELF::ELFABIVERSION_AMDGPU_HSA_V5
52-
? sizeof(AMDGPUImplicitArgsTyCOV4)
53-
: sizeof(AMDGPUImplicitArgsTy);
46+
return sizeof(AMDGPUImplicitArgsTy);
5447
}
5548

5649
/// Reads the AMDGPU specific metadata from the ELF file and propagates the

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,9 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
6565
if (Header.e_machine == EM_AMDGPU) {
6666
if (Header.e_ident[EI_OSABI] != ELFOSABI_AMDGPU_HSA)
6767
return createError("Invalid AMD OS/ABI, must be AMDGPU_HSA");
68-
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V4 &&
69-
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
68+
if (Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V5 &&
7069
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
71-
return createError("Invalid AMD ABI version, must be version 4 or above");
70+
return createError("Invalid AMD ABI version, must be version 5 or above");
7271
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
7372
(Header.e_flags & EF_AMDGPU_MACH) >
7473
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)

0 commit comments

Comments
 (0)