Skip to content

Commit f2bceb2

Browse files
authored
[Offload][AMDGPU] accept generic target (#118919)
Enables device code built for a generic ISA (e.g., with "--offload-arch=gfx11-generic") to run on a device capable of that generic ISA (e.g., a gfx11-generic-capable device). An executable may contain one ELF that targets a specific ISA and another ELF that targets a compatible generic ISA. Under that circumstance, this code should say both ELFs are compatible, leaving the rest to PluginManager to handle. Suggestions on how best to address that are welcome.
1 parent 2c05e69 commit f2bceb2

File tree

3 files changed

+34
-27
lines changed

3 files changed

+34
-27
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
4242
set(include_directory ${devicertl_base_directory}/include)
4343
set(source_directory ${devicertl_base_directory}/src)
4444

45-
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
46-
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
47-
"gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
48-
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
49-
"gfx1151;gfx1152;gfx1153")
45+
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
46+
"gfx9-generic;gfx900;gfx902;gfx906;gfx908"
47+
"gfx90a;gfx90c"
48+
"gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
49+
"gfx10-1-generic;gfx1010;gfx1012"
50+
"gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
51+
"gfx1034;gfx1035;gfx1036"
52+
"gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
53+
"gfx1150;gfx1151;gfx1152;gfx1153"
54+
"gfx12-generic")
5055
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
5156
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
5257
set(all_gpu_architectures

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
190190
#endif
191191
}
192192

193-
Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
194-
std::string Target;
193+
Error getTargetTripleAndFeatures(hsa_agent_t Agent,
194+
SmallVector<SmallString<32>> &Targets) {
195195
auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
196196
uint32_t Length;
197197
hsa_status_t Status;
@@ -205,13 +205,13 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
205205
return Status;
206206

207207
llvm::StringRef TripleTarget(ISAName.begin(), Length);
208-
if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
209-
Target = TripleTarget.ltrim('-').rtrim('\0').str();
210-
return HSA_STATUS_INFO_BREAK;
208+
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
209+
auto Target = TripleTarget.ltrim('-').rtrim('\0');
210+
Targets.push_back(Target);
211+
}
212+
return HSA_STATUS_SUCCESS;
211213
});
212-
if (Err)
213-
return Err;
214-
return Target;
214+
return Err;
215215
}
216216
} // namespace hsa_utils
217217

@@ -1992,12 +1992,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
19921992
return Err;
19931993

19941994
// Detect if XNACK is enabled
1995-
auto TargeTripleAndFeaturesOrError =
1996-
hsa_utils::getTargetTripleAndFeatures(Agent);
1997-
if (!TargeTripleAndFeaturesOrError)
1998-
return TargeTripleAndFeaturesOrError.takeError();
1999-
if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
2000-
.contains("xnack+"))
1995+
SmallVector<SmallString<32>> Targets;
1996+
if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
1997+
return Err;
1998+
if (!Targets.empty() && Targets[0].str().contains("xnack+"))
20011999
IsXnackEnabled = true;
20022000

20032001
// detect if device is an APU.
@@ -3211,13 +3209,16 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
32113209
if (!Processor)
32123210
return false;
32133211

3214-
auto TargeTripleAndFeaturesOrError =
3215-
hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
3216-
if (!TargeTripleAndFeaturesOrError)
3217-
return TargeTripleAndFeaturesOrError.takeError();
3218-
return offloading::amdgpu::isImageCompatibleWithEnv(
3219-
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
3220-
*TargeTripleAndFeaturesOrError);
3212+
SmallVector<SmallString<32>> Targets;
3213+
if (auto Err = hsa_utils::getTargetTripleAndFeatures(
3214+
getKernelAgent(DeviceId), Targets))
3215+
return Err;
3216+
for (auto &Target : Targets)
3217+
if (offloading::amdgpu::isImageCompatibleWithEnv(
3218+
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
3219+
Target.str()))
3220+
return true;
3221+
return false;
32213222
}
32223223

32233224
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
6868
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
6969
return createError("Invalid AMD ABI version, must be version 4 or above");
7070
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
71-
(Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
71+
(Header.e_flags & EF_AMDGPU_MACH) >
72+
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
7273
return createError("Unsupported AMDGPU architecture");
7374
} else if (Header.e_machine == EM_CUDA) {
7475
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)

0 commit comments

Comments
 (0)