Skip to content

Commit 43b9c54

Browse files
hidekisaito authored and CatherineMoore committed
[Offload][AMDGPU] accept generic target (llvm#118919)
Enables device code built for a generic ISA (e.g., "--offload-arch=gfx11-generic") to run on any device capable of executing that generic ISA. An executable may contain one ELF that targets a specific ISA and another ELF that targets a compatible generic ISA. Under that circumstance, this code should report both ELFs as compatible, leaving the rest to PluginManager to handle. Suggestions on how best to address that are welcome.
1 parent db3d077 commit 43b9c54

File tree

5 files changed

+47
-29
lines changed

5 files changed

+47
-29
lines changed

offload/DeviceRTL/CMakeLists.txt

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -42,12 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
4242
set(include_directory ${devicertl_base_directory}/include)
4343
set(source_directory ${devicertl_base_directory}/src)
4444

45-
set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80 86 89 87 90)
46-
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
47-
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
48-
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
49-
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
50-
"gfx1151;gfx1152;gfx1153;gfx1200;gfx1201")
45+
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
46+
"gfx9-generic;gfx900;gfx902;gfx906;gfx908"
47+
"gfx90a;gfx90c"
48+
"gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
49+
"gfx10-1-generic;gfx1010;gfx1012"
50+
"gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
51+
"gfx1034;gfx1035;gfx1036"
52+
"gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
53+
"gfx1150;gfx1151;gfx1152;gfx1153"
54+
"gfx12-generic;gfx1200;gfx1201")
5155
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
5256
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
5357
set(all_gpu_architectures

offload/DeviceRTL/include/Platform.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,18 @@ extern const inline unsigned __oclc_ISA_version = 11503;
123123
extern const inline unsigned __oclc_ISA_version = 12000;
124124
#elif defined(__gfx1201__)
125125
extern const inline unsigned __oclc_ISA_version = 12001;
126+
#elif defined(__gfx9_generic__)
127+
extern const inline unsigned __oclc_ISA_version = 9000;
128+
#elif defined(__gfx9_4_generic__)
129+
extern const inline unsigned __oclc_ISA_version = 9402;
130+
#elif defined(__gfx10_1_generic__)
131+
extern const inline unsigned __oclc_ISA_version = 10100;
132+
#elif defined(__gfx10_3_generic__)
133+
extern const inline unsigned __oclc_ISA_version = 10300;
134+
#elif defined(__gfx11_generic__)
135+
extern const inline unsigned __oclc_ISA_version = 11003;
136+
#elif defined(__gfx12_generic__)
137+
extern const inline unsigned __oclc_ISA_version = 12000;
126138
#else
127139
#error "Unknown AMDGPU architecture"
128140
#endif

offload/hostexec/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ else()
9797
return()
9898
endif()
9999

100-
set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx941 gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx1200 gfx1201)
100+
set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx941 gfx942 gfx1010 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx1200 gfx1201 gfx9-generic gfx9-4-generic gfx10-1-generic gfx10-3-generic gfx11-generic gfx12-generic)
101101
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
102102
set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
103103
endif()

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -406,8 +406,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
406406
#endif
407407
}
408408

409-
Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
410-
std::string Target;
409+
Error getTargetTripleAndFeatures(hsa_agent_t Agent,
410+
SmallVector<SmallString<32>> &Targets) {
411411
auto Err = utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
412412
uint32_t Length;
413413
hsa_status_t Status;
@@ -421,13 +421,13 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
421421
return Status;
422422

423423
llvm::StringRef TripleTarget(ISAName.begin(), Length);
424-
if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
425-
Target = TripleTarget.ltrim('-').rtrim('\0').str();
426-
return HSA_STATUS_INFO_BREAK;
424+
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
425+
auto Target = TripleTarget.ltrim('-').rtrim('\0');
426+
Targets.push_back(Target);
427+
}
428+
return HSA_STATUS_SUCCESS;
427429
});
428-
if (Err)
429-
return Err;
430-
return Target;
430+
return Err;
431431
}
432432

433433
} // namespace utils
@@ -2928,12 +2928,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29282928
DP("The number of XGMI Engines: %i\n", NumXGmiEngines);
29292929

29302930
// Detect if XNACK is enabled
2931-
auto TargeTripleAndFeaturesOrError =
2932-
utils::getTargetTripleAndFeatures(Agent);
2933-
if (!TargeTripleAndFeaturesOrError)
2934-
return TargeTripleAndFeaturesOrError.takeError();
2935-
if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
2936-
.contains("xnack+"))
2931+
SmallVector<SmallString<32>> Targets;
2932+
if (auto Err = utils::getTargetTripleAndFeatures(Agent, Targets))
2933+
return Err;
2934+
if (!Targets.empty() && Targets[0].str().contains("xnack+"))
29372935
IsXnackEnabled = true;
29382936

29392937
// detect if device is an APU.
@@ -4666,13 +4664,16 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
46664664
if (!Processor)
46674665
return false;
46684666

4669-
auto TargeTripleAndFeaturesOrError =
4670-
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
4671-
if (!TargeTripleAndFeaturesOrError)
4672-
return TargeTripleAndFeaturesOrError.takeError();
4673-
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
4674-
ElfOrErr->getPlatformFlags(),
4675-
*TargeTripleAndFeaturesOrError);
4667+
SmallVector<SmallString<32>> Targets;
4668+
if (auto Err = utils::getTargetTripleAndFeatures(
4669+
getKernelAgent(DeviceId), Targets))
4670+
return Err;
4671+
for (auto &Target : Targets)
4672+
if (utils::isImageCompatibleWithEnv(
4673+
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
4674+
Target.str()))
4675+
return true;
4676+
return false;
46764677
}
46774678

46784679
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
7070
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
7171
return createError("Invalid AMD ABI version, must be version above 4");
7272
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
73-
(Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
73+
(Header.e_flags & EF_AMDGPU_MACH) >
74+
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
7475
return createError("Unsupported AMDGPU architecture");
7576
} else if (Header.e_machine == EM_CUDA) {
7677
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)

0 commit comments

Comments
 (0)