Skip to content

Commit 6df8c66

Browse files
authored
[Offload][AMDGPU] accept generic target (llvm#118919) (llvm#1337)
2 parents 1bb617f + b7b1c4d commit 6df8c66

File tree

5 files changed, with 47 additions and 29 deletions.

offload/DeviceRTL/CMakeLists.txt

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -42,12 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
4242
set(include_directory ${devicertl_base_directory}/include)
4343
set(source_directory ${devicertl_base_directory}/src)
4444

45-
set(all_capabilities 35 37 50 52 53 60 61 62 70 72 75 80 86 89 87 90)
46-
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
47-
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
48-
"gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
49-
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
50-
"gfx1151;gfx1152;gfx1153;gfx1200;gfx1201")
45+
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
46+
"gfx9-generic;gfx900;gfx902;gfx906;gfx908"
47+
"gfx90a;gfx90c"
48+
"gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
49+
"gfx10-1-generic;gfx1010;gfx1012"
50+
"gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
51+
"gfx1034;gfx1035;gfx1036"
52+
"gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
53+
"gfx1150;gfx1151;gfx1152;gfx1153"
54+
"gfx12-generic;gfx1200;gfx1201")
5155
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
5256
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
5357
set(all_gpu_architectures

offload/DeviceRTL/include/Platform.h

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,18 @@ extern const inline unsigned __oclc_ISA_version = 11503;
125125
extern const inline unsigned __oclc_ISA_version = 12000;
126126
#elif defined(__gfx1201__)
127127
extern const inline unsigned __oclc_ISA_version = 12001;
128+
#elif defined(__gfx9_generic__)
129+
extern const inline unsigned __oclc_ISA_version = 9000;
130+
#elif defined(__gfx9_4_generic__)
131+
extern const inline unsigned __oclc_ISA_version = 9402;
132+
#elif defined(__gfx10_1_generic__)
133+
extern const inline unsigned __oclc_ISA_version = 10100;
134+
#elif defined(__gfx10_3_generic__)
135+
extern const inline unsigned __oclc_ISA_version = 10300;
136+
#elif defined(__gfx11_generic__)
137+
extern const inline unsigned __oclc_ISA_version = 11003;
138+
#elif defined(__gfx12_generic__)
139+
extern const inline unsigned __oclc_ISA_version = 12000;
128140
#else
129141
#error "Unknown AMDGPU architecture"
130142
#endif

offload/hostexec/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ else()
9797
return()
9898
endif()
9999

100-
set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx941 gfx942 gfx950 gfx1010 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx1200 gfx1201)
100+
set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx941 gfx942 gfx950 gfx1010 gfx1030 gfx1031 gfx1032 gfx1033 gfx1034 gfx1035 gfx1036 gfx1100 gfx1101 gfx1102 gfx1103 gfx1150 gfx1151 gfx1152 gfx1153 gfx1200 gfx1201 gfx9-generic gfx9-4-generic gfx10-1-generic gfx10-3-generic gfx11-generic gfx12-generic)
101101
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
102102
set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
103103
endif()

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 22 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -406,8 +406,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
406406
#endif
407407
}
408408

409-
Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
410-
std::string Target;
409+
Error getTargetTripleAndFeatures(hsa_agent_t Agent,
410+
SmallVector<SmallString<32>> &Targets) {
411411
auto Err = utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
412412
uint32_t Length;
413413
hsa_status_t Status;
@@ -421,13 +421,13 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
421421
return Status;
422422

423423
llvm::StringRef TripleTarget(ISAName.begin(), Length);
424-
if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
425-
Target = TripleTarget.ltrim('-').rtrim('\0').str();
426-
return HSA_STATUS_INFO_BREAK;
424+
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
425+
auto Target = TripleTarget.ltrim('-').rtrim('\0');
426+
Targets.push_back(Target);
427+
}
428+
return HSA_STATUS_SUCCESS;
427429
});
428-
if (Err)
429-
return Err;
430-
return Target;
430+
return Err;
431431
}
432432

433433
} // namespace utils
@@ -2926,12 +2926,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29262926
DP("The number of XGMI Engines: %i\n", NumXGmiEngines);
29272927

29282928
// Detect if XNACK is enabled
2929-
auto TargeTripleAndFeaturesOrError =
2930-
utils::getTargetTripleAndFeatures(Agent);
2931-
if (!TargeTripleAndFeaturesOrError)
2932-
return TargeTripleAndFeaturesOrError.takeError();
2933-
if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
2934-
.contains("xnack+"))
2929+
SmallVector<SmallString<32>> Targets;
2930+
if (auto Err = utils::getTargetTripleAndFeatures(Agent, Targets))
2931+
return Err;
2932+
if (!Targets.empty() && Targets[0].str().contains("xnack+"))
29352933
IsXnackEnabled = true;
29362934

29372935
// detect if device is an APU.
@@ -4659,13 +4657,16 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
46594657
if (!Processor)
46604658
return false;
46614659

4662-
auto TargeTripleAndFeaturesOrError =
4663-
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
4664-
if (!TargeTripleAndFeaturesOrError)
4665-
return TargeTripleAndFeaturesOrError.takeError();
4666-
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
4667-
ElfOrErr->getPlatformFlags(),
4668-
*TargeTripleAndFeaturesOrError);
4660+
SmallVector<SmallString<32>> Targets;
4661+
if (auto Err = utils::getTargetTripleAndFeatures(
4662+
getKernelAgent(DeviceId), Targets))
4663+
return Err;
4664+
for (auto &Target : Targets)
4665+
if (utils::isImageCompatibleWithEnv(
4666+
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
4667+
Target.str()))
4668+
return true;
4669+
return false;
46694670
}
46704671

46714672
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

offload/plugins-nextgen/common/src/Utils/ELF.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
7070
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
7171
return createError("Invalid AMD ABI version, must be version above 4");
7272
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
73-
(Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
73+
(Header.e_flags & EF_AMDGPU_MACH) >
74+
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
7475
return createError("Unsupported AMDGPU architecture");
7576
} else if (Header.e_machine == EM_CUDA) {
7677
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)

0 commit comments

Comments
 (0)