Skip to content

[Offload][AMDGPU] accept generic target #118919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions offload/DeviceRTL/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,16 @@ set(devicertl_base_directory ${CMAKE_CURRENT_SOURCE_DIR})
set(include_directory ${devicertl_base_directory}/include)
set(source_directory ${devicertl_base_directory}/src)

set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
"gfx908;gfx90a;gfx90c;gfx940;gfx941;gfx942;gfx950;gfx1010"
"gfx1012;gfx1030;gfx1031;gfx1032;gfx1033;gfx1034;gfx1035"
"gfx1036;gfx1100;gfx1101;gfx1102;gfx1103;gfx1150"
"gfx1151;gfx1152;gfx1153")
set(all_amdgpu_architectures "gfx700;gfx701;gfx801;gfx803"
"gfx9-generic;gfx900;gfx902;gfx906;gfx908"
"gfx90a;gfx90c"
"gfx9-4-generic;gfx940;gfx941;gfx942;gfx950"
"gfx10-1-generic;gfx1010;gfx1012"
"gfx10-3-generic;gfx1030;gfx1031;gfx1032;gfx1033"
"gfx1034;gfx1035;gfx1036"
"gfx11-generic;gfx1100;gfx1101;gfx1102;gfx1103"
"gfx1150;gfx1151;gfx1152;gfx1153"
"gfx12-generic")
set(all_nvptx_architectures "sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_87;sm_89;sm_90")
set(all_gpu_architectures
Expand Down
43 changes: 22 additions & 21 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ Error asyncMemCopy(bool UseMultipleSdmaEngines, void *Dst, hsa_agent_t DstAgent,
#endif
}

Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
std::string Target;
Error getTargetTripleAndFeatures(hsa_agent_t Agent,
SmallVector<SmallString<32>> &Targets) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't see much point in using SmallString here, because every use of it converts it to std::string.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SmallString is essentially std::string with a template argument for the size of the SSO buffer. I would recommend choosing a size that we expect to always fit the string, so that we never need to malloc it.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then 32 is sufficient here.

auto Err = hsa_utils::iterateAgentISAs(Agent, [&](hsa_isa_t ISA) {
uint32_t Length;
hsa_status_t Status;
Expand All @@ -205,13 +205,13 @@ Expected<std::string> getTargetTripleAndFeatures(hsa_agent_t Agent) {
return Status;

llvm::StringRef TripleTarget(ISAName.begin(), Length);
if (TripleTarget.consume_front("amdgcn-amd-amdhsa"))
Target = TripleTarget.ltrim('-').rtrim('\0').str();
return HSA_STATUS_INFO_BREAK;
if (TripleTarget.consume_front("amdgcn-amd-amdhsa")) {
auto Target = TripleTarget.ltrim('-').rtrim('\0');
Targets.push_back(Target);
}
return HSA_STATUS_SUCCESS;
});
if (Err)
return Err;
return Target;
return Err;
}
} // namespace hsa_utils

Expand Down Expand Up @@ -1988,12 +1988,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
return Err;

// Detect if XNACK is enabled
auto TargeTripleAndFeaturesOrError =
hsa_utils::getTargetTripleAndFeatures(Agent);
if (!TargeTripleAndFeaturesOrError)
return TargeTripleAndFeaturesOrError.takeError();
if (static_cast<StringRef>(*TargeTripleAndFeaturesOrError)
.contains("xnack+"))
SmallVector<SmallString<32>> Targets;
if (auto Err = hsa_utils::getTargetTripleAndFeatures(Agent, Targets))
return Err;
if (!Targets.empty() && Targets[0].str().contains("xnack+"))
IsXnackEnabled = true;

// detect if device is an APU.
Expand Down Expand Up @@ -3207,13 +3205,16 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
if (!Processor)
return false;

auto TargeTripleAndFeaturesOrError =
hsa_utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
if (!TargeTripleAndFeaturesOrError)
return TargeTripleAndFeaturesOrError.takeError();
return offloading::amdgpu::isImageCompatibleWithEnv(
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
*TargeTripleAndFeaturesOrError);
SmallVector<SmallString<32>> Targets;
if (auto Err = hsa_utils::getTargetTripleAndFeatures(
getKernelAgent(DeviceId), Targets))
return Err;
for (auto &Target : Targets)
if (offloading::amdgpu::isImageCompatibleWithEnv(
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
Target.str()))
return true;
return false;
}

bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {
Expand Down
3 changes: 2 additions & 1 deletion offload/plugins-nextgen/common/src/Utils/ELF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ checkMachineImpl(const object::ELFObjectFile<ELFT> &ELFObj, uint16_t EMachine) {
Header.e_ident[EI_ABIVERSION] != ELFABIVERSION_AMDGPU_HSA_V6)
return createError("Invalid AMD ABI version, must be version 4 or above");
if ((Header.e_flags & EF_AMDGPU_MACH) < EF_AMDGPU_MACH_AMDGCN_GFX700 ||
(Header.e_flags & EF_AMDGPU_MACH) > EF_AMDGPU_MACH_AMDGCN_GFX1201)
(Header.e_flags & EF_AMDGPU_MACH) >
EF_AMDGPU_MACH_AMDGCN_GFX9_4_GENERIC)
return createError("Unsupported AMDGPU architecture");
} else if (Header.e_machine == EM_CUDA) {
if (~Header.e_flags & EF_CUDA_64BIT_ADDRESS)
Expand Down
Loading