Skip to content

Commit 529dbb2

Browse files
committed
[llvm][offload] Move AMDGPU offload utilities to LLVM
This patch moves utilities from `offload/plugins-nextgen/amdgpu/utils/UtilitiesRTL.h` to `llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h` to be reused by other projects. Concretely the following changes were made: - Rename `KernelMetaDataTy` to `AMDGPUKernelMetaData`. - Remove unused fields `KernelObject`, `KernelSegmentSize`, `ExplicitArgumentCount` and `ImplicitArgumentCount` from `AMDGPUKernelMetaData`. - Return the produced error if `ELFObj.sections()` failed instead of using `cantFail`. - Added `AGPRCount` field to `AMDGPUKernelMetaData`.
1 parent fa87eac commit 529dbb2

File tree

7 files changed

+356
-261
lines changed

7 files changed

+356
-261
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
//===---- ObjectUtilities.h - AMDGPU ELF utilities ---------------- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file declares AMDGPU ELF related utilities.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include <cstdint>
14+
15+
#include "llvm/ADT/StringMap.h"
16+
#include "llvm/ADT/StringRef.h"
17+
#include "llvm/Support/Error.h"
18+
#include "llvm/Support/MemoryBufferRef.h"
19+
20+
namespace llvm {
21+
namespace offloading {
22+
namespace amdgpu {
23+
/// Check if an image is compatible with current system's environment. The
24+
/// system environment is given as a 'target-id' which has the form:
25+
///
26+
/// <target-id> := <processor> ( ":" <target-feature> ( "+" | "-" ) )*
27+
///
28+
/// If a feature is not specific as '+' or '-' it is assumed to be in an 'any'
29+
/// and is compatible with either '+' or '-'. The HSA runtime returns this
30+
/// information using the target-id, while we use the ELF header to determine
31+
/// these features.
32+
bool isImageCompatibleWithEnv(StringRef ImageArch, uint32_t ImageFlags,
33+
StringRef EnvTargetID);
34+
35+
/// Struct for holding metadata related to AMDGPU kernels, for more information
36+
/// about the metadata and its meaning see:
37+
/// https://llvm.org/docs/AMDGPUUsage.html#code-object-v3
38+
struct AMDGPUKernelMetaData {
39+
/// Constant indicating that a value is invalid.
40+
static constexpr uint32_t KInvalidValue =
41+
std::numeric_limits<uint32_t>::max();
42+
/// The amount of group segment memory required by a work-group in bytes.
43+
uint32_t GroupSegmentList = KInvalidValue;
44+
/// The amount of fixed private address space memory required for a work-item
45+
/// in bytes.
46+
uint32_t PrivateSegmentSize = KInvalidValue;
47+
/// Number of scalar registers required by a wavefront.
48+
uint32_t SGPRCount = KInvalidValue;
49+
/// Number of vector registers required by each work-item.
50+
uint32_t VGPRCount = KInvalidValue;
51+
/// Number of stores from a scalar register to a register allocator created
52+
/// spill location.
53+
uint32_t SGPRSpillCount = KInvalidValue;
54+
/// Number of stores from a vector register to a register allocator created
55+
/// spill location.
56+
uint32_t VGPRSpillCount = KInvalidValue;
57+
/// Number of accumulator registers required by each work-item.
58+
uint32_t AGPRCount = KInvalidValue;
59+
/// Corresponds to the OpenCL reqd_work_group_size attribute.
60+
uint32_t RequestedWorkgroupSize[3] = {KInvalidValue, KInvalidValue,
61+
KInvalidValue};
62+
/// Corresponds to the OpenCL work_group_size_hint attribute.
63+
uint32_t WorkgroupSizeHint[3] = {KInvalidValue, KInvalidValue, KInvalidValue};
64+
/// Wavefront size.
65+
uint32_t WavefrontSize = KInvalidValue;
66+
/// Maximum flat work-group size supported by the kernel in work-items.
67+
uint32_t MaxFlatWorkgroupSize = KInvalidValue;
68+
};
69+
70+
/// Reads AMDGPU specific metadata from the ELF file and propagates the
71+
/// KernelInfoMap.
72+
Error getAMDGPUMetaDataFromImage(MemoryBufferRef MemBuffer,
73+
StringMap<AMDGPUKernelMetaData> &KernelInfoMap,
74+
uint16_t &ELFABIVersion);
75+
} // namespace amdgpu
76+
} // namespace offloading
77+
} // namespace llvm
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
add_llvm_component_library(LLVMFrontendOffloadingAMDGPU
2+
ObjectUtilities.cpp
3+
4+
LINK_COMPONENTS
5+
Support
6+
BinaryFormat
7+
Object
8+
)
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
//===---- ObjectUtilities.cpp - AMDGPU ELF utilities -------------- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines AMDGPU ELF related utilities.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "llvm/Frontend/Offloading/AMDGPU/ObjectUtilities.h"
14+
15+
#include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
16+
#include "llvm/BinaryFormat/ELF.h"
17+
#include "llvm/BinaryFormat/MsgPackDocument.h"
18+
#include "llvm/Object/ELFObjectFile.h"
19+
#include "llvm/Support/MemoryBufferRef.h"
20+
#include "llvm/Support/YAMLTraits.h"
21+
22+
using namespace llvm;
23+
using namespace llvm::ELF;
24+
using namespace llvm::offloading::amdgpu;
25+
26+
bool llvm::offloading::amdgpu::isImageCompatibleWithEnv(StringRef ImageArch,
27+
uint32_t ImageFlags,
28+
StringRef EnvTargetID) {
29+
StringRef EnvArch = EnvTargetID.split(":").first;
30+
31+
// Trivial check if the base processors match.
32+
if (EnvArch != ImageArch)
33+
return false;
34+
35+
// Check if the image is requesting xnack on or off.
36+
switch (ImageFlags & EF_AMDGPU_FEATURE_XNACK_V4) {
37+
case EF_AMDGPU_FEATURE_XNACK_OFF_V4:
38+
// The image is 'xnack-' so the environment must be 'xnack-'.
39+
if (!EnvTargetID.contains("xnack-"))
40+
return false;
41+
break;
42+
case EF_AMDGPU_FEATURE_XNACK_ON_V4:
43+
// The image is 'xnack+' so the environment must be 'xnack+'.
44+
if (!EnvTargetID.contains("xnack+"))
45+
return false;
46+
break;
47+
case EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4:
48+
case EF_AMDGPU_FEATURE_XNACK_ANY_V4:
49+
default:
50+
break;
51+
}
52+
53+
// Check if the image is requesting sramecc on or off.
54+
switch (ImageFlags & EF_AMDGPU_FEATURE_SRAMECC_V4) {
55+
case EF_AMDGPU_FEATURE_SRAMECC_OFF_V4:
56+
// The image is 'sramecc-' so the environment must be 'sramecc-'.
57+
if (!EnvTargetID.contains("sramecc-"))
58+
return false;
59+
break;
60+
case EF_AMDGPU_FEATURE_SRAMECC_ON_V4:
61+
// The image is 'sramecc+' so the environment must be 'sramecc+'.
62+
if (!EnvTargetID.contains("sramecc+"))
63+
return false;
64+
break;
65+
case EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4:
66+
case EF_AMDGPU_FEATURE_SRAMECC_ANY_V4:
67+
break;
68+
}
69+
70+
return true;
71+
}
72+
73+
namespace {
74+
/// Reads the AMDGPU specific per-kernel-metadata from an image.
75+
class KernelInfoReader {
76+
public:
77+
KernelInfoReader(StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KIM)
78+
: KernelInfoMap(KIM) {}
79+
80+
/// Process ELF note to read AMDGPU metadata from respective information
81+
/// fields.
82+
Error processNote(const llvm::object::ELF64LE::Note &Note, size_t Align) {
83+
if (Note.getName() != "AMDGPU")
84+
return Error::success(); // We are not interested in other things
85+
86+
assert(Note.getType() == ELF::NT_AMDGPU_METADATA &&
87+
"Parse AMDGPU MetaData");
88+
auto Desc = Note.getDesc(Align);
89+
StringRef MsgPackString =
90+
StringRef(reinterpret_cast<const char *>(Desc.data()), Desc.size());
91+
msgpack::Document MsgPackDoc;
92+
if (!MsgPackDoc.readFromBlob(MsgPackString, /*Multi=*/false))
93+
return Error::success();
94+
95+
AMDGPU::HSAMD::V3::MetadataVerifier Verifier(true);
96+
if (!Verifier.verify(MsgPackDoc.getRoot()))
97+
return Error::success();
98+
99+
auto RootMap = MsgPackDoc.getRoot().getMap(true);
100+
101+
if (auto Err = iterateAMDKernels(RootMap))
102+
return Err;
103+
104+
return Error::success();
105+
}
106+
107+
private:
108+
/// Extracts the relevant information via simple string look-up in the msgpack
109+
/// document elements.
110+
Error
111+
extractKernelData(msgpack::MapDocNode::MapTy::value_type V,
112+
std::string &KernelName,
113+
offloading::amdgpu::AMDGPUKernelMetaData &KernelData) {
114+
if (!V.first.isString())
115+
return Error::success();
116+
117+
const auto IsKey = [](const msgpack::DocNode &DK, StringRef SK) {
118+
return DK.getString() == SK;
119+
};
120+
121+
const auto GetSequenceOfThreeInts = [](msgpack::DocNode &DN,
122+
uint32_t *Vals) {
123+
assert(DN.isArray() && "MsgPack DocNode is an array node");
124+
auto DNA = DN.getArray();
125+
assert(DNA.size() == 3 && "ArrayNode has at most three elements");
126+
127+
int I = 0;
128+
for (auto DNABegin = DNA.begin(), DNAEnd = DNA.end(); DNABegin != DNAEnd;
129+
++DNABegin) {
130+
Vals[I++] = DNABegin->getUInt();
131+
}
132+
};
133+
134+
if (IsKey(V.first, ".name")) {
135+
KernelName = V.second.toString();
136+
} else if (IsKey(V.first, ".sgpr_count")) {
137+
KernelData.SGPRCount = V.second.getUInt();
138+
} else if (IsKey(V.first, ".sgpr_spill_count")) {
139+
KernelData.SGPRSpillCount = V.second.getUInt();
140+
} else if (IsKey(V.first, ".vgpr_count")) {
141+
KernelData.VGPRCount = V.second.getUInt();
142+
} else if (IsKey(V.first, ".vgpr_spill_count")) {
143+
KernelData.VGPRSpillCount = V.second.getUInt();
144+
} else if (IsKey(V.first, ".agpr_count")) {
145+
KernelData.AGPRCount = V.second.getUInt();
146+
} else if (IsKey(V.first, ".private_segment_fixed_size")) {
147+
KernelData.PrivateSegmentSize = V.second.getUInt();
148+
} else if (IsKey(V.first, ".group_segment_fixed_size")) {
149+
KernelData.GroupSegmentList = V.second.getUInt();
150+
} else if (IsKey(V.first, ".reqd_workgroup_size")) {
151+
GetSequenceOfThreeInts(V.second, KernelData.RequestedWorkgroupSize);
152+
} else if (IsKey(V.first, ".workgroup_size_hint")) {
153+
GetSequenceOfThreeInts(V.second, KernelData.WorkgroupSizeHint);
154+
} else if (IsKey(V.first, ".wavefront_size")) {
155+
KernelData.WavefrontSize = V.second.getUInt();
156+
} else if (IsKey(V.first, ".max_flat_workgroup_size")) {
157+
KernelData.MaxFlatWorkgroupSize = V.second.getUInt();
158+
}
159+
160+
return Error::success();
161+
}
162+
163+
/// Get the "amdhsa.kernels" element from the msgpack Document
164+
Expected<msgpack::ArrayDocNode> getAMDKernelsArray(msgpack::MapDocNode &MDN) {
165+
auto Res = MDN.find("amdhsa.kernels");
166+
if (Res == MDN.end())
167+
return createStringError(inconvertibleErrorCode(),
168+
"Could not find amdhsa.kernels key");
169+
170+
auto Pair = *Res;
171+
assert(Pair.second.isArray() &&
172+
"AMDGPU kernel entries are arrays of entries");
173+
174+
return Pair.second.getArray();
175+
}
176+
177+
/// Iterate all entries for one "amdhsa.kernels" entry. Each entry is a
178+
/// MapDocNode that either maps a string to a single value (most of them) or
179+
/// to another array of things. Currently, we only handle the case that maps
180+
/// to scalar value.
181+
Error generateKernelInfo(msgpack::ArrayDocNode::ArrayTy::iterator It) {
182+
offloading::amdgpu::AMDGPUKernelMetaData KernelData;
183+
std::string KernelName;
184+
auto Entry = (*It).getMap();
185+
for (auto MI = Entry.begin(), E = Entry.end(); MI != E; ++MI)
186+
if (auto Err = extractKernelData(*MI, KernelName, KernelData))
187+
return Err;
188+
189+
KernelInfoMap.insert({KernelName, KernelData});
190+
return Error::success();
191+
}
192+
193+
/// Go over the list of AMD kernels in the "amdhsa.kernels" entry
194+
Error iterateAMDKernels(msgpack::MapDocNode &MDN) {
195+
auto KernelsOrErr = getAMDKernelsArray(MDN);
196+
if (auto Err = KernelsOrErr.takeError())
197+
return Err;
198+
199+
auto KernelsArr = *KernelsOrErr;
200+
for (auto It = KernelsArr.begin(), E = KernelsArr.end(); It != E; ++It) {
201+
if (!It->isMap())
202+
continue; // we expect <key,value> pairs
203+
204+
// Obtain the value for the different entries. Each array entry is a
205+
// MapDocNode
206+
if (auto Err = generateKernelInfo(It))
207+
return Err;
208+
}
209+
return Error::success();
210+
}
211+
212+
// Kernel names are the keys
213+
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap;
214+
};
215+
} // namespace
216+
217+
Error llvm::offloading::amdgpu::getAMDGPUMetaDataFromImage(
218+
MemoryBufferRef MemBuffer,
219+
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> &KernelInfoMap,
220+
uint16_t &ELFABIVersion) {
221+
Error Err = Error::success(); // Used later as out-parameter
222+
223+
auto ELFOrError = object::ELF64LEFile::create(MemBuffer.getBuffer());
224+
if (auto Err = ELFOrError.takeError())
225+
return Err;
226+
227+
const object::ELF64LEFile ELFObj = ELFOrError.get();
228+
Expected<ArrayRef<object::ELF64LE::Shdr>> Sections = ELFObj.sections();
229+
if (!Sections)
230+
return Sections.takeError();
231+
KernelInfoReader Reader(KernelInfoMap);
232+
233+
// Read the code object version from ELF image header
234+
auto Header = ELFObj.getHeader();
235+
ELFABIVersion = (uint8_t)(Header.e_ident[ELF::EI_ABIVERSION]);
236+
for (const auto &S : *Sections) {
237+
if (S.sh_type != ELF::SHT_NOTE)
238+
continue;
239+
240+
for (const auto N : ELFObj.notes(S, Err)) {
241+
if (Err)
242+
return Err;
243+
// Fills the KernelInfoTabel entries in the reader
244+
if ((Err = Reader.processNote(N, S.sh_addralign)))
245+
return Err;
246+
}
247+
}
248+
return Error::success();
249+
}

llvm/lib/Frontend/Offloading/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
add_subdirectory(AMDGPU)
2+
13
add_llvm_component_library(LLVMFrontendOffloading
24
Utility.cpp
35
OffloadWrapper.cpp

offload/plugins-nextgen/amdgpu/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ target_include_directories(omptarget.rtl.amdgpu PRIVATE
1010

1111
if(hsa-runtime64_FOUND AND NOT "amdgpu" IN_LIST LIBOMPTARGET_DLOPEN_PLUGINS)
1212
message(STATUS "Building AMDGPU plugin linked against libhsa")
13-
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64)
13+
target_link_libraries(omptarget.rtl.amdgpu PRIVATE hsa-runtime64::hsa-runtime64 LLVMFrontendOffloadingAMDGPU)
1414
else()
1515
message(STATUS "Building AMDGPU plugin for dlopened libhsa")
1616
target_include_directories(omptarget.rtl.amdgpu PRIVATE dynamic_hsa)
1717
target_sources(omptarget.rtl.amdgpu PRIVATE dynamic_hsa/hsa.cpp)
18+
target_link_libraries(omptarget.rtl.amdgpu PRIVATE LLVMFrontendOffloadingAMDGPU)
1819
endif()
1920

2021
# Configure testing for the AMDGPU plugin. We will build tests if we could a

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
485485
findDeviceSymbol(GenericDeviceTy &Device, StringRef SymbolName) const;
486486

487487
/// Get additional info for kernel, e.g., register spill counts
488-
std::optional<utils::KernelMetaDataTy>
488+
std::optional<offloading::amdgpu::AMDGPUKernelMetaData>
489489
getKernelInfo(StringRef Identifier) const {
490490
auto It = KernelInfoMap.find(Identifier);
491491

@@ -499,7 +499,7 @@ struct AMDGPUDeviceImageTy : public DeviceImageTy {
499499
/// The exectuable loaded on the agent.
500500
hsa_executable_t Executable;
501501
hsa_code_object_t CodeObject;
502-
StringMap<utils::KernelMetaDataTy> KernelInfoMap;
502+
StringMap<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfoMap;
503503
uint16_t ELFABIVersion;
504504
};
505505

@@ -600,7 +600,7 @@ struct AMDGPUKernelTy : public GenericKernelTy {
600600
uint32_t ImplicitArgsSize;
601601

602602
/// Additional Info for the AMD GPU Kernel
603-
std::optional<utils::KernelMetaDataTy> KernelInfo;
603+
std::optional<offloading::amdgpu::AMDGPUKernelMetaData> KernelInfo;
604604
};
605605

606606
/// Class representing an HSA signal. Signals are used to define dependencies
@@ -3188,9 +3188,9 @@ struct AMDGPUPluginTy final : public GenericPluginTy {
31883188
utils::getTargetTripleAndFeatures(getKernelAgent(DeviceId));
31893189
if (!TargeTripleAndFeaturesOrError)
31903190
return TargeTripleAndFeaturesOrError.takeError();
3191-
return utils::isImageCompatibleWithEnv(Processor ? *Processor : "",
3192-
ElfOrErr->getPlatformFlags(),
3193-
*TargeTripleAndFeaturesOrError);
3191+
return offloading::amdgpu::isImageCompatibleWithEnv(
3192+
Processor ? *Processor : "", ElfOrErr->getPlatformFlags(),
3193+
*TargeTripleAndFeaturesOrError);
31943194
}
31953195

31963196
bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) override {

0 commit comments

Comments
 (0)