Skip to content

Commit e6a3579

Browse files
authored
[Offload] Replace device info queue with a tree (llvm#144050)
Previously, device info was returned as a queue with each element having a "Level" field indicating its nesting level. This replaces this queue with a more traditional tree-like structure. This should not result in a change to the output of `llvm-offload-device-info`.
1 parent 0a0960d commit e6a3579

File tree

6 files changed

+125
-106
lines changed

6 files changed

+125
-106
lines changed

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -229,26 +229,19 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
229229

230230
// Find the info if it exists under any of the given names
231231
auto GetInfo = [&](std::vector<std::string> Names) {
232-
InfoQueueTy DevInfo;
233232
if (Device == HostDevice())
234233
return std::string("Host");
235234

236235
if (!Device->Device)
237236
return std::string("");
238237

239-
if (auto Err = Device->Device->obtainInfoImpl(DevInfo))
238+
auto Info = Device->Device->obtainInfoImpl();
239+
if (auto Err = Info.takeError())
240240
return std::string("");
241241

242242
for (auto Name : Names) {
243-
auto InfoKeyMatches = [&](const InfoQueueTy::InfoQueueEntryTy &Info) {
244-
return Info.Key == Name;
245-
};
246-
auto Item = std::find_if(DevInfo.getQueue().begin(),
247-
DevInfo.getQueue().end(), InfoKeyMatches);
248-
249-
if (Item != std::end(DevInfo.getQueue())) {
250-
return Item->Value;
251-
}
243+
if (auto Entry = Info->get(Name))
244+
return (*Entry)->Value;
252245
}
253246

254247
return std::string("");

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2551,7 +2551,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25512551
}
25522552

25532553
/// Collect information about the device into an info tree.
2554-
Error obtainInfoImpl(InfoQueueTy &Info) override {
2554+
Expected<InfoTreeNode> obtainInfoImpl() override {
25552555
char TmpChar[1000];
25562556
const char *TmpCharPtr = "Unknown";
25572557
uint16_t Major, Minor;
@@ -2562,6 +2562,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
25622562
uint16_t WorkgrpMaxDim[3];
25632563
hsa_dim3_t GridMaxDim;
25642564
hsa_status_t Status, Status2;
2565+
InfoTreeNode Info;
25652566

25662567
Status = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &Major);
25672568
Status2 = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &Minor);
@@ -2617,11 +2618,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26172618
// runtime.
26182619
Status = getDeviceAttrRaw(HSA_AGENT_INFO_CACHE_SIZE, CacheSize);
26192620
if (Status == HSA_STATUS_SUCCESS) {
2620-
Info.add("Cache");
2621+
auto &Cache = *Info.add("Cache");
26212622

26222623
for (int I = 0; I < 4; I++)
26232624
if (CacheSize[I])
2624-
Info.add<InfoLevel2>("L" + std::to_string(I), CacheSize[I]);
2625+
Cache.add("L" + std::to_string(I), CacheSize[I]);
26252626
}
26262627

26272628
Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_CACHELINE_SIZE, TmpUInt);
@@ -2654,10 +2655,10 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26542655

26552656
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
26562657
if (Status == HSA_STATUS_SUCCESS) {
2657-
Info.add("Workgroup Max Size per Dimension");
2658-
Info.add<InfoLevel2>("x", WorkgrpMaxDim[0]);
2659-
Info.add<InfoLevel2>("y", WorkgrpMaxDim[1]);
2660-
Info.add<InfoLevel2>("z", WorkgrpMaxDim[2]);
2658+
auto &MaxSize = *Info.add("Workgroup Max Size per Dimension");
2659+
MaxSize.add("x", WorkgrpMaxDim[0]);
2660+
MaxSize.add("y", WorkgrpMaxDim[1]);
2661+
MaxSize.add("z", WorkgrpMaxDim[2]);
26612662
}
26622663

26632664
Status = getDeviceAttrRaw(
@@ -2673,17 +2674,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26732674

26742675
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
26752676
if (Status == HSA_STATUS_SUCCESS) {
2676-
Info.add("Grid Max Size per Dimension");
2677-
Info.add<InfoLevel2>("x", GridMaxDim.x);
2678-
Info.add<InfoLevel2>("y", GridMaxDim.y);
2679-
Info.add<InfoLevel2>("z", GridMaxDim.z);
2677+
auto &MaxDim = *Info.add("Grid Max Size per Dimension");
2678+
MaxDim.add("x", GridMaxDim.x);
2679+
MaxDim.add("y", GridMaxDim.y);
2680+
MaxDim.add("z", GridMaxDim.z);
26802681
}
26812682

26822683
Status = getDeviceAttrRaw(HSA_AGENT_INFO_FBARRIER_MAX_SIZE, TmpUInt);
26832684
if (Status == HSA_STATUS_SUCCESS)
26842685
Info.add("Max fbarriers/Workgrp", TmpUInt);
26852686

2686-
Info.add("Memory Pools");
2687+
auto &RootPool = *Info.add("Memory Pools");
26872688
for (AMDGPUMemoryPoolTy *Pool : AllMemoryPools) {
26882689
std::string TmpStr, TmpStr2;
26892690

@@ -2698,7 +2699,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
26982699
else
26992700
TmpStr = "Unknown";
27002701

2701-
Info.add<InfoLevel2>(std::string("Pool ") + TmpStr);
2702+
auto &PoolNode = *RootPool.add(std::string("Pool ") + TmpStr);
27022703

27032704
if (Pool->isGlobal()) {
27042705
if (Pool->isFineGrained())
@@ -2708,39 +2709,39 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
27082709
if (Pool->supportsKernelArgs())
27092710
TmpStr2 += "Kernarg ";
27102711

2711-
Info.add<InfoLevel3>("Flags", TmpStr2);
2712+
PoolNode.add("Flags", TmpStr2);
27122713
}
27132714

27142715
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
27152716
if (Status == HSA_STATUS_SUCCESS)
2716-
Info.add<InfoLevel3>("Size", TmpSt, "bytes");
2717+
PoolNode.add("Size", TmpSt, "bytes");
27172718

27182719
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
27192720
TmpBool);
27202721
if (Status == HSA_STATUS_SUCCESS)
2721-
Info.add<InfoLevel3>("Allocatable", TmpBool);
2722+
PoolNode.add("Allocatable", TmpBool);
27222723

27232724
Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE,
27242725
TmpSt);
27252726
if (Status == HSA_STATUS_SUCCESS)
2726-
Info.add<InfoLevel3>("Runtime Alloc Granule", TmpSt, "bytes");
2727+
PoolNode.add("Runtime Alloc Granule", TmpSt, "bytes");
27272728

27282729
Status = Pool->getAttrRaw(
27292730
HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, TmpSt);
27302731
if (Status == HSA_STATUS_SUCCESS)
2731-
Info.add<InfoLevel3>("Runtime Alloc Alignment", TmpSt, "bytes");
2732+
PoolNode.add("Runtime Alloc Alignment", TmpSt, "bytes");
27322733

27332734
Status =
27342735
Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, TmpBool);
27352736
if (Status == HSA_STATUS_SUCCESS)
2736-
Info.add<InfoLevel3>("Accessible by all", TmpBool);
2737+
PoolNode.add("Accessible by all", TmpBool);
27372738
}
27382739

2739-
Info.add("ISAs");
2740+
auto &ISAs = *Info.add("ISAs");
27402741
auto Err = hsa_utils::iterateAgentISAs(getAgent(), [&](hsa_isa_t ISA) {
27412742
Status = hsa_isa_get_info_alt(ISA, HSA_ISA_INFO_NAME, TmpChar);
27422743
if (Status == HSA_STATUS_SUCCESS)
2743-
Info.add<InfoLevel2>("Name", TmpChar);
2744+
ISAs.add("Name", TmpChar);
27442745

27452746
return Status;
27462747
});
@@ -2749,7 +2750,7 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
27492750
if (Err)
27502751
consumeError(std::move(Err));
27512752

2752-
return Plugin::success();
2753+
return Info;
27532754
}
27542755

27552756
/// Returns true if auto zero-copy is the best configuration for the current

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 81 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -112,77 +112,100 @@ struct AsyncInfoWrapperTy {
112112
__tgt_async_info *AsyncInfoPtr;
113113
};
114114

115-
/// The information level represents the level of a key-value property in the
116-
/// info tree print (i.e. indentation). The first level should be the default.
117-
enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
118-
119-
/// Class for storing device information and later be printed. An object of this
120-
/// type acts as a queue of key-value properties. Each property has a key, a
121-
/// a value, and an optional unit for the value. For printing purposes, the
122-
/// information can be classified into several levels. These levels are useful
123-
/// for defining sections and subsections. Thus, each key-value property also
124-
/// has an additional field indicating to which level belongs to. Notice that
125-
/// we use the level to determine the indentation of the key-value property at
126-
/// printing time. See the enum InfoLevelKind for the list of accepted levels.
127-
class InfoQueueTy {
128-
public:
129-
struct InfoQueueEntryTy {
130-
std::string Key;
131-
std::string Value;
132-
std::string Units;
133-
uint64_t Level;
134-
};
135-
136-
private:
137-
std::deque<InfoQueueEntryTy> Queue;
138-
139-
public:
140-
/// Add a new info entry to the queue. The entry requires at least a key
141-
/// string in \p Key. The value in \p Value is optional and can be any type
142-
/// that is representable as a string. The units in \p Units is optional and
143-
/// must be a string. The info level is a template parameter that defaults to
144-
/// the first level (top level).
145-
template <InfoLevelKind L = InfoLevel1, typename T = std::string>
146-
void add(const std::string &Key, T Value = T(),
147-
const std::string &Units = std::string()) {
115+
/// Tree node for device information
116+
///
117+
/// This information is either printed or used by liboffload to extract certain
118+
/// device queries. Each property has an optional key, an optional value
119+
/// and optional children. The children can be used to store additional
120+
/// information (such as x, y and z components of ranges).
121+
struct InfoTreeNode {
122+
static constexpr uint64_t IndentSize = 4;
123+
124+
std::string Key;
125+
std::string Value;
126+
std::string Units;
127+
// Need to specify a default number of elements here as `InfoTreeNode`'s
128+
// size is unknown. This is a vector (rather than a Key->Value map) since:
129+
// * The keys need to be owned and thus `std::string`s
130+
// * The order of keys is important
131+
// * The same key can appear multiple times
132+
std::unique_ptr<llvm::SmallVector<InfoTreeNode, 8>> Children;
133+
134+
InfoTreeNode() : InfoTreeNode("", "", "") {}
135+
InfoTreeNode(std::string Key, std::string Value, std::string Units)
136+
: Key(Key), Value(Value), Units(Units) {}
137+
138+
/// Add a new info entry as a child of this node. The entry requires at least
139+
/// a key string in \p Key. The value in \p Value is optional and can be any
140+
/// type that is representable as a string. The units in \p Units is optional
141+
/// and must be a string.
142+
template <typename T = std::string>
143+
InfoTreeNode *add(std::string Key, T Value = T(),
144+
const std::string &Units = std::string()) {
148145
assert(!Key.empty() && "Invalid info key");
149146

150-
// Convert the value to a string depending on its type.
147+
if (!Children)
148+
Children = std::make_unique<llvm::SmallVector<InfoTreeNode, 8>>();
149+
150+
std::string ValueStr;
151151
if constexpr (std::is_same_v<T, bool>)
152-
Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
152+
ValueStr = Value ? "Yes" : "No";
153153
else if constexpr (std::is_arithmetic_v<T>)
154-
Queue.push_back({Key, std::to_string(Value), Units, L});
154+
ValueStr = std::to_string(Value);
155155
else
156-
Queue.push_back({Key, Value, Units, L});
156+
ValueStr = Value;
157+
158+
return &Children->emplace_back(Key, ValueStr, Units);
157159
}
158160

159-
const std::deque<InfoQueueEntryTy> &getQueue() const { return Queue; }
161+
std::optional<InfoTreeNode *> get(StringRef Key) {
162+
if (!Children)
163+
return std::nullopt;
160164

161-
/// Print all info entries added to the queue.
162-
void print() const {
163-
// We print four spances for each level.
164-
constexpr uint64_t IndentSize = 4;
165+
auto It = std::find_if(Children->begin(), Children->end(),
166+
[&](auto &V) { return V.Key == Key; });
167+
if (It == Children->end())
168+
return std::nullopt;
169+
return It;
170+
}
165171

166-
// Find the maximum key length (level + key) to compute the individual
167-
// indentation of each entry.
168-
uint64_t MaxKeySize = 0;
169-
for (const auto &Entry : Queue) {
170-
uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
171-
if (KeySize > MaxKeySize)
172-
MaxKeySize = KeySize;
173-
}
172+
/// Print all info entries in the tree
173+
void print() const {
174+
// Fake an additional indent so that values are offset from the keys
175+
doPrint(0, maxKeySize(1));
176+
}
174177

175-
// Print all info entries.
176-
for (const auto &Entry : Queue) {
178+
private:
179+
void doPrint(int Level, uint64_t MaxKeySize) const {
180+
if (Key.size()) {
177181
// Compute the indentations for the current entry.
178-
uint64_t KeyIndentSize = Entry.Level * IndentSize;
182+
uint64_t KeyIndentSize = Level * IndentSize;
179183
uint64_t ValIndentSize =
180-
MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
184+
MaxKeySize - (Key.size() + KeyIndentSize) + IndentSize;
181185

182-
llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
183-
<< std::string(ValIndentSize, ' ') << Entry.Value
184-
<< (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
186+
llvm::outs() << std::string(KeyIndentSize, ' ') << Key
187+
<< std::string(ValIndentSize, ' ') << Value
188+
<< (Units.empty() ? "" : " ") << Units << "\n";
185189
}
190+
191+
// Print children
192+
if (Children)
193+
for (const auto &Entry : *Children)
194+
Entry.doPrint(Level + 1, MaxKeySize);
195+
}
196+
197+
// Recursively calculates the maximum key width in the tree, including indentation
198+
uint64_t maxKeySize(int Level) const {
199+
uint64_t MaxKeySize = 0;
200+
201+
if (Children)
202+
for (const auto &Entry : *Children) {
203+
uint64_t KeySize = Entry.Key.size() + Level * IndentSize;
204+
MaxKeySize = std::max(MaxKeySize, KeySize);
205+
MaxKeySize = std::max(MaxKeySize, Entry.maxKeySize(Level + 1));
206+
}
207+
208+
return MaxKeySize;
186209
}
187210
};
188211

@@ -871,7 +894,7 @@ struct GenericDeviceTy : public DeviceAllocatorTy {
871894

872895
/// Print information about the device.
873896
Error printInfo();
874-
virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
897+
virtual Expected<InfoTreeNode> obtainInfoImpl() = 0;
875898

876899
/// Getters of the grid values.
877900
uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,14 +1578,14 @@ Error GenericDeviceTy::initDeviceInfo(__tgt_device_info *DeviceInfo) {
15781578
}
15791579

15801580
Error GenericDeviceTy::printInfo() {
1581-
InfoQueueTy InfoQueue;
1581+
auto Info = obtainInfoImpl();
15821582

15831583
// Get the vendor-specific info entries describing the device properties.
1584-
if (auto Err = obtainInfoImpl(InfoQueue))
1584+
if (auto Err = Info.takeError())
15851585
return Err;
15861586

15871587
// Print all info entries.
1588-
InfoQueue.print();
1588+
Info->print();
15891589

15901590
return Plugin::success();
15911591
}

0 commit comments

Comments
 (0)