Skip to content

Commit e5b0990

Browse files
committed
temp 2
1 parent 9a9ef4b commit e5b0990

File tree

8 files changed

+183
-35
lines changed

8 files changed

+183
-35
lines changed

src/memory_target.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,9 +86,12 @@ umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
8686
}
8787

8888
umf_result_t
89-
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t memoryTarget,
89+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
90+
umf_memory_target_handle_t dstMemoryTarget,
9091
size_t *bandwidth) {
91-
assert(memoryTarget);
92+
assert(srcMemoryTarget);
93+
assert(dstMemoryTarget);
9294
assert(bandwidth);
93-
return memoryTarget->ops->get_bandwidth(memoryTarget->priv, bandwidth);
95+
return srcMemoryTarget->ops->get_bandwidth(
96+
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
9497
}

src/memory_target.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,
3838
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
3939
size_t *capacity);
4040
umf_result_t
41-
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t memoryTarget,
41+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
42+
umf_memory_target_handle_t dstMemoryTarget,
4243
size_t *bandwidth);
4344

4445
#ifdef __cplusplus

src/memory_target_ops.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ typedef struct umf_memory_target_ops_t {
4141
umf_memory_provider_handle_t *provider);
4242

4343
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
44-
umf_result_t (*get_bandwidth)(void *memoryTarget, size_t *bandwidth);
44+
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
45+
size_t *bandwidth);
4546
} umf_memory_target_ops_t;
4647

4748
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -195,38 +195,40 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
195195
return UMF_RESULT_SUCCESS;
196196
}
197197

198-
static umf_result_t numa_get_bandwidth(void *memTarget, size_t *bandwidth) {
198+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
199+
void *dstMemoryTarget,
200+
size_t *bandwidth) {
199201
hwloc_topology_t topology = umfGetTopology();
200202
if (!topology) {
201203
return UMF_RESULT_ERROR_NOT_SUPPORTED;
202204
}
203205

204-
hwloc_obj_t numaNode = hwloc_get_obj_by_type(
206+
hwloc_obj_t srcNumaNode = hwloc_get_obj_by_type(
205207
topology, HWLOC_OBJ_NUMANODE,
206-
((struct numa_memory_target_t *)memTarget)->physical_id);
207-
if (!numaNode) {
208+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id);
209+
if (!srcNumaNode) {
208210
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
209211
}
210212

211-
umf_result_t umf_ret = UMF_RESULT_SUCCESS;
213+
hwloc_obj_t dstNumaNode = hwloc_get_obj_by_type(
214+
topology, HWLOC_OBJ_NUMANODE,
215+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
216+
if (!dstNumaNode) {
217+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
218+
}
212219

213-
hwloc_const_cpuset_t cCpuset = hwloc_topology_get_allowed_cpuset(topology);
214-
hwloc_cpuset_t cpuset = hwloc_bitmap_dup(cCpuset);
215-
struct hwloc_location initiator = {.location.cpuset = cpuset,
220+
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
216221
.type = HWLOC_LOCATION_TYPE_CPUSET};
217222
hwloc_uint64_t value = 0;
218223
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
219-
numaNode, &initiator, 0, &value);
224+
dstNumaNode, &initiator, 0, &value);
220225
if (ret) {
221-
umf_ret = (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
222-
: UMF_RESULT_ERROR_UNKNOWN;
223-
goto err;
226+
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
227+
: UMF_RESULT_ERROR_UNKNOWN;
224228
}
225229

226230
*bandwidth = value;
227-
err:
228-
hwloc_bitmap_free(cpuset);
229-
return umf_ret;
231+
return UMF_RESULT_SUCCESS;
230232
}
231233

232234
struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {

src/memspace.c

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,89 @@ umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
208208

209209
return UMF_RESULT_SUCCESS;
210210
}
211+
212+
enum umf_result_t umfMemspaceFilter(
213+
umf_memspace_handle_t hMemspace,
214+
umf_result_t (*getTarget)(umf_memory_target_handle_t initiator,
215+
umf_memory_target_handle_t *nodes,
216+
size_t numNodes,
217+
umf_memory_target_handle_t *target),
218+
umf_memspace_handle_t *filteredMemspace) {
219+
if (!hMemspace || !getTarget) {
220+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
221+
}
222+
223+
umf_memory_target_handle_t *uniqueBestNodes = umf_ba_global_alloc(
224+
hMemspace->size * sizeof(umf_memory_target_handle_t *));
225+
if (!uniqueBestNodes) {
226+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
227+
}
228+
229+
umf_result_t ret = UMF_RESULT_SUCCESS;
230+
231+
size_t numUniqueBestNodes = 0;
232+
for (size_t nodeIdx = 0; nodeIdx < hMemspace->size; nodeIdx++) {
233+
umf_memory_target_handle_t target = NULL;
234+
ret = getTarget(hMemspace->nodes[nodeIdx], hMemspace->nodes,
235+
hMemspace->size, &target);
236+
if (ret != UMF_RESULT_SUCCESS) {
237+
goto err_free_best_targets;
238+
}
239+
240+
// check if the target is already present in the best nodes
241+
size_t bestTargetIdx;
242+
for (bestTargetIdx = 0; bestTargetIdx < numUniqueBestNodes;
243+
bestTargetIdx++) {
244+
if (uniqueBestNodes[bestTargetIdx] == target) {
245+
break;
246+
}
247+
}
248+
249+
// if the target is not present, add it to the best nodes
250+
if (bestTargetIdx == numUniqueBestNodes) {
251+
uniqueBestNodes[numUniqueBestNodes++] = target;
252+
}
253+
}
254+
255+
// copy the unique best nodes into a new memspace
256+
umf_memspace_handle_t newMemspace =
257+
umf_ba_global_alloc(sizeof(struct umf_memspace_t));
258+
if (!newMemspace) {
259+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
260+
goto err_free_best_targets;
261+
}
262+
263+
newMemspace->size = numUniqueBestNodes;
264+
newMemspace->nodes = umf_ba_global_alloc(
265+
sizeof(umf_memory_target_handle_t) * newMemspace->size);
266+
if (!newMemspace->nodes) {
267+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
268+
goto err_free_new_memspace;
269+
}
270+
271+
size_t cloneIdx = 0;
272+
for (size_t cloneIdx = 0; cloneIdx < newMemspace->size; cloneIdx++) {
273+
ret = umfMemoryTargetClone(uniqueBestNodes[cloneIdx],
274+
&newMemspace->nodes[cloneIdx]);
275+
if (ret != UMF_RESULT_SUCCESS) {
276+
goto err_free_cloned_nodes;
277+
}
278+
}
279+
280+
*filteredMemspace = newMemspace;
281+
umf_ba_global_free(uniqueBestNodes);
282+
283+
return UMF_RESULT_SUCCESS;
284+
285+
err_free_cloned_nodes:
286+
while (cloneIdx != 0) {
287+
cloneIdx--;
288+
umfMemoryTargetDestroy(newMemspace->nodes[cloneIdx]);
289+
}
290+
umf_ba_global_free(newMemspace->nodes);
291+
err_free_new_memspace:
292+
umf_ba_global_free(newMemspace);
293+
err_free_best_targets:
294+
umf_ba_global_free(uniqueBestNodes);
295+
return ret;
296+
}

src/memspace_internal.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t,
3939
enum umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
4040
umfGetPropertyFn getProperty);
4141

42+
// Callback type consumed by umfMemspaceFilter(). It receives an 'initiator'
// target and an array 'nodes' of 'numNodes' candidate targets, and is
// expected to store its choice in '*target' and return a status code.
typedef umf_result_t (*umfGetTargetFn)(umf_memory_target_handle_t initiator,
43+
umf_memory_target_handle_t *nodes,
44+
size_t numNodes,
45+
umf_memory_target_handle_t *target);
46+
47+
///
48+
/// \brief Filters the targets using getTarget() to create a new memspace
/// \details getTarget() is called once for every target of hMemspace (used
///          as the initiator) with all targets of hMemspace as candidates.
///          Each distinct target it returns is cloned into the new memspace.
/// \param hMemspace handle to the source memspace
/// \param getTarget callback selecting one target for a given initiator
/// \param filteredMemspace [out] receives the handle to the new memspace
/// \return UMF_RESULT_SUCCESS on success or an appropriate error code
49+
///
50+
enum umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
51+
umfGetTargetFn getTarget,
52+
umf_memspace_handle_t *filteredMemspace);
53+
4254
///
4355
/// \brief Destroys memspace
4456
/// \param hMemspace handle to memspace

src/memspaces/memspace_highest_bandwidth.c

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,31 @@ static umf_result_t getenv_to_nodes_array(char *envStr, size_t *arr,
7171
return UMF_RESULT_SUCCESS;
7272
}
7373

74+
// Selects, among 'nodes', the target with the highest bandwidth as reported
// by umfMemoryTargetGetBandwidth() for the given 'initiator'.
//
// On success the chosen handle is stored in '*target'. When several nodes
// share the highest value the first one wins; when every node reports 0,
// nodes[0] is returned.
//
// Returns UMF_RESULT_ERROR_INVALID_ARGUMENT for a NULL/empty node array or a
// NULL 'target', and forwards any error from umfMemoryTargetGetBandwidth().
static umf_result_t getBestBandwidthTarget(umf_memory_target_handle_t initiator,
                                           umf_memory_target_handle_t *nodes,
                                           size_t numNodes,
                                           umf_memory_target_handle_t *target) {
    // Without at least one candidate there is nothing valid to return.
    if (!nodes || numNodes == 0 || !target) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    size_t bestNodeIdx = 0;
    // size_t matches the 'size_t *bandwidth' out-parameter of
    // umfMemoryTargetGetBandwidth(); uint64_t is not guaranteed to be the
    // same type on every ABI.
    size_t bestBandwidth = 0;
    for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) {
        size_t bandwidth = 0;
        umf_result_t ret =
            umfMemoryTargetGetBandwidth(initiator, nodes[nodeIdx], &bandwidth);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }

        if (bandwidth > bestBandwidth) {
            bestNodeIdx = nodeIdx;
            bestBandwidth = bandwidth;
        }
    }

    *target = nodes[bestNodeIdx];

    return UMF_RESULT_SUCCESS;
}
98+
7499
static umf_result_t
75100
umfMemspaceHighestBandwidthCreate(umf_memspace_handle_t *hMemspace) {
76101
if (!hMemspace) {
@@ -94,16 +119,9 @@ umfMemspaceHighestBandwidthCreate(umf_memspace_handle_t *hMemspace) {
94119
}
95120

96121
umf_memspace_handle_t highBandwidthMemspace = NULL;
97-
ret = umfMemspaceClone(hostAllMemspace, &highBandwidthMemspace);
98-
if (ret != UMF_RESULT_SUCCESS) {
99-
return ret;
100-
}
101-
102-
ret = umfMemspaceSortDesc(highBandwidthMemspace,
103-
(umfGetPropertyFn)&umfMemoryTargetGetBandwidth);
122+
ret = umfMemspaceFilter(hostAllMemspace, getBestBandwidthTarget,
123+
&highBandwidthMemspace);
104124
if (ret != UMF_RESULT_SUCCESS) {
105-
// TODO: Should we have an alternative way to query 'bandwidth' value
106-
// of each NUMA node?
107125
// HWLOC could possibly return an 'EINVAL' error, which in this context
108126
// means that the HMAT is unavailable and we can't obtain the
109127
// 'bandwidth' value of any NUMA node.

test/memspaces/memspace_highest_bandwidth.cpp

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,36 @@
1414

1515
using umf_test::test;
1616

17+
// In HWLOC v2.3.0, the 'hwloc_location_type_e' enum is defined inside an
18+
// 'hwloc_location' struct. In newer versions, this enum is defined globally.
19+
// To prevent compile errors in C++ tests related to this scope change,
20+
// 'hwloc_location_type_e' has been conditionally aliased depending on the
21+
// actual location of the enum definition.
22+
// Trait whose 'value' is taken from the return type of test<T>():
// std::true_type for the overload whose return type mentions
// U::hwloc_location_type_e, std::false_type for the variadic fallback.
template <typename T> struct has_hwloc_location_type_e {
23+
private:
24+
template <typename U>
25+
static auto test() -> decltype(U::hwloc_location_type_e, std::true_type{});
26+
template <typename> static std::false_type test(...);
27+
28+
public:
29+
static constexpr bool value = decltype(test<T>())::value;
30+
};
31+
32+
// Primary template (declaration only); the bool parameter defaults to the
// trait result for T and selects one of the two specializations below.
template <typename T, bool = has_hwloc_location_type_e<T>::value>
33+
struct conditional_hwloc_location_type_e;
34+
35+
// Trait is true: 'type' names the enum nested inside T.
template <typename T> struct conditional_hwloc_location_type_e<T, true> {
36+
using type = typename T::hwloc_location_type_e;
37+
};
38+
39+
// Trait is false: 'type' names the enum declared outside of T.
template <typename T> struct conditional_hwloc_location_type_e<T, false> {
40+
using type = enum hwloc_location_type_e;
41+
};
42+
43+
// Alias for hwloc_location_type_e
44+
using hwloc_location_type_e_alias =
45+
typename conditional_hwloc_location_type_e<hwloc_location>::type;
46+
1747
static bool canQueryBandwidth(size_t nodeId) {
1848
hwloc_topology_t topology = nullptr;
1949
int ret = hwloc_topology_init(&topology);
@@ -25,20 +55,15 @@ static bool canQueryBandwidth(size_t nodeId) {
2555
hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, nodeId);
2656
UT_ASSERTne(numaNode, nullptr);
2757

28-
// Get local cpuset.
29-
// hwloc_const_cpuset_t cCpuset = hwloc_topology_get_allowed_cpuset(topology);
30-
// hwloc_cpuset_t cpuset = hwloc_bitmap_dup(cCpuset);
31-
3258
// Setup initiator structure.
3359
struct hwloc_location initiator;
3460
initiator.location.cpuset = numaNode->cpuset;
35-
initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
61+
initiator.type = hwloc_location_type_e_alias::HWLOC_LOCATION_TYPE_CPUSET;
3662

3763
hwloc_uint64_t value = 0;
3864
ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
3965
numaNode, &initiator, 0, &value);
4066

41-
// hwloc_bitmap_free(cpuset);
4267
hwloc_topology_destroy(topology);
4368
return (ret == 0);
4469
}

0 commit comments

Comments
 (0)