Skip to content

Commit e7dbe35

Browse files
committed
Add memspace "highest bandwidth"
This memspace contains an aggregated list of the NUMA nodes identified as the best targets when each NUMA node in turn is selected as the initiator. Querying the bandwidth value requires HMAT support on the platform; if HMAT is not supported, umfMemspaceHighestBandwidthGet() returns NULL. This commit also introduces the UMF_MEMSPACE_HIGHEST_BANDWIDTH environment variable, which forcefully assigns NUMA nodes to the "highest bandwidth" memspace. The order of memory targets within this memspace matches the order in which the NUMA nodes are listed in the environment variable.
1 parent 73352c1 commit e7dbe35

13 files changed

+355
-1
lines changed

include/umf/memspace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ umf_memspace_handle_t umfMemspaceHostAllGet(void);
5656
///
5757
umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);
5858

59+
/// \brief Retrieves predefined highest bandwidth memspace.
60+
/// \return highest bandwidth memspace handle on success or NULL on failure.
61+
///
62+
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);
63+
5964
#ifdef __cplusplus
6065
}
6166
#endif

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
8181
memory_targets/memory_target_numa.c
8282
memspaces/memspace_numa.c
8383
memspaces/memspace_host_all.c
84-
memspaces/memspace_highest_capacity.c)
84+
memspaces/memspace_highest_capacity.c
85+
memspaces/memspace_highest_bandwidth.c)
8586

8687
set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
8788
provider/provider_os_memory_linux.c)

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ UMF_1.0 {
3030
umfMemoryProviderPutIPCHandle;
3131
umfMemspaceCreateFromNumaArray;
3232
umfMemspaceDestroy;
33+
umfMemspaceHighestBandwidthGet;
3334
umfMemspaceHighestCapacityGet;
3435
umfMemspaceHostAllGet;
3536
umfOpenIPCHandle;

src/libumf_linux.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void __attribute__((destructor)) umfDestroy(void) {
2929
umfMemoryTrackerDestroy(t);
3030
umfMemspaceHostAllDestroy();
3131
umfMemspaceHighestCapacityDestroy();
32+
umfMemspaceHighestBandwidthDestroy();
3233
umfDestroyTopology();
3334
}
3435

src/memory_target.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,14 @@ umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
8383
assert(capacity);
8484
return memoryTarget->ops->get_capacity(memoryTarget->priv, capacity);
8585
}
86+
87+
umf_result_t
88+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
89+
umf_memory_target_handle_t dstMemoryTarget,
90+
size_t *bandwidth) {
91+
assert(srcMemoryTarget);
92+
assert(dstMemoryTarget);
93+
assert(bandwidth);
94+
return srcMemoryTarget->ops->get_bandwidth(
95+
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
96+
}

src/memory_target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,
3737
umf_memory_target_handle_t *outHandle);
3838
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
3939
size_t *capacity);
40+
umf_result_t
41+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
42+
umf_memory_target_handle_t dstMemoryTarget,
43+
size_t *bandwidth);
4044

4145
#ifdef __cplusplus
4246
}

src/memory_target_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t {
4141
umf_memory_provider_handle_t *provider);
4242

4343
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
44+
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
45+
size_t *bandwidth);
4446
} umf_memory_target_ops_t;
4547

4648
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "base_alloc_global.h"
2020
#include "memory_target_numa.h"
2121
#include "topology.h"
22+
#include "utils_log.h"
2223

2324
struct numa_memory_target_t {
2425
unsigned physical_id;
@@ -143,12 +144,57 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
143144
return UMF_RESULT_SUCCESS;
144145
}
145146

147+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
148+
void *dstMemoryTarget,
149+
size_t *bandwidth) {
150+
hwloc_topology_t topology = umfGetTopology();
151+
if (!topology) {
152+
return UMF_RESULT_ERROR_NOT_SUPPORTED;
153+
}
154+
155+
hwloc_obj_t srcNumaNode = hwloc_get_obj_by_type(
156+
topology, HWLOC_OBJ_NUMANODE,
157+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id);
158+
if (!srcNumaNode) {
159+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
160+
}
161+
162+
hwloc_obj_t dstNumaNode = hwloc_get_obj_by_type(
163+
topology, HWLOC_OBJ_NUMANODE,
164+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
165+
if (!dstNumaNode) {
166+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
167+
}
168+
169+
// Given NUMA nodes aren't local, HWLOC returns an error in such case.
170+
if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
171+
*bandwidth = 0;
172+
return UMF_RESULT_SUCCESS;
173+
}
174+
175+
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
176+
.type = HWLOC_LOCATION_TYPE_CPUSET};
177+
hwloc_uint64_t value = 0;
178+
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
179+
dstNumaNode, &initiator, 0, &value);
180+
if (ret) {
181+
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
182+
srcNumaNode->os_index, dstNumaNode->os_index);
183+
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
184+
: UMF_RESULT_ERROR_UNKNOWN;
185+
}
186+
187+
*bandwidth = value;
188+
return UMF_RESULT_SUCCESS;
189+
}
190+
146191
// Operations table for the NUMA memory target: binds each entry of
// umf_memory_target_ops_t to the corresponding numa_* function, including
// the get_bandwidth query.
struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
147192
.version = UMF_VERSION_CURRENT,
148193
.initialize = numa_initialize,
149194
.finalize = numa_finalize,
150195
.pool_create_from_memspace = numa_pool_create_from_memspace,
151196
.clone = numa_clone,
152197
.get_capacity = numa_get_capacity,
198+
.get_bandwidth = numa_get_bandwidth,
153199
.memory_provider_create_from_memspace =
154200
numa_memory_provider_create_from_memspace};

src/memspace.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,85 @@ umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
208208

209209
return UMF_RESULT_SUCCESS;
210210
}
211+
212+
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
213+
umfGetTargetFn getTarget,
214+
umf_memspace_handle_t *filteredMemspace) {
215+
if (!hMemspace || !getTarget) {
216+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
217+
}
218+
219+
umf_memory_target_handle_t *uniqueBestNodes =
220+
umf_ba_global_alloc(hMemspace->size * sizeof(*uniqueBestNodes));
221+
if (!uniqueBestNodes) {
222+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
223+
}
224+
225+
umf_result_t ret = UMF_RESULT_SUCCESS;
226+
227+
size_t numUniqueBestNodes = 0;
228+
for (size_t nodeIdx = 0; nodeIdx < hMemspace->size; nodeIdx++) {
229+
umf_memory_target_handle_t target = NULL;
230+
ret = getTarget(hMemspace->nodes[nodeIdx], hMemspace->nodes,
231+
hMemspace->size, &target);
232+
if (ret != UMF_RESULT_SUCCESS) {
233+
goto err_free_best_targets;
234+
}
235+
236+
// check if the target is already present in the best nodes
237+
size_t bestTargetIdx;
238+
for (bestTargetIdx = 0; bestTargetIdx < numUniqueBestNodes;
239+
bestTargetIdx++) {
240+
if (uniqueBestNodes[bestTargetIdx] == target) {
241+
break;
242+
}
243+
}
244+
245+
// if the target is not present, add it to the best nodes
246+
if (bestTargetIdx == numUniqueBestNodes) {
247+
uniqueBestNodes[numUniqueBestNodes++] = target;
248+
}
249+
}
250+
251+
// copy the unique best nodes into a new memspace
252+
umf_memspace_handle_t newMemspace =
253+
umf_ba_global_alloc(sizeof(*newMemspace));
254+
if (!newMemspace) {
255+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
256+
goto err_free_best_targets;
257+
}
258+
259+
newMemspace->size = numUniqueBestNodes;
260+
newMemspace->nodes =
261+
umf_ba_global_alloc(sizeof(*newMemspace->nodes) * newMemspace->size);
262+
if (!newMemspace->nodes) {
263+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
264+
goto err_free_new_memspace;
265+
}
266+
267+
size_t cloneIdx = 0;
268+
for (size_t cloneIdx = 0; cloneIdx < newMemspace->size; cloneIdx++) {
269+
ret = umfMemoryTargetClone(uniqueBestNodes[cloneIdx],
270+
&newMemspace->nodes[cloneIdx]);
271+
if (ret != UMF_RESULT_SUCCESS) {
272+
goto err_free_cloned_nodes;
273+
}
274+
}
275+
276+
*filteredMemspace = newMemspace;
277+
umf_ba_global_free(uniqueBestNodes);
278+
279+
return UMF_RESULT_SUCCESS;
280+
281+
err_free_cloned_nodes:
282+
while (cloneIdx != 0) {
283+
cloneIdx--;
284+
umfMemoryTargetDestroy(newMemspace->nodes[cloneIdx]);
285+
}
286+
umf_ba_global_free(newMemspace->nodes);
287+
err_free_new_memspace:
288+
umf_ba_global_free(newMemspace);
289+
err_free_best_targets:
290+
umf_ba_global_free(uniqueBestNodes);
291+
return ret;
292+
}

src/memspace_internal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t,
3939
umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
4040
umfGetPropertyFn getProperty);
4141

42+
typedef umf_result_t (*umfGetTargetFn)(umf_memory_target_handle_t initiator,
43+
umf_memory_target_handle_t *nodes,
44+
size_t numNodes,
45+
umf_memory_target_handle_t *target);
46+
47+
///
48+
/// \brief Filters the targets using getTarget() to create a new memspace
49+
///
50+
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
51+
umfGetTargetFn getTarget,
52+
umf_memspace_handle_t *filteredMemspace);
53+
4254
///
4355
/// \brief Destroys memspace
4456
/// \param hMemspace handle to memspace
@@ -47,6 +59,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);
4759

4860
void umfMemspaceHostAllDestroy(void);
4961
void umfMemspaceHighestCapacityDestroy(void);
62+
void umfMemspaceHighestBandwidthDestroy(void);
5063

5164
#ifdef __cplusplus
5265
}

0 commit comments

Comments
 (0)