Skip to content

Commit 5c83873

Browse files
committed
Add memspace "highest bandwidth"
This memspace contains an aggregated list of NUMA nodes identified as best targets after selecting each NUMA node as the initiator. Querying the bandwidth value requires HMAT support on the platform; if HMAT is not supported, umfMemspaceHighestBandwidthGet() returns NULL.
1 parent 0660c56 commit 5c83873

13 files changed

+293
-1
lines changed

include/umf/memspace.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ umf_memspace_handle_t umfMemspaceHostAllGet(void);
5656
///
5757
umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);
5858

59+
/// \brief Retrieves predefined highest bandwidth memspace.
60+
/// \return highest bandwidth memspace handle on success or NULL on failure (for example, when the platform does not provide HMAT bandwidth information).
61+
///
62+
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);
63+
5964
#ifdef __cplusplus
6065
}
6166
#endif

src/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
8181
memory_targets/memory_target_numa.c
8282
memspaces/memspace_numa.c
8383
memspaces/memspace_host_all.c
84-
memspaces/memspace_highest_capacity.c)
84+
memspaces/memspace_highest_capacity.c
85+
memspaces/memspace_highest_bandwidth.c)
8586

8687
set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
8788
provider/provider_os_memory_linux.c)

src/libumf.map

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ UMF_1.0 {
3030
umfMemoryProviderPutIPCHandle;
3131
umfMemspaceCreateFromNumaArray;
3232
umfMemspaceDestroy;
33+
umfMemspaceHighestBandwidthGet;
3334
umfMemspaceHighestCapacityGet;
3435
umfMemspaceHostAllGet;
3536
umfOpenIPCHandle;

src/libumf_linux.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void __attribute__((destructor)) umfDestroy(void) {
2929
umfMemoryTrackerDestroy(t);
3030
umfMemspaceHostAllDestroy();
3131
umfMemspaceHighestCapacityDestroy();
32+
umfMemspaceHighestBandwidthDestroy();
3233
umfDestroyTopology();
3334
}
3435

src/memory_target.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,14 @@ umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
8383
assert(capacity);
8484
return memoryTarget->ops->get_capacity(memoryTarget->priv, capacity);
8585
}
86+
87+
umf_result_t
88+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
89+
umf_memory_target_handle_t dstMemoryTarget,
90+
size_t *bandwidth) {
91+
assert(srcMemoryTarget);
92+
assert(dstMemoryTarget);
93+
assert(bandwidth);
94+
return srcMemoryTarget->ops->get_bandwidth(
95+
srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
96+
}

src/memory_target.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,
3737
umf_memory_target_handle_t *outHandle);
3838
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
3939
size_t *capacity);
40+
umf_result_t
41+
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
42+
umf_memory_target_handle_t dstMemoryTarget,
43+
size_t *bandwidth);
4044

4145
#ifdef __cplusplus
4246
}

src/memory_target_ops.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t {
4141
umf_memory_provider_handle_t *provider);
4242

4343
umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
44+
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
45+
size_t *bandwidth);
4446
} umf_memory_target_ops_t;
4547

4648
#ifdef __cplusplus

src/memory_targets/memory_target_numa.c

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "base_alloc_global.h"
2020
#include "memory_target_numa.h"
2121
#include "topology.h"
22+
#include "utils_log.h"
2223

2324
struct numa_memory_target_t {
2425
unsigned physical_id;
@@ -143,12 +144,57 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
143144
return UMF_RESULT_SUCCESS;
144145
}
145146

147+
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
148+
void *dstMemoryTarget,
149+
size_t *bandwidth) {
150+
hwloc_topology_t topology = umfGetTopology();
151+
if (!topology) {
152+
return UMF_RESULT_ERROR_NOT_SUPPORTED;
153+
}
154+
155+
hwloc_obj_t srcNumaNode = hwloc_get_obj_by_type(
156+
topology, HWLOC_OBJ_NUMANODE,
157+
((struct numa_memory_target_t *)srcMemoryTarget)->physical_id);
158+
if (!srcNumaNode) {
159+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
160+
}
161+
162+
hwloc_obj_t dstNumaNode = hwloc_get_obj_by_type(
163+
topology, HWLOC_OBJ_NUMANODE,
164+
((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
165+
if (!dstNumaNode) {
166+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
167+
}
168+
169+
// Given NUMA nodes aren't local, HWLOC returns an error in such case.
170+
if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
171+
*bandwidth = 0;
172+
return UMF_RESULT_SUCCESS;
173+
}
174+
175+
struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
176+
.type = HWLOC_LOCATION_TYPE_CPUSET};
177+
hwloc_uint64_t value = 0;
178+
int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
179+
dstNumaNode, &initiator, 0, &value);
180+
if (ret) {
181+
LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
182+
srcNumaNode->os_index, dstNumaNode->os_index);
183+
return (errno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
184+
: UMF_RESULT_ERROR_UNKNOWN;
185+
}
186+
187+
*bandwidth = value;
188+
return UMF_RESULT_SUCCESS;
189+
}
190+
146191
struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
147192
.version = UMF_VERSION_CURRENT,
148193
.initialize = numa_initialize,
149194
.finalize = numa_finalize,
150195
.pool_create_from_memspace = numa_pool_create_from_memspace,
151196
.clone = numa_clone,
152197
.get_capacity = numa_get_capacity,
198+
.get_bandwidth = numa_get_bandwidth,
153199
.memory_provider_create_from_memspace =
154200
numa_memory_provider_create_from_memspace};

src/memspace.c

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,3 +208,85 @@ umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
208208

209209
return UMF_RESULT_SUCCESS;
210210
}
211+
212+
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
213+
umfGetTargetFn getTarget,
214+
umf_memspace_handle_t *filteredMemspace) {
215+
if (!hMemspace || !getTarget) {
216+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
217+
}
218+
219+
umf_memory_target_handle_t *uniqueBestNodes =
220+
umf_ba_global_alloc(hMemspace->size * sizeof(*uniqueBestNodes));
221+
if (!uniqueBestNodes) {
222+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
223+
}
224+
225+
umf_result_t ret = UMF_RESULT_SUCCESS;
226+
227+
size_t numUniqueBestNodes = 0;
228+
for (size_t nodeIdx = 0; nodeIdx < hMemspace->size; nodeIdx++) {
229+
umf_memory_target_handle_t target = NULL;
230+
ret = getTarget(hMemspace->nodes[nodeIdx], hMemspace->nodes,
231+
hMemspace->size, &target);
232+
if (ret != UMF_RESULT_SUCCESS) {
233+
goto err_free_best_targets;
234+
}
235+
236+
// check if the target is already present in the best nodes
237+
size_t bestTargetIdx;
238+
for (bestTargetIdx = 0; bestTargetIdx < numUniqueBestNodes;
239+
bestTargetIdx++) {
240+
if (uniqueBestNodes[bestTargetIdx] == target) {
241+
break;
242+
}
243+
}
244+
245+
// if the target is not present, add it to the best nodes
246+
if (bestTargetIdx == numUniqueBestNodes) {
247+
uniqueBestNodes[numUniqueBestNodes++] = target;
248+
}
249+
}
250+
251+
// copy the unique best nodes into a new memspace
252+
umf_memspace_handle_t newMemspace =
253+
umf_ba_global_alloc(sizeof(*newMemspace));
254+
if (!newMemspace) {
255+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
256+
goto err_free_best_targets;
257+
}
258+
259+
newMemspace->size = numUniqueBestNodes;
260+
newMemspace->nodes =
261+
umf_ba_global_alloc(sizeof(*newMemspace->nodes) * newMemspace->size);
262+
if (!newMemspace->nodes) {
263+
ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
264+
goto err_free_new_memspace;
265+
}
266+
267+
size_t cloneIdx = 0;
268+
for (size_t cloneIdx = 0; cloneIdx < newMemspace->size; cloneIdx++) {
269+
ret = umfMemoryTargetClone(uniqueBestNodes[cloneIdx],
270+
&newMemspace->nodes[cloneIdx]);
271+
if (ret != UMF_RESULT_SUCCESS) {
272+
goto err_free_cloned_nodes;
273+
}
274+
}
275+
276+
*filteredMemspace = newMemspace;
277+
umf_ba_global_free(uniqueBestNodes);
278+
279+
return UMF_RESULT_SUCCESS;
280+
281+
err_free_cloned_nodes:
282+
while (cloneIdx != 0) {
283+
cloneIdx--;
284+
umfMemoryTargetDestroy(newMemspace->nodes[cloneIdx]);
285+
}
286+
umf_ba_global_free(newMemspace->nodes);
287+
err_free_new_memspace:
288+
umf_ba_global_free(newMemspace);
289+
err_free_best_targets:
290+
umf_ba_global_free(uniqueBestNodes);
291+
return ret;
292+
}

src/memspace_internal.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,18 @@ typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t,
3939
umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
4040
umfGetPropertyFn getProperty);
4141

42+
typedef umf_result_t (*umfGetTargetFn)(umf_memory_target_handle_t initiator,
43+
umf_memory_target_handle_t *nodes,
44+
size_t numNodes,
45+
umf_memory_target_handle_t *target);
46+
47+
///
48+
/// \brief Filters the targets using getTarget() to create a new memspace
49+
///
50+
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
51+
umfGetTargetFn getTarget,
52+
umf_memspace_handle_t *filteredMemspace);
53+
4254
///
4355
/// \brief Destroys memspace
4456
/// \param hMemspace handle to memspace
@@ -47,6 +59,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);
4759

4860
void umfMemspaceHostAllDestroy(void);
4961
void umfMemspaceHighestCapacityDestroy(void);
62+
void umfMemspaceHighestBandwidthDestroy(void);
5063

5164
#ifdef __cplusplus
5265
}
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
/*
2+
*
3+
* Copyright (C) 2024 Intel Corporation
4+
*
5+
* Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
6+
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
*
8+
*/
9+
10+
#include <assert.h>
11+
#include <ctype.h>
12+
#include <hwloc.h>
13+
#include <stdlib.h>
14+
15+
#include "base_alloc_global.h"
16+
#include "memory_target_numa.h"
17+
#include "memspace_internal.h"
18+
#include "memspace_numa.h"
19+
#include "topology.h"
20+
#include "utils_common.h"
21+
#include "utils_concurrency.h"
22+
#include "utils_log.h"
23+
24+
// Selects, among 'nodes', the target with the highest bandwidth as reported by
// umfMemoryTargetGetBandwidth() for the given initiator.
//
// initiator - memory target acting as the initiator of the accesses
// nodes     - candidate targets
// numNodes  - number of entries in 'nodes'; must be greater than 0
// target    - [out] receives the selected handle (not a clone)
//
// Ties are resolved in favour of the lowest index; if every candidate reports
// a bandwidth of 0, nodes[0] is selected.
//
// Returns UMF_RESULT_SUCCESS on success, UMF_RESULT_ERROR_INVALID_ARGUMENT for
// a NULL/empty candidate list or a NULL 'target', or the first error returned
// by umfMemoryTargetGetBandwidth().
static umf_result_t getBestBandwidthTarget(umf_memory_target_handle_t initiator,
                                           umf_memory_target_handle_t *nodes,
                                           size_t numNodes,
                                           umf_memory_target_handle_t *target) {
    // Without this guard an empty list would read nodes[0] out of bounds.
    if (!nodes || numNodes == 0 || !target) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    size_t bestNodeIdx = 0;
    size_t bestBandwidth = 0;
    for (size_t nodeIdx = 0; nodeIdx < numNodes; nodeIdx++) {
        size_t bandwidth = 0;
        umf_result_t ret =
            umfMemoryTargetGetBandwidth(initiator, nodes[nodeIdx], &bandwidth);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }

        // strict comparison: the first node with the maximum value wins
        if (bandwidth > bestBandwidth) {
            bestNodeIdx = nodeIdx;
            bestBandwidth = bandwidth;
        }
    }

    *target = nodes[bestNodeIdx];

    return UMF_RESULT_SUCCESS;
}
48+
49+
static umf_result_t
50+
umfMemspaceHighestBandwidthCreate(umf_memspace_handle_t *hMemspace) {
51+
if (!hMemspace) {
52+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
53+
}
54+
55+
umf_memspace_handle_t hostAllMemspace = umfMemspaceHostAllGet();
56+
if (!hostAllMemspace) {
57+
return UMF_RESULT_ERROR_UNKNOWN;
58+
}
59+
60+
umf_memspace_handle_t highBandwidthMemspace = NULL;
61+
umf_result_t ret = umfMemspaceFilter(
62+
hostAllMemspace, getBestBandwidthTarget, &highBandwidthMemspace);
63+
if (ret != UMF_RESULT_SUCCESS) {
64+
// HWLOC could possibly return an 'EINVAL' error, which in this context
65+
// means that the HMAT is unavailable and we can't obtain the
66+
// 'bandwidth' value of any NUMA node.
67+
return ret;
68+
}
69+
70+
*hMemspace = highBandwidthMemspace;
71+
return UMF_RESULT_SUCCESS;
72+
}
73+
74+
static umf_memspace_handle_t UMF_MEMSPACE_HIGHEST_BANDWIDTH = NULL;
75+
static UTIL_ONCE_FLAG UMF_MEMSPACE_HBW_INITIALIZED = UTIL_ONCE_FLAG_INIT;
76+
77+
void umfMemspaceHighestBandwidthDestroy(void) {
78+
if (UMF_MEMSPACE_HIGHEST_BANDWIDTH) {
79+
umfMemspaceDestroy(UMF_MEMSPACE_HIGHEST_BANDWIDTH);
80+
UMF_MEMSPACE_HIGHEST_BANDWIDTH = NULL;
81+
}
82+
}
83+
84+
static void umfMemspaceHighestBandwidthInit(void) {
85+
umf_result_t ret =
86+
umfMemspaceHighestBandwidthCreate(&UMF_MEMSPACE_HIGHEST_BANDWIDTH);
87+
if (ret != UMF_RESULT_SUCCESS) {
88+
LOG_ERR(
89+
"Creating the highest bandwidth memspace failed with a %u error\n",
90+
ret);
91+
assert(ret == UMF_RESULT_ERROR_NOT_SUPPORTED);
92+
}
93+
94+
#if defined(_WIN32) && !defined(UMF_SHARED_LIBRARY)
95+
atexit(umfMemspaceHighestBandwidthDestroy);
96+
#endif
97+
}
98+
99+
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void) {
100+
util_init_once(&UMF_MEMSPACE_HBW_INITIALIZED,
101+
umfMemspaceHighestBandwidthInit);
102+
return UMF_MEMSPACE_HIGHEST_BANDWIDTH;
103+
}

test/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,10 @@ if(LINUX) # OS-specific functions are implemented only for Linux now
173173
NAME memspace_highest_capacity
174174
SRCS memspaces/memspace_highest_capacity.cpp
175175
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES})
176+
add_umf_test(
177+
NAME memspace_highest_bandwidth
178+
SRCS memspaces/memspace_highest_bandwidth.cpp
179+
LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES})
176180
endif()
177181

178182
if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
// Copyright (C) 2024 Intel Corporation
2+
// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
3+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
4+
5+
#include "memory_target_numa.h"
6+
#include "memspace_helpers.hpp"
7+
#include "memspace_internal.h"
8+
#include "test_helpers.h"
9+
10+
#include <numa.h>
11+
#include <numaif.h>
12+
#include <umf/memspace.h>
13+
14+
using umf_test::test;
15+
16+
// Checks that the predefined highest bandwidth memspace can be retrieved.
//
// umfMemspaceHighestBandwidthGet() returns NULL when the bandwidth values
// cannot be queried (no HMAT support on the platform), so a NULL handle is
// reported as a skipped test rather than a failure.
// NOTE(review): this also hides other creation failures - consider checking
// HMAT availability explicitly so that only the unsupported case is skipped.
TEST_F(numaNodesTest, memspaceGet) {
    umf_memspace_handle_t hMemspace = umfMemspaceHighestBandwidthGet();
    if (hMemspace == nullptr) {
        GTEST_SKIP() << "highest bandwidth memspace is unavailable "
                        "(HMAT is probably not supported on this platform)";
    }

    UT_ASSERTne(hMemspace, nullptr);
}

0 commit comments

Comments
 (0)