Skip to content

Add memspace "highest bandwidth" #408

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,11 @@ using umfMemspaceHostAllGet.
Memspace backed by all available NUMA nodes discovered on the platform sorted by capacity.
Can be retrieved using umfMemspaceHighestCapacityGet.

#### Highest bandwidth memspace

Memspace backed by an aggregated list of NUMA nodes identified as having the highest bandwidth after selecting each available NUMA node as the initiator.
Querying the bandwidth value requires HMAT support on the platform. Calling `umfMemspaceHighestBandwidthGet()` will return NULL if it's not supported.

### Proxy library

UMF provides the UMF proxy library (`umf_proxy`) that makes it possible
Expand Down
5 changes: 5 additions & 0 deletions include/umf/memspace.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ umf_memspace_handle_t umfMemspaceHostAllGet(void);
///
umf_memspace_handle_t umfMemspaceHighestCapacityGet(void);

/// \brief Retrieves predefined highest bandwidth memspace.
/// \details Querying the bandwidth value requires HMAT support on the
///          platform; NULL is returned when it is not supported.
/// \return highest bandwidth memspace handle on success or NULL on failure.
///
umf_memspace_handle_t umfMemspaceHighestBandwidthGet(void);

#ifdef __cplusplus
}
#endif
Expand Down
2 changes: 1 addition & 1 deletion scripts/qemu/run-build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ make -j $(nproc)
echo password | sudo sync;
echo password | sudo sh -c "/usr/bin/echo 3 > /proc/sys/vm/drop_caches"

ctest --output-on-failure
ctest --verbose

# run tests bound to a numa node
numactl -N 0 ctest --output-on-failure
Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ set(UMF_SOURCES_COMMON_LINUX_MACOSX
memory_targets/memory_target_numa.c
memspaces/memspace_numa.c
memspaces/memspace_host_all.c
memspaces/memspace_highest_capacity.c)
memspaces/memspace_highest_capacity.c
memspaces/memspace_highest_bandwidth.c)

set(UMF_SOURCES_LINUX ${UMF_SOURCES_LINUX} ${UMF_SOURCES_COMMON_LINUX_MACOSX}
provider/provider_os_memory_linux.c)
Expand Down
1 change: 1 addition & 0 deletions src/libumf.map
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ UMF_1.0 {
umfMemoryProviderPutIPCHandle;
umfMemspaceCreateFromNumaArray;
umfMemspaceDestroy;
umfMemspaceHighestBandwidthGet;
umfMemspaceHighestCapacityGet;
umfMemspaceHostAllGet;
umfOpenIPCHandle;
Expand Down
1 change: 1 addition & 0 deletions src/libumf_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ void __attribute__((destructor)) umfDestroy(void) {
umfMemoryTrackerDestroy(t);
umfMemspaceHostAllDestroy();
umfMemspaceHighestCapacityDestroy();
umfMemspaceHighestBandwidthDestroy();
umfDestroyTopology();
}

Expand Down
18 changes: 16 additions & 2 deletions src/memory_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,21 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,

/// \brief Retrieves the capacity of a memory target.
/// \param memoryTarget handle to the memory target to query
/// \param capacity [out] receives the value reported by the target's
///        get_capacity operation
/// \return UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument is NULL,
///         otherwise the result of the target's get_capacity operation.
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
                                        size_t *capacity) {
    // Invalid arguments are reported through the return code in every build
    // type (no assert), matching umfMemoryTargetGetBandwidth().
    if (!memoryTarget || !capacity) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    return memoryTarget->ops->get_capacity(memoryTarget->priv, capacity);
}

/// \brief Queries the bandwidth between two memory targets.
/// \param srcMemoryTarget handle to the source (initiator) memory target;
///        its get_bandwidth operation performs the query
/// \param dstMemoryTarget handle to the destination memory target
/// \param bandwidth [out] receives the value reported by get_bandwidth
/// \return UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument is NULL,
///         otherwise the result of the source target's get_bandwidth
///         operation.
umf_result_t
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
                            umf_memory_target_handle_t dstMemoryTarget,
                            size_t *bandwidth) {
    // Reject missing handles first, then the missing output location.
    if (!srcMemoryTarget) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (!dstMemoryTarget) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }
    if (!bandwidth) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // Dispatch through the source target's operations table.
    umf_result_t result = srcMemoryTarget->ops->get_bandwidth(
        srcMemoryTarget->priv, dstMemoryTarget->priv, bandwidth);
    return result;
}
4 changes: 4 additions & 0 deletions src/memory_target.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ umf_result_t umfMemoryTargetClone(umf_memory_target_handle_t memoryTarget,
umf_memory_target_handle_t *outHandle);
/// Retrieves the capacity of \p memoryTarget through the target's
/// get_capacity operation and stores it in \p capacity.
umf_result_t umfMemoryTargetGetCapacity(umf_memory_target_handle_t memoryTarget,
size_t *capacity);
/// Retrieves the bandwidth between \p srcMemoryTarget and \p dstMemoryTarget
/// through the source target's get_bandwidth operation and stores it in
/// \p bandwidth. Returns UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument
/// is NULL.
umf_result_t
umfMemoryTargetGetBandwidth(umf_memory_target_handle_t srcMemoryTarget,
umf_memory_target_handle_t dstMemoryTarget,
size_t *bandwidth);

#ifdef __cplusplus
}
Expand Down
2 changes: 2 additions & 0 deletions src/memory_target_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ typedef struct umf_memory_target_ops_t {
umf_memory_provider_handle_t *provider);

umf_result_t (*get_capacity)(void *memoryTarget, size_t *capacity);
umf_result_t (*get_bandwidth)(void *srcMemoryTarget, void *dstMemoryTarget,
size_t *bandwidth);
} umf_memory_target_ops_t;

#ifdef __cplusplus
Expand Down
54 changes: 54 additions & 0 deletions src/memory_targets/memory_target_numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "base_alloc_global.h"
#include "memory_target_numa.h"
#include "topology.h"
#include "utils_log.h"

struct numa_memory_target_t {
unsigned physical_id;
Expand Down Expand Up @@ -124,6 +125,10 @@ static umf_result_t numa_clone(void *memTarget, void **outMemTarget) {
}

static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
if (!memTarget || !capacity) {
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}

hwloc_topology_t topology = umfGetTopology();
if (!topology) {
return UMF_RESULT_ERROR_NOT_SUPPORTED;
Expand All @@ -143,12 +148,61 @@ static umf_result_t numa_get_capacity(void *memTarget, size_t *capacity) {
return UMF_RESULT_SUCCESS;
}

/// \brief Retrieves the bandwidth from one NUMA memory target to another.
/// \param srcMemoryTarget NUMA memory target used as the initiator
/// \param dstMemoryTarget NUMA memory target whose bandwidth is queried
/// \param bandwidth [out] receives the bandwidth value; set to 0 when the
///        cpusets of the two nodes do not intersect
/// \return UMF_RESULT_SUCCESS on success,
///         UMF_RESULT_ERROR_INVALID_ARGUMENT if an argument is NULL or a
///         NUMA node object cannot be found,
///         UMF_RESULT_ERROR_NOT_SUPPORTED if the topology is unavailable or
///         the hwloc query fails with EINVAL,
///         UMF_RESULT_ERROR_UNKNOWN on any other hwloc failure.
static umf_result_t numa_get_bandwidth(void *srcMemoryTarget,
                                       void *dstMemoryTarget,
                                       size_t *bandwidth) {
    if (!srcMemoryTarget || !dstMemoryTarget || !bandwidth) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    hwloc_topology_t topology = umfGetTopology();
    if (!topology) {
        return UMF_RESULT_ERROR_NOT_SUPPORTED;
    }

    // NOTE(review): hwloc_get_obj_by_type() looks objects up by logical
    // index, while the value passed here is named physical_id - confirm the
    // two always match, or switch to an OS-index based lookup.
    hwloc_obj_t srcNumaNode = hwloc_get_obj_by_type(
        topology, HWLOC_OBJ_NUMANODE,
        ((struct numa_memory_target_t *)srcMemoryTarget)->physical_id);
    if (!srcNumaNode) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    hwloc_obj_t dstNumaNode = hwloc_get_obj_by_type(
        topology, HWLOC_OBJ_NUMANODE,
        ((struct numa_memory_target_t *)dstMemoryTarget)->physical_id);
    if (!dstNumaNode) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    // HWLOC returns an error when the given NUMA nodes aren't local to each
    // other, so report zero bandwidth for that case instead of failing.
    if (!hwloc_bitmap_intersects(srcNumaNode->cpuset, dstNumaNode->cpuset)) {
        *bandwidth = 0;
        return UMF_RESULT_SUCCESS;
    }

    struct hwloc_location initiator = {.location.cpuset = srcNumaNode->cpuset,
                                       .type = HWLOC_LOCATION_TYPE_CPUSET};
    hwloc_uint64_t value = 0;
    int ret = hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
                                      dstNumaNode, &initiator, 0, &value);
    if (ret) {
        // Save errno right away: the logging call below may overwrite it
        // before the error code is mapped.
        int savedErrno = errno;
        LOG_ERR("Retrieving bandwidth for initiator node %u to node %u failed.",
                srcNumaNode->os_index, dstNumaNode->os_index);
        return (savedErrno == EINVAL) ? UMF_RESULT_ERROR_NOT_SUPPORTED
                                      : UMF_RESULT_ERROR_UNKNOWN;
    }

    *bandwidth = value;
    return UMF_RESULT_SUCCESS;
}

// Operations table wiring the NUMA memory target callbacks defined in this
// file into the generic memory target interface.
struct umf_memory_target_ops_t UMF_MEMORY_TARGET_NUMA_OPS = {
    .version = UMF_VERSION_CURRENT,

    // lifetime management
    .initialize = numa_initialize,
    .finalize = numa_finalize,
    .clone = numa_clone,

    // pool / provider construction from a memspace
    .pool_create_from_memspace = numa_pool_create_from_memspace,
    .memory_provider_create_from_memspace =
        numa_memory_provider_create_from_memspace,

    // property queries
    .get_capacity = numa_get_capacity,
    .get_bandwidth = numa_get_bandwidth,
};
82 changes: 82 additions & 0 deletions src/memspace.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,85 @@ umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,

return UMF_RESULT_SUCCESS;
}

/// \brief Builds a new memspace from the unique targets selected by
///        getTarget() for every node of hMemspace.
/// \details getTarget() is called once per node of hMemspace, with that node
///          as the initiator and the full node list as candidates. Each
///          distinct target handle returned is cloned into the new memspace.
/// \param hMemspace memspace whose nodes are filtered
/// \param getTarget callback selecting a target for a given initiator node
/// \param filteredMemspace [out] receives the newly created memspace; left
///        untouched on failure
/// \return UMF_RESULT_SUCCESS on success,
///         UMF_RESULT_ERROR_INVALID_ARGUMENT if any argument is NULL,
///         UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY if an allocation fails,
///         or the error returned by getTarget() / umfMemoryTargetClone().
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
                               umfGetTargetFn getTarget,
                               umf_memspace_handle_t *filteredMemspace) {
    if (!hMemspace || !getTarget || !filteredMemspace) {
        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
    }

    umf_memory_target_handle_t *uniqueBestNodes =
        umf_ba_global_alloc(hMemspace->size * sizeof(*uniqueBestNodes));
    if (!uniqueBestNodes) {
        return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
    }

    // Everything the cleanup labels touch is declared before the first goto.
    umf_result_t ret = UMF_RESULT_SUCCESS;
    umf_memspace_handle_t newMemspace = NULL;
    size_t numUniqueBestNodes = 0;
    // Number of successfully cloned nodes; the error path relies on it.
    size_t cloneIdx = 0;

    for (size_t nodeIdx = 0; nodeIdx < hMemspace->size; nodeIdx++) {
        umf_memory_target_handle_t target = NULL;
        ret = getTarget(hMemspace->nodes[nodeIdx], hMemspace->nodes,
                        hMemspace->size, &target);
        if (ret != UMF_RESULT_SUCCESS) {
            goto err_free_best_targets;
        }

        // check if the target is already present in the best nodes
        size_t bestTargetIdx;
        for (bestTargetIdx = 0; bestTargetIdx < numUniqueBestNodes;
             bestTargetIdx++) {
            if (uniqueBestNodes[bestTargetIdx] == target) {
                break;
            }
        }

        // if the target is not present, add it to the best nodes
        if (bestTargetIdx == numUniqueBestNodes) {
            uniqueBestNodes[numUniqueBestNodes++] = target;
        }
    }

    // copy the unique best nodes into a new memspace
    newMemspace = umf_ba_global_alloc(sizeof(*newMemspace));
    if (!newMemspace) {
        ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        goto err_free_best_targets;
    }

    newMemspace->size = numUniqueBestNodes;
    newMemspace->nodes =
        umf_ba_global_alloc(sizeof(*newMemspace->nodes) * newMemspace->size);
    if (!newMemspace->nodes) {
        ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
        goto err_free_new_memspace;
    }

    // Reuse the function-scope cloneIdx (do not redeclare it in the loop) so
    // that the cleanup below knows how many clones have to be destroyed.
    for (cloneIdx = 0; cloneIdx < newMemspace->size; cloneIdx++) {
        ret = umfMemoryTargetClone(uniqueBestNodes[cloneIdx],
                                   &newMemspace->nodes[cloneIdx]);
        if (ret != UMF_RESULT_SUCCESS) {
            goto err_free_cloned_nodes;
        }
    }

    *filteredMemspace = newMemspace;
    umf_ba_global_free(uniqueBestNodes);

    return UMF_RESULT_SUCCESS;

err_free_cloned_nodes:
    // destroy the clones created so far, in reverse order
    while (cloneIdx != 0) {
        cloneIdx--;
        umfMemoryTargetDestroy(newMemspace->nodes[cloneIdx]);
    }
    umf_ba_global_free(newMemspace->nodes);
err_free_new_memspace:
    umf_ba_global_free(newMemspace);
err_free_best_targets:
    umf_ba_global_free(uniqueBestNodes);
    return ret;
}
13 changes: 13 additions & 0 deletions src/memspace_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,18 @@ typedef umf_result_t (*umfGetPropertyFn)(umf_memory_target_handle_t,
umf_result_t umfMemspaceSortDesc(umf_memspace_handle_t hMemspace,
umfGetPropertyFn getProperty);

///
/// \brief Callback used by umfMemspaceFilter() to pick a target for a node
/// \param initiator node for which a target is being selected
/// \param nodes array of candidate nodes
/// \param numNodes number of elements in nodes
/// \param target [out] receives the selected target handle
/// \return UMF_RESULT_SUCCESS on success; any other value makes
///         umfMemspaceFilter() stop and return that value
///
typedef umf_result_t (*umfGetTargetFn)(umf_memory_target_handle_t initiator,
umf_memory_target_handle_t *nodes,
size_t numNodes,
umf_memory_target_handle_t *target);

///
/// \brief Filters the targets using getTarget() to create a new memspace
/// \details getTarget() is called once for every node of hMemspace; the
///          distinct targets it returns are cloned into the new memspace.
/// \param hMemspace handle to the memspace whose nodes are filtered
/// \param getTarget callback selecting a target for each node
/// \param filteredMemspace [out] receives the newly created memspace
/// \return UMF_RESULT_SUCCESS on success,
///         UMF_RESULT_ERROR_INVALID_ARGUMENT if hMemspace or getTarget is
///         NULL, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY if an allocation fails,
///         or the error reported by getTarget() or by cloning a target
///
umf_result_t umfMemspaceFilter(umf_memspace_handle_t hMemspace,
umfGetTargetFn getTarget,
umf_memspace_handle_t *filteredMemspace);

///
/// \brief Destroys memspace
/// \param hMemspace handle to memspace
Expand All @@ -47,6 +59,7 @@ void umfMemspaceDestroy(umf_memspace_handle_t hMemspace);

// Teardown hooks for the predefined memspaces; the Linux library destructor
// umfDestroy() calls each of them.
void umfMemspaceHostAllDestroy(void);
void umfMemspaceHighestCapacityDestroy(void);
void umfMemspaceHighestBandwidthDestroy(void);

#ifdef __cplusplus
}
Expand Down
Loading