Skip to content

Handle default kernel behavior for CPU access to all NUMA nodes #533

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 22 additions & 10 deletions src/memory_targets/memory_target_numa.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,22 +87,34 @@ static umf_result_t numa_memory_provider_create_from_memspace(
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
}
} else {
params.numa_mode = UMF_NUMA_MODE_BIND;
if (memspace == umfMemspaceHostAllGet()) {
// For the default memspace, we use the default mode without any
// call to mbind
params.numa_mode = UMF_NUMA_MODE_DEFAULT;
} else {
params.numa_mode = UMF_NUMA_MODE_BIND;
}
}

params.numa_list =
umf_ba_global_alloc(sizeof(*params.numa_list) * numNodesProvider);
if (memspace == umfMemspaceHostAllGet() && policy == NULL) {
// For the default memspace with the default policy we use all NUMA nodes,
// so simply leave the NUMA list empty
params.numa_list_len = 0;
params.numa_list = NULL;
} else {
params.numa_list =
umf_ba_global_alloc(sizeof(*params.numa_list) * numNodesProvider);

if (!params.numa_list) {
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
if (!params.numa_list) {
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}

for (size_t i = 0; i < numNodesProvider; i++) {
params.numa_list[i] = numaTargets[i]->physical_id;
for (size_t i = 0; i < numNodesProvider; i++) {
params.numa_list[i] = numaTargets[i]->physical_id;
}
params.numa_list_len = numNodesProvider;
}

params.numa_list_len = numNodesProvider;

umf_memory_provider_handle_t numaProvider = NULL;
int ret = umfMemoryProviderCreate(umfOsMemoryProviderOps(), &params,
&numaProvider);
Expand Down
60 changes: 32 additions & 28 deletions src/provider/provider_os_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,35 +567,39 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
goto err_unmap;
}

errno = 0;
unsigned membind = get_membind(os_provider, ALIGN_UP(size, page_size));
size_t bind_size = os_provider->nodeset_len == 1
? size
: ALIGN_UP(os_provider->part_size, page_size);
char *ptr_iter = addr;

do {
size_t s = bind_size < size ? bind_size : size;
ret = hwloc_set_area_membind(
os_provider->topo, ptr_iter, s, os_provider->nodeset[membind++],
os_provider->numa_policy, os_provider->numa_flags);

size -= s;
ptr_iter += s;
membind %= os_provider->nodeset_len;
if (ret) {
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
LOG_PERR("binding memory to NUMA node failed");
// TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows,
// ignore this temporarily
if (errno != ENOSYS &&
errno != 0) { // ENOSYS - Function not implemented
// Do not error out if memory binding is not implemented at all
// (like in case of WSL on Windows).
goto err_unmap;
// Bind memory to NUMA nodes if numa_policy is other than DEFAULT
if (os_provider->numa_policy != HWLOC_MEMBIND_DEFAULT) {
errno = 0;
unsigned membind = get_membind(os_provider, ALIGN_UP(size, page_size));
size_t bind_size = os_provider->nodeset_len == 1
? size
: ALIGN_UP(os_provider->part_size, page_size);
char *ptr_iter = addr;

do {
size_t s = bind_size < size ? bind_size : size;
ret = hwloc_set_area_membind(
os_provider->topo, ptr_iter, s, os_provider->nodeset[membind++],
os_provider->numa_policy, os_provider->numa_flags);

size -= s;
ptr_iter += s;
membind %= os_provider->nodeset_len;
if (ret) {
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED,
errno);
LOG_PERR("binding memory to NUMA node failed");
// TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows,
// ignore this temporarily
if (errno != ENOSYS &&
errno != 0) { // ENOSYS - Function not implemented
// Do not error out if memory binding is not implemented at all
// (like in case of WSL on Windows).
goto err_unmap;
}
}
}
} while (size > 0);
} while (size > 0);
}

if (os_provider->fd > 0) {
// store (fd_offset + 1) to be able to store fd_offset == 0
Expand Down
91 changes: 75 additions & 16 deletions test/memspaces/memspace_host_all.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,20 @@
// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <numa.h>
#include <numaif.h>
#include <sys/mman.h>
#include <unordered_set>

#include <umf/memspace.h>

#include "memory_target_numa.h"
#include "memspace_helpers.hpp"
#include "memspace_internal.h"
#include "numa_helpers.h"
#include "test_helpers.h"
#include "utils_sanitizers.h"

#include <numa.h>
#include <numaif.h>
#include <umf/memspace.h>
#include <unordered_set>

using umf_test::test;

struct memspaceHostAllTest : ::numaNodesTest {
Expand Down Expand Up @@ -87,6 +89,69 @@ TEST_F(memspaceHostAllProviderTest, allocFree) {
UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
}

TEST_F(memspaceHostAllProviderTest, hostAllDefaults) {
    // This testcase checks that allocations made using a provider created
    // with default parameters from the default memspace (HostAll) use the
    // fast, default kernel path (no mbind). It does so by comparing the
    // memory policy reported by get_mempolicy() for a UMF allocation with
    // the one reported for a plain anonymous mmap() allocation.

    umf_memspace_handle_t hMemspace = umfMemspaceHostAllGet();
    UT_ASSERTne(hMemspace, nullptr);

    // A null policy requests the default policy for the memspace.
    umf_memory_provider_handle_t hProvider = nullptr;
    umf_result_t ret =
        umfMemoryProviderCreateFromMemspace(hMemspace, nullptr, &hProvider);
    ASSERT_EQ(ret, UMF_RESULT_SUCCESS);
    ASSERT_NE(hProvider, nullptr);

    // Create single allocation using the provider.
    void *ptr1 = nullptr;
    size_t size = SIZE_4K;
    size_t alignment = 0;

    ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr1);
    UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
    UT_ASSERTne(ptr1, nullptr);
    // Touch the memory so that the pages are actually faulted in.
    memset(ptr1, 0xFF, size);

    // Create single allocation using mmap
    void *ptr2 = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    // mmap() signals failure with MAP_FAILED ((void *)-1), not a null pointer.
    UT_ASSERTne(ptr2, MAP_FAILED);
    memset(ptr2, 0xFF, size);

    // Compare UMF and kernel default allocation policy
    struct bitmask *nodemask1 = numa_allocate_nodemask();
    struct bitmask *nodemask2 = numa_allocate_nodemask();
    int memMode1 = -1, memMode2 = -1;

    // MPOL_F_ADDR: query the policy governing the given address.
    int ret2 = get_mempolicy(&memMode1, nodemask1->maskp, nodemask1->size, ptr1,
                             MPOL_F_ADDR);
    UT_ASSERTeq(ret2, 0);
    ret2 = get_mempolicy(&memMode2, nodemask2->maskp, nodemask2->size, ptr2,
                         MPOL_F_ADDR);
    UT_ASSERTeq(ret2, 0);
    UT_ASSERTeq(memMode1, memMode2);
    UT_ASSERTeq(nodemask1->size, nodemask2->size);
    UT_ASSERTeq(numa_bitmask_equal(nodemask1, nodemask2), 1);

    // MPOL_F_ADDR | MPOL_F_NODE: query the ID of the node on which the
    // given address is allocated.
    int nodeId1 = -1, nodeId2 = -1;
    ret2 = get_mempolicy(&nodeId1, nullptr, 0, ptr1, MPOL_F_ADDR | MPOL_F_NODE);
    UT_ASSERTeq(ret2, 0);
    ret2 = get_mempolicy(&nodeId2, nullptr, 0, ptr2, MPOL_F_ADDR | MPOL_F_NODE);
    UT_ASSERTeq(ret2, 0);
    UT_ASSERTeq(nodeId1, nodeId2);

    numa_free_nodemask(nodemask2);
    numa_free_nodemask(nodemask1);

    ret2 = munmap(ptr2, size);
    UT_ASSERTeq(ret2, 0);

    ret = umfMemoryProviderFree(hProvider, ptr1, size);
    UT_ASSERTeq(ret, UMF_RESULT_SUCCESS);
    umfMemoryProviderDestroy(hProvider);
}

TEST_F(memspaceHostAllProviderTest, allocsSpreadAcrossAllNumaNodes) {
// This testcase is unsuitable for TSan.
#ifdef __SANITIZE_THREAD__
Expand Down Expand Up @@ -140,17 +205,11 @@ TEST_F(memspaceHostAllProviderTest, allocsSpreadAcrossAllNumaNodes) {
size_t allocNodeId = SIZE_MAX;
getAllocationPolicy(ptr, maxNodeId, mode, boundNodeIds, allocNodeId);

// 'BIND' mode specifies that the memory is bound to a set of NUMA nodes.
// In case of 'HOST ALL' memspace, those set of nodes should be all
// available nodes.
UT_ASSERTeq(mode, MPOL_BIND);

// Confirm that the memory is bound to all the nodes from 'HOST ALL'
// memspace.
for (auto &id : nodeIds) {
auto it = std::find(boundNodeIds.begin(), boundNodeIds.end(), id);
UT_ASSERT(it != boundNodeIds.end());
}
// In case of 'HOST ALL' memspace, the default set of nodes (that
// contains all available nodes) is used but get_mempolicy() would
// return an empty set of nodes.
UT_ASSERTeq(mode, MPOL_DEFAULT);
UT_ASSERTeq(boundNodeIds.size(), 0);

// Confirm that the memory is allocated on one of the nodes in
// 'HOST ALL' memspace.
Expand Down