Commit 0b1f6e4

implement interleave mode with customizable size
Signed-off-by: Łukasz Plewa <[email protected]>
1 parent 7df0dff commit 0b1f6e4

4 files changed, +148 -65 lines changed

include/umf/providers/provider_os_memory.h

Lines changed: 3 additions & 0 deletions
@@ -70,6 +70,8 @@ typedef struct umf_os_memory_provider_params_t {
 
     /// Describes how node list is interpreted
     umf_numa_mode_t numa_mode;
+    /// part size for interleave mode - 0 means default (system specific)
+    size_t part_size;
 } umf_os_memory_provider_params_t;
 
 /// @brief OS Memory Provider operation results
@@ -94,6 +96,7 @@ umfOsMemoryProviderParamsDefault(void) {
         NULL, /* numa_list */
         0, /* numa_list_len */
         UMF_NUMA_MODE_DEFAULT, /* numa_mode */
+        0 /* part_size */
     };
 
     return params;
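
For context, a minimal usage sketch of the new field (illustrative, not part of this commit): it requests interleaving across two NUMA nodes in 2 MiB parts. umfMemoryProviderCreate() and umfOsMemoryProviderOps() are assumed here to be the usual public UMF entry points; neither is touched by this diff.

    #include <umf/memory_provider.h>
    #include <umf/providers/provider_os_memory.h>

    // Hypothetical helper: interleave across NUMA nodes 0 and 1 in 2 MiB parts.
    static umf_result_t
    create_interleaved_provider(umf_memory_provider_handle_t *out) {
        static unsigned nodes[] = {0, 1};

        umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault();
        params.numa_list = nodes;
        params.numa_list_len = 2;
        params.numa_mode = UMF_NUMA_MODE_INTERLEAVE;
        params.part_size = 2 * 1024 * 1024; // 0 keeps the default (system) interleaving

        return umfMemoryProviderCreate(umfOsMemoryProviderOps(), &params, out);
    }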

src/provider/provider_os_memory.c

Lines changed: 141 additions & 64 deletions
@@ -16,6 +16,7 @@
 
 #include "base_alloc_global.h"
 #include "provider_os_memory_internal.h"
+#include "utils_concurrency.h"
 #include "utils_log.h"
 
 #include <umf.h>
@@ -28,11 +29,14 @@ typedef struct os_memory_provider_t {
     unsigned protection; // combination of OS-specific protection flags
 
     // NUMA config
-    hwloc_bitmap_t nodeset;
+    hwloc_bitmap_t *nodeset;
+    unsigned nodeset_len;
     char *nodeset_str_buf;
     hwloc_membind_policy_t numa_policy;
     int numa_flags; // combination of hwloc flags
 
+    size_t part_size;
+    size_t alloc_sum; // sum of all allocations - used for manual interleaving
     hwloc_topology_t topo;
 } os_memory_provider_t;
 
@@ -81,30 +85,67 @@ static void os_store_last_native_error(int32_t native_error, int errno_value) {
     TLS_last_native_error.errno_value = errno_value;
 }
 
-static umf_result_t nodemask_to_hwloc_nodeset(const unsigned *nodelist,
-                                              unsigned long listsize,
-                                              hwloc_bitmap_t *out_nodeset) {
-    if (out_nodeset == NULL) {
-        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
-    }
+static umf_result_t initialize_nodeset(os_memory_provider_t *os_provider,
+                                       const unsigned *nodelist,
+                                       unsigned long listsize,
+                                       int separate_nodes) {
 
-    *out_nodeset = hwloc_bitmap_alloc();
-    if (!*out_nodeset) {
+    unsigned long array_size = (listsize && separate_nodes) ? listsize : 1;
+    os_provider->nodeset =
+        umf_ba_global_alloc(sizeof(*os_provider->nodeset) * array_size);
+
+    if (!os_provider->nodeset) {
         return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
     }
 
+    hwloc_bitmap_t *out_nodeset = os_provider->nodeset;
+    os_provider->nodeset_len = array_size;
     if (listsize == 0) {
+        // Hwloc_set_area_membind fails if empty nodeset is passed so
+        // if no node is specified, just pass all available nodes.
+        // For modes where no node is needed, they will be ignored anyway.
+        out_nodeset[0] = hwloc_bitmap_dup(
+            hwloc_topology_get_complete_nodeset(os_provider->topo));
+        if (!out_nodeset[0]) {
+            goto err_free_list;
+        }
         return UMF_RESULT_SUCCESS;
     }
 
-    for (unsigned long i = 0; i < listsize; i++) {
-        if (hwloc_bitmap_set(*out_nodeset, nodelist[i])) {
-            hwloc_bitmap_free(*out_nodeset);
-            return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    for (unsigned long i = 0; i < array_size; i++) {
+        out_nodeset[i] = hwloc_bitmap_alloc();
+        if (!out_nodeset[i]) {
+            for (unsigned long j = 0; j < i; j++) {
+                hwloc_bitmap_free(out_nodeset[j]);
+            }
+            goto err_free_list;
+        }
+    }
+
+    if (separate_nodes) {
+        for (unsigned long i = 0; i < listsize; i++) {
+            if (hwloc_bitmap_set(out_nodeset[i], nodelist[i])) {
+                goto err_free_bitmaps;
+            }
+        }
+    } else {
+        for (unsigned long i = 0; i < listsize; i++) {
+            if (hwloc_bitmap_set(out_nodeset[0], nodelist[i])) {
+                goto err_free_bitmaps;
+            }
         }
     }
 
     return UMF_RESULT_SUCCESS;
+
+err_free_bitmaps:
+    for (unsigned long i = 0; i < array_size; i++) {
+        hwloc_bitmap_free(out_nodeset[i]);
+    }
+err_free_list:
+    umf_ba_global_free(*out_nodeset);
+    os_provider->nodeset_len = 0;
+    return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
 }
 
 umf_result_t os_translate_flags(unsigned in_flags, unsigned max,
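
To illustrate the layout initialize_nodeset() builds, here is a sketch outside the diff that models the bitmaps as plain 64-bit masks (all names below are made up): with numa_list = {3, 5, 7}, separate_nodes != 0 yields one single-node entry per node, while separate_nodes == 0 collapses everything into a single entry.

    #include <stdint.h>
    #include <stdlib.h>

    // Simplified model of initialize_nodeset(): one mask per node when
    // separate_nodes is set, otherwise a single mask holding every node.
    static uint64_t *build_nodeset(const unsigned *nodelist, unsigned long listsize,
                                   int separate_nodes, unsigned long *out_len) {
        unsigned long array_size = (listsize && separate_nodes) ? listsize : 1;
        uint64_t *set = calloc(array_size, sizeof(*set));
        if (!set) {
            return NULL;
        }
        for (unsigned long i = 0; i < listsize; i++) {
            set[separate_nodes ? i : 0] |= 1ull << nodelist[i];
        }
        *out_len = array_size;
        return set; // {3,5,7}: separate -> [0x08, 0x20, 0x80], merged -> [0xA8]
    }
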
@@ -132,42 +173,61 @@ umf_result_t os_translate_flags(unsigned in_flags, unsigned max,
     return UMF_RESULT_SUCCESS;
 }
 
-static umf_result_t translate_numa_mode(umf_numa_mode_t mode, int nodemaskEmpty,
-                                        hwloc_membind_policy_t *numa_policy) {
+static umf_result_t validate_numa_mode(umf_numa_mode_t mode,
+                                       int nodemaskEmpty) {
     switch (mode) {
     case UMF_NUMA_MODE_DEFAULT:
+    case UMF_NUMA_MODE_LOCAL:
         if (!nodemaskEmpty) {
             // nodeset must be empty
             return UMF_RESULT_ERROR_INVALID_ARGUMENT;
         }
-        *numa_policy = HWLOC_MEMBIND_DEFAULT;
         return UMF_RESULT_SUCCESS;
     case UMF_NUMA_MODE_BIND:
-        if (nodemaskEmpty) {
-            // nodeset must not be empty
-            return UMF_RESULT_ERROR_INVALID_ARGUMENT;
-        }
-        *numa_policy = HWLOC_MEMBIND_BIND;
-        return UMF_RESULT_SUCCESS;
     case UMF_NUMA_MODE_INTERLEAVE:
         if (nodemaskEmpty) {
             // nodeset must not be empty
             return UMF_RESULT_ERROR_INVALID_ARGUMENT;
         }
-        *numa_policy = HWLOC_MEMBIND_INTERLEAVE;
         return UMF_RESULT_SUCCESS;
     case UMF_NUMA_MODE_PREFERRED:
-        *numa_policy = HWLOC_MEMBIND_BIND;
         return UMF_RESULT_SUCCESS;
-    case UMF_NUMA_MODE_LOCAL:
-        if (!nodemaskEmpty) {
-            // nodeset must be empty
-            return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    default:
+        assert(0);
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+}
+
+static hwloc_membind_policy_t translate_numa_mode(umf_numa_mode_t mode,
+                                                  int dedicated_node_bind) {
+    switch (mode) {
+    case UMF_NUMA_MODE_DEFAULT:
+        return HWLOC_MEMBIND_DEFAULT;
+    case UMF_NUMA_MODE_BIND:
+        return HWLOC_MEMBIND_BIND;
+    case UMF_NUMA_MODE_INTERLEAVE:
+        // In manual mode, we manually implement interleaving,
+        // by binding memory to specific NUMA nodes.
+        if (dedicated_node_bind) {
+            return HWLOC_MEMBIND_BIND;
         }
-        *numa_policy = HWLOC_MEMBIND_BIND;
-        return UMF_RESULT_SUCCESS;
+        return HWLOC_MEMBIND_INTERLEAVE;
+    case UMF_NUMA_MODE_PREFERRED:
+        return HWLOC_MEMBIND_BIND;
+    case UMF_NUMA_MODE_LOCAL:
+        return HWLOC_MEMBIND_BIND;
     }
-    return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    assert(0);
+    return -1;
+}
+
+//return 1 if umf will binds memory directly to single NUMA node, based on internal algorithm
+//return 0 if umf will just set numa memory policy, and kernel will decide where to allocate memory
+static int is_dedicated_node_bind(umf_os_memory_provider_params_t *in_params) {
+    if (in_params->numa_mode == UMF_NUMA_MODE_INTERLEAVE) {
+        return in_params->part_size > 0;
+    }
+    return 0;
 }
 
 static int getHwlocMembindFlags(umf_numa_mode_t mode) {
@@ -193,17 +253,19 @@ static umf_result_t translate_params(umf_os_memory_provider_params_t *in_params,
 
     // NUMA config
     int emptyNodeset = in_params->numa_list_len == 0;
-    result = translate_numa_mode(in_params->numa_mode, emptyNodeset,
-                                 &provider->numa_policy);
+    result = validate_numa_mode(in_params->numa_mode, emptyNodeset);
     if (result != UMF_RESULT_SUCCESS) {
         LOG_ERR("incorrect NUMA mode: %u", in_params->numa_mode);
         return result;
     }
 
+    int dedicated_node_bind = is_dedicated_node_bind(in_params);
+    provider->numa_policy =
+        translate_numa_mode(in_params->numa_mode, dedicated_node_bind);
     provider->numa_flags = getHwlocMembindFlags(in_params->numa_mode);
-
-    return nodemask_to_hwloc_nodeset(
-        in_params->numa_list, in_params->numa_list_len, &provider->nodeset);
+    provider->part_size = in_params->part_size;
+    return initialize_nodeset(provider, in_params->numa_list,
+                              in_params->numa_list_len, dedicated_node_bind);
 }
 
 static umf_result_t os_initialize(void *params, void **provider) {
@@ -250,11 +312,13 @@ static umf_result_t os_initialize(void *params, void **provider) {
     if (!os_provider->nodeset_str_buf) {
         LOG_INFO("Allocating memory for printing NUMA nodes failed");
     } else {
-        if (hwloc_bitmap_list_snprintf(os_provider->nodeset_str_buf,
-                                       NODESET_STR_BUF_LEN,
-                                       os_provider->nodeset)) {
-            LOG_INFO("OS provider initialized with NUMA nodes: %s",
-                     os_provider->nodeset_str_buf);
+        LOG_INFO("OS provider initialized with NUMA nodes:");
+        for (unsigned i = 0; i < os_provider->nodeset_len; i++) {
+            if (hwloc_bitmap_list_snprintf(os_provider->nodeset_str_buf,
+                                           NODESET_STR_BUF_LEN,
+                                           os_provider->nodeset[i])) {
+                LOG_INFO("%s", os_provider->nodeset_str_buf);
+            }
         }
     }
 
@@ -280,7 +344,10 @@ static void os_finalize(void *provider) {
         umf_ba_global_free(os_provider->nodeset_str_buf);
     }
 
-    hwloc_bitmap_free(os_provider->nodeset);
+    for (unsigned i = 0; i < os_provider->nodeset_len; i++) {
+        hwloc_bitmap_free(os_provider->nodeset[i]);
+    }
+    umf_ba_global_free(os_provider->nodeset);
     hwloc_topology_destroy(os_provider->topo);
     umf_ba_global_free(os_provider);
 }
@@ -387,6 +454,17 @@ static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment,
     return 0;
 }
 
+static int get_membind(os_memory_provider_t *provider, size_t size) {
+    if (provider->nodeset_len == 1) {
+        return 0;
+    }
+
+    assert(provider->part_size != 0);
+    size_t s = util_fetch_and_add64(&provider->alloc_sum, size);
+
+    return (s / provider->part_size) % provider->nodeset_len;
+}
+
 static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
                              void **resultPtr) {
     int ret;
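
The index math in get_membind() is easiest to see with concrete numbers. Below is a simplified, single-threaded restatement (not the committed code): alloc_sum is a running byte counter, so the chosen starting node advances by one for every part_size bytes handed out so far.

    #include <stddef.h>

    // Simplified, non-atomic model of get_membind() for nodeset_len > 1.
    static unsigned pick_start_node(size_t *alloc_sum, size_t size,
                                    size_t part_size, unsigned nodeset_len) {
        size_t before = *alloc_sum;
        *alloc_sum += size; // the real code does this with util_fetch_and_add64()
        return (unsigned)((before / part_size) % nodeset_len);
    }
    // With part_size = 4096 and nodeset_len = 3, allocation sizes
    // 4096, 4096, 8192, 4096 start on nodes 0, 1, 2, 1 respectively.
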
@@ -434,32 +512,31 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
     }
 
     errno = 0;
-    if (hwloc_bitmap_iszero(os_provider->nodeset)) {
-        // Hwloc_set_area_membind fails if empty nodeset is passed so if no node is specified,
-        // just pass all available nodes. For modes where no node is needed, they will be
-        // ignored anyway.
-        hwloc_const_nodeset_t complete_nodeset =
-            hwloc_topology_get_complete_nodeset(os_provider->topo);
-        ret = hwloc_set_area_membind(os_provider->topo, addr, size,
-                                     complete_nodeset, os_provider->numa_policy,
-                                     os_provider->numa_flags);
-    } else {
+    unsigned membind = get_membind(os_provider, size);
+    size_t bind_size =
+        os_provider->nodeset_len == 1 ? size : os_provider->part_size;
+
+    do {
+        size_t s = bind_size < size ? bind_size : size;
         ret = hwloc_set_area_membind(
-            os_provider->topo, addr, size, os_provider->nodeset,
+            os_provider->topo, addr, s, os_provider->nodeset[membind++],
            os_provider->numa_policy, os_provider->numa_flags);
-    }
 
-    if (ret) {
-        os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
-        LOG_PERR("binding memory to NUMA node failed");
-        // TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows - ignore this temporarily
-        if (errno != ENOSYS &&
-            errno != 0) { // ENOSYS - Function not implemented
-            // Do not error out if memory binding is not implemented at all (like in case of WSL on Windows).
-            goto err_unmap;
+        size -= s;
+        membind %= os_provider->nodeset_len;
+        if (ret) {
+            os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
+            LOG_PERR("binding memory to NUMA node failed");
+            // TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows,
+            // ignore this temporarily
+            if (errno != ENOSYS &&
+                errno != 0) { // ENOSYS - Function not implemented
+                // Do not error out if memory binding is not implemented at all
+                // (like in case of WSL on Windows).
+                goto err_unmap;
+            }
         }
-    }
-
+    } while (size > 0);
     *resultPtr = addr;
 
     return UMF_RESULT_SUCCESS;
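
Put differently, when several per-node bitmaps exist the loop above issues one hwloc_set_area_membind() call per part_size chunk, and a single call otherwise. A rough illustration of the call count (not committed code):

    #include <stddef.h>

    // Number of bind calls the do/while loop performs for one allocation.
    static size_t bind_call_count(size_t size, size_t part_size,
                                  unsigned nodeset_len) {
        if (nodeset_len == 1) {
            return 1; // whole allocation bound with one call
        }
        return (size + part_size - 1) / part_size; // one call per part_size chunk
    }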

src/utils/utils_concurrency.h

Lines changed: 3 additions & 0 deletions
@@ -70,6 +70,8 @@ static __inline unsigned char util_mssb_index(long long value) {
     InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired)
 #define util_atomic_increment(object) \
     InterlockedIncrement64((LONG64 volatile *)object)
+#define util_fetch_and_add64(ptr, value) \
+    InterlockedExchangeAdd64((LONG64 *)(ptr), value)
 #else
 #define util_lssb_index(x) ((unsigned char)__builtin_ctzll(x))
 #define util_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x)))
@@ -87,6 +89,7 @@ static __inline unsigned char util_mssb_index(long long value) {
 
 #define util_atomic_increment(object) \
     __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL)
+#define util_fetch_and_add64 __sync_fetch_and_add
 #endif
 
 #ifdef __cplusplus
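
Both definitions are fetch-then-add: they atomically add to a 64-bit counter and return the value it held before the addition (the contract shared by InterlockedExchangeAdd64 and __sync_fetch_and_add). A small usage sketch, outside this commit:

    #include <stdint.h>
    #include "utils_concurrency.h" // internal UMF header shown above

    // Illustrative only: a shared byte counter bumped from multiple threads.
    static uint64_t total_bytes;

    static uint64_t account_alloc(uint64_t size) {
        // Returns the counter value *before* this addition on both branches.
        return util_fetch_and_add64(&total_bytes, size);
    }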

test/provider_os_memory.cpp

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@ using providerCreateExtParams = std::tuple<umf_memory_provider_ops_t *, void *>;
 
 umf::provider_unique_handle_t
 providerCreateExt(providerCreateExtParams params) {
-    umf_memory_provider_handle_t hProvider;
+    umf_memory_provider_handle_t hProvider = nullptr;
     auto [provider_ops, provider_params] = params;
 
     auto ret =
