Skip to content

Commit a331bd8

Browse files
committed
implement interleave mode with customizable size
Signed-off-by: Łukasz Plewa <[email protected]>
1 parent 7df0dff commit a331bd8

File tree

4 files changed

+142
-58
lines changed

4 files changed

+142
-58
lines changed

include/umf/providers/provider_os_memory.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ typedef struct umf_os_memory_provider_params_t {
7070

7171
/// Describes how node list is interpreted
7272
umf_numa_mode_t numa_mode;
73+
/// part size for interleave mode - 0 means default (system specific)
74+
size_t part_size;
7375
} umf_os_memory_provider_params_t;
7476

7577
/// @brief OS Memory Provider operation results
@@ -94,6 +96,7 @@ umfOsMemoryProviderParamsDefault(void) {
9496
NULL, /* numa_list */
9597
0, /* numa_list_len */
9698
UMF_NUMA_MODE_DEFAULT, /* numa_mode */
99+
0 /* part_size */
97100
};
98101

99102
return params;

src/provider/provider_os_memory.c

Lines changed: 129 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "base_alloc_global.h"
1818
#include "provider_os_memory_internal.h"
19+
#include "utils_concurrency.h"
1920
#include "utils_log.h"
2021

2122
#include <umf.h>
@@ -28,11 +29,14 @@ typedef struct os_memory_provider_t {
2829
unsigned protection; // combination of OS-specific protection flags
2930

3031
// NUMA config
31-
hwloc_bitmap_t nodeset;
32+
hwloc_bitmap_t *nodeset;
33+
unsigned nodeset_len;
3234
char *nodeset_str_buf;
3335
hwloc_membind_policy_t numa_policy;
3436
int numa_flags; // combination of hwloc flags
3537

38+
size_t part_size;
39+
size_t alloc_size;
3640
hwloc_topology_t topo;
3741
} os_memory_provider_t;
3842

@@ -83,28 +87,64 @@ static void os_store_last_native_error(int32_t native_error, int errno_value) {
8387

8488
static umf_result_t nodemask_to_hwloc_nodeset(const unsigned *nodelist,
8589
unsigned long listsize,
86-
hwloc_bitmap_t *out_nodeset) {
87-
if (out_nodeset == NULL) {
88-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
89-
}
90+
os_memory_provider_t *os_provider,
91+
int is_separate) {
9092

91-
*out_nodeset = hwloc_bitmap_alloc();
92-
if (!*out_nodeset) {
93+
unsigned long array_size = listsize && is_separate ? listsize : 1;
94+
os_provider->nodeset =
95+
umf_ba_global_alloc(sizeof(*os_provider->nodeset) * array_size);
96+
97+
if (!os_provider->nodeset) {
9398
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
9499
}
95100

101+
hwloc_bitmap_t *out_nodeset = os_provider->nodeset;
102+
os_provider->nodeset_len = array_size;
96103
if (listsize == 0) {
104+
// hwloc_set_area_membind fails if an empty nodeset is passed, so
105+
// if no node is specified, just pass all available nodes.
106+
// For modes where no node is needed, they will be ignored anyway.
107+
out_nodeset[0] = hwloc_bitmap_dup(
108+
hwloc_topology_get_complete_nodeset(os_provider->topo));
109+
if (!out_nodeset[0]) {
110+
goto err_free_list;
111+
}
97112
return UMF_RESULT_SUCCESS;
98113
}
99114

100-
for (unsigned long i = 0; i < listsize; i++) {
101-
if (hwloc_bitmap_set(*out_nodeset, nodelist[i])) {
102-
hwloc_bitmap_free(*out_nodeset);
103-
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
115+
for (unsigned long i = 0; i < array_size; i++) {
116+
out_nodeset[i] = hwloc_bitmap_alloc();
117+
if (!out_nodeset[i]) {
118+
for (unsigned long j = 0; j < i; j++) {
119+
hwloc_bitmap_free(out_nodeset[j]);
120+
}
121+
goto err_free_list;
122+
}
123+
}
124+
125+
if (is_separate) {
126+
for (unsigned long i = 0; i < listsize; i++) {
127+
if (hwloc_bitmap_set(out_nodeset[i], nodelist[i])) {
128+
goto err_free_bitmaps;
129+
}
130+
}
131+
} else {
132+
for (unsigned long i = 0; i < listsize; i++) {
133+
if (hwloc_bitmap_set(out_nodeset[0], nodelist[i])) {
134+
goto err_free_bitmaps;
135+
}
104136
}
105137
}
106138

107139
return UMF_RESULT_SUCCESS;
140+
err_free_bitmaps:
141+
for (unsigned long i = 0; i < array_size; i++) {
142+
hwloc_bitmap_free(out_nodeset[i]);
143+
}
144+
err_free_list:
145+
umf_ba_global_free(*out_nodeset);
146+
os_provider->nodeset_len = 0;
147+
return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
108148
}
109149

110150
umf_result_t os_translate_flags(unsigned in_flags, unsigned max,
@@ -132,42 +172,61 @@ umf_result_t os_translate_flags(unsigned in_flags, unsigned max,
132172
return UMF_RESULT_SUCCESS;
133173
}
134174

135-
static umf_result_t translate_numa_mode(umf_numa_mode_t mode, int nodemaskEmpty,
136-
hwloc_membind_policy_t *numa_policy) {
175+
static umf_result_t validate_numa_mode(umf_numa_mode_t mode,
176+
int nodemaskEmpty) {
137177
switch (mode) {
138178
case UMF_NUMA_MODE_DEFAULT:
179+
case UMF_NUMA_MODE_LOCAL:
139180
if (!nodemaskEmpty) {
140181
// nodeset must be empty
141182
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
142183
}
143-
*numa_policy = HWLOC_MEMBIND_DEFAULT;
144184
return UMF_RESULT_SUCCESS;
145185
case UMF_NUMA_MODE_BIND:
146-
if (nodemaskEmpty) {
147-
// nodeset must not be empty
148-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
149-
}
150-
*numa_policy = HWLOC_MEMBIND_BIND;
151-
return UMF_RESULT_SUCCESS;
152186
case UMF_NUMA_MODE_INTERLEAVE:
153187
if (nodemaskEmpty) {
154188
// nodeset must not be empty
155189
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
156190
}
157-
*numa_policy = HWLOC_MEMBIND_INTERLEAVE;
158191
return UMF_RESULT_SUCCESS;
159192
case UMF_NUMA_MODE_PREFERRED:
160-
*numa_policy = HWLOC_MEMBIND_BIND;
161193
return UMF_RESULT_SUCCESS;
162-
case UMF_NUMA_MODE_LOCAL:
163-
if (!nodemaskEmpty) {
164-
// nodeset must be empty
165-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
194+
default:
195+
assert(0);
196+
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
197+
}
198+
}
199+
200+
static hwloc_membind_policy_t translate_numa_mode(umf_numa_mode_t mode,
201+
int manual) {
202+
switch (mode) {
203+
case UMF_NUMA_MODE_DEFAULT:
204+
return HWLOC_MEMBIND_DEFAULT;
205+
case UMF_NUMA_MODE_BIND:
206+
return HWLOC_MEMBIND_BIND;
207+
case UMF_NUMA_MODE_INTERLEAVE:
208+
// In manual mode, we manually implement interleaving,
209+
// by binding memory to specific NUMA nodes.
210+
if (manual) {
211+
return HWLOC_MEMBIND_BIND;
166212
}
167-
*numa_policy = HWLOC_MEMBIND_BIND;
168-
return UMF_RESULT_SUCCESS;
213+
return HWLOC_MEMBIND_INTERLEAVE;
214+
case UMF_NUMA_MODE_PREFERRED:
215+
return HWLOC_MEMBIND_BIND;
216+
case UMF_NUMA_MODE_LOCAL:
217+
return HWLOC_MEMBIND_BIND;
218+
}
219+
assert(0);
220+
return -1;
221+
}
222+
223+
//return 1 if membinding logic is performed at umf side
224+
//return 0 if membinding logic is performed by kernel
225+
static int advancedMembind(umf_os_memory_provider_params_t *in_params) {
226+
if (in_params->numa_mode == UMF_NUMA_MODE_INTERLEAVE) {
227+
return in_params->part_size > 0;
169228
}
170-
return UMF_RESULT_ERROR_INVALID_ARGUMENT;
229+
return 0;
171230
}
172231

173232
static int getHwlocMembindFlags(umf_numa_mode_t mode) {
@@ -193,17 +252,19 @@ static umf_result_t translate_params(umf_os_memory_provider_params_t *in_params,
193252

194253
// NUMA config
195254
int emptyNodeset = in_params->numa_list_len == 0;
196-
result = translate_numa_mode(in_params->numa_mode, emptyNodeset,
197-
&provider->numa_policy);
255+
result = validate_numa_mode(in_params->numa_mode, emptyNodeset);
198256
if (result != UMF_RESULT_SUCCESS) {
199257
LOG_ERR("incorrect NUMA mode: %u", in_params->numa_mode);
200258
return result;
201259
}
202260

261+
int manual_mode = advancedMembind(in_params);
262+
provider->numa_policy =
263+
translate_numa_mode(in_params->numa_mode, manual_mode);
203264
provider->numa_flags = getHwlocMembindFlags(in_params->numa_mode);
204-
265+
provider->part_size = in_params->part_size;
205266
return nodemask_to_hwloc_nodeset(
206-
in_params->numa_list, in_params->numa_list_len, &provider->nodeset);
267+
in_params->numa_list, in_params->numa_list_len, provider, manual_mode);
207268
}
208269

209270
static umf_result_t os_initialize(void *params, void **provider) {
@@ -252,7 +313,7 @@ static umf_result_t os_initialize(void *params, void **provider) {
252313
} else {
253314
if (hwloc_bitmap_list_snprintf(os_provider->nodeset_str_buf,
254315
NODESET_STR_BUF_LEN,
255-
os_provider->nodeset)) {
316+
*os_provider->nodeset)) {
256317
LOG_INFO("OS provider initialized with NUMA nodes: %s",
257318
os_provider->nodeset_str_buf);
258319
}
@@ -280,7 +341,10 @@ static void os_finalize(void *provider) {
280341
umf_ba_global_free(os_provider->nodeset_str_buf);
281342
}
282343

283-
hwloc_bitmap_free(os_provider->nodeset);
344+
for (unsigned i = 0; i < os_provider->nodeset_len; i++) {
345+
hwloc_bitmap_free(os_provider->nodeset[i]);
346+
}
347+
umf_ba_global_free(os_provider->nodeset);
284348
hwloc_topology_destroy(os_provider->topo);
285349
umf_ba_global_free(os_provider);
286350
}
@@ -387,6 +451,15 @@ static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment,
387451
return 0;
388452
}
389453

454+
static int get_membind(os_memory_provider_t *provider, size_t size) {
455+
if (provider->nodeset_len == 1) {
456+
return 0;
457+
}
458+
459+
size_t s = util_fetch_and_add64(&provider->alloc_size, size);
460+
461+
return (s / provider->part_size) % provider->nodeset_len;
462+
}
390463
static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
391464
void **resultPtr) {
392465
int ret;
@@ -434,32 +507,31 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
434507
}
435508

436509
errno = 0;
437-
if (hwloc_bitmap_iszero(os_provider->nodeset)) {
438-
// Hwloc_set_area_membind fails if empty nodeset is passed so if no node is specified,
439-
// just pass all available nodes. For modes where no node is needed, they will be
440-
// ignored anyway.
441-
hwloc_const_nodeset_t complete_nodeset =
442-
hwloc_topology_get_complete_nodeset(os_provider->topo);
443-
ret = hwloc_set_area_membind(os_provider->topo, addr, size,
444-
complete_nodeset, os_provider->numa_policy,
445-
os_provider->numa_flags);
446-
} else {
510+
unsigned membind = get_membind(os_provider, size);
511+
size_t bind_size =
512+
os_provider->nodeset_len == 1 ? size : os_provider->part_size;
513+
514+
do {
515+
size_t s = bind_size < size ? bind_size : size;
447516
ret = hwloc_set_area_membind(
448-
os_provider->topo, addr, size, os_provider->nodeset,
517+
os_provider->topo, addr, s, os_provider->nodeset[membind++],
449518
os_provider->numa_policy, os_provider->numa_flags);
450-
}
451519

452-
if (ret) {
453-
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
454-
LOG_PERR("binding memory to NUMA node failed");
455-
// TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows - ignore this temporarily
456-
if (errno != ENOSYS &&
457-
errno != 0) { // ENOSYS - Function not implemented
458-
// Do not error out if memory binding is not implemented at all (like in case of WSL on Windows).
459-
goto err_unmap;
520+
size -= s;
521+
membind %= os_provider->nodeset_len;
522+
if (ret) {
523+
os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
524+
LOG_PERR("binding memory to NUMA node failed");
525+
// TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows,
526+
// ignore this temporarily
527+
if (errno != ENOSYS &&
528+
errno != 0) { // ENOSYS - Function not implemented
529+
// Do not error out if memory binding is not implemented at all
530+
// (like in case of WSL on Windows).
531+
goto err_unmap;
532+
}
460533
}
461-
}
462-
534+
} while (size > 0);
463535
*resultPtr = addr;
464536

465537
return UMF_RESULT_SUCCESS;

src/utils/utils_posix_concurrency.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,15 @@
1212

1313
#include "utils_concurrency.h"
1414

15+
// TODO: implement all atomics
16+
#ifndef _MSC_VER
17+
#define util_fetch_and_add64 __sync_fetch_and_add
18+
#else
19+
#include <windows.h>
20+
#define util_fetch_and_add64(ptr, value) \
21+
InterlockedExchangeAdd64((LONG64 *)(ptr), value)
22+
#endif
23+
1524
size_t util_mutex_get_size(void) { return sizeof(pthread_mutex_t); }
1625

1726
os_mutex_t *util_mutex_init(void *ptr) {

test/provider_os_memory.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ using providerCreateExtParams = std::tuple<umf_memory_provider_ops_t *, void *>;
4545

4646
umf::provider_unique_handle_t
4747
providerCreateExt(providerCreateExtParams params) {
48-
umf_memory_provider_handle_t hProvider;
48+
umf_memory_provider_handle_t hProvider = nullptr;
4949
auto [provider_ops, provider_params] = params;
5050

5151
auto ret =

0 commit comments

Comments
 (0)