 
 #include "base_alloc_global.h"
 #include "provider_os_memory_internal.h"
+#include "utils_concurrency.h"
 #include "utils_log.h"
 
 #include <umf.h>
@@ -28,11 +29,14 @@ typedef struct os_memory_provider_t {
     unsigned protection; // combination of OS-specific protection flags
 
     // NUMA config
-    hwloc_bitmap_t nodeset;
+    hwloc_bitmap_t *nodeset;
+    unsigned nodeset_len;
     char *nodeset_str_buf;
     hwloc_membind_policy_t numa_policy;
     int numa_flags; // combination of hwloc flags
 
+    size_t part_size;
+    size_t alloc_size;
     hwloc_topology_t topo;
 } os_memory_provider_t;
 
@@ -83,28 +87,64 @@ static void os_store_last_native_error(int32_t native_error, int errno_value) {
 
 static umf_result_t nodemask_to_hwloc_nodeset(const unsigned *nodelist,
                                               unsigned long listsize,
-                                              hwloc_bitmap_t *out_nodeset) {
-    if (out_nodeset == NULL) {
-        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
-    }
+                                              os_memory_provider_t *os_provider,
+                                              int is_separate) {
 
-    *out_nodeset = hwloc_bitmap_alloc();
-    if (!*out_nodeset) {
+    unsigned long array_size = listsize && is_separate ? listsize : 1;
+    os_provider->nodeset =
+        umf_ba_global_alloc(sizeof(*os_provider->nodeset) * array_size);
+
+    if (!os_provider->nodeset) {
         return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
     }
 
+    hwloc_bitmap_t *out_nodeset = os_provider->nodeset;
+    os_provider->nodeset_len = array_size;
     if (listsize == 0) {
+        // hwloc_set_area_membind() fails if an empty nodeset is passed, so
+        // if no node is specified, just pass all available nodes.
+        // For modes where no node is needed, they will be ignored anyway.
+        out_nodeset[0] = hwloc_bitmap_dup(
+            hwloc_topology_get_complete_nodeset(os_provider->topo));
+        if (!out_nodeset[0]) {
+            goto err_free_list;
+        }
         return UMF_RESULT_SUCCESS;
     }
 
-    for (unsigned long i = 0; i < listsize; i++) {
-        if (hwloc_bitmap_set(*out_nodeset, nodelist[i])) {
-            hwloc_bitmap_free(*out_nodeset);
-            return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+    for (unsigned long i = 0; i < array_size; i++) {
+        out_nodeset[i] = hwloc_bitmap_alloc();
+        if (!out_nodeset[i]) {
+            for (unsigned long j = 0; j < i; j++) {
+                hwloc_bitmap_free(out_nodeset[j]);
+            }
+            goto err_free_list;
+        }
+    }
+
+    if (is_separate) {
+        for (unsigned long i = 0; i < listsize; i++) {
+            if (hwloc_bitmap_set(out_nodeset[i], nodelist[i])) {
+                goto err_free_bitmaps;
+            }
+        }
+    } else {
+        for (unsigned long i = 0; i < listsize; i++) {
+            if (hwloc_bitmap_set(out_nodeset[0], nodelist[i])) {
+                goto err_free_bitmaps;
+            }
         }
     }
 
     return UMF_RESULT_SUCCESS;
+err_free_bitmaps:
+    for (unsigned long i = 0; i < array_size; i++) {
+        hwloc_bitmap_free(out_nodeset[i]);
+    }
+err_free_list:
+    umf_ba_global_free(os_provider->nodeset);
+    os_provider->nodeset_len = 0;
+    return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY;
 }
 
 umf_result_t os_translate_flags(unsigned in_flags, unsigned max,
@@ -132,42 +172,61 @@ umf_result_t os_translate_flags(unsigned in_flags, unsigned max,
     return UMF_RESULT_SUCCESS;
 }
 
-static umf_result_t translate_numa_mode(umf_numa_mode_t mode, int nodemaskEmpty,
-                                        hwloc_membind_policy_t *numa_policy) {
+static umf_result_t validate_numa_mode(umf_numa_mode_t mode,
+                                       int nodemaskEmpty) {
     switch (mode) {
     case UMF_NUMA_MODE_DEFAULT:
+    case UMF_NUMA_MODE_LOCAL:
         if (!nodemaskEmpty) {
             // nodeset must be empty
             return UMF_RESULT_ERROR_INVALID_ARGUMENT;
         }
-        *numa_policy = HWLOC_MEMBIND_DEFAULT;
         return UMF_RESULT_SUCCESS;
     case UMF_NUMA_MODE_BIND:
-        if (nodemaskEmpty) {
-            // nodeset must not be empty
-            return UMF_RESULT_ERROR_INVALID_ARGUMENT;
-        }
-        *numa_policy = HWLOC_MEMBIND_BIND;
-        return UMF_RESULT_SUCCESS;
     case UMF_NUMA_MODE_INTERLEAVE:
         if (nodemaskEmpty) {
             // nodeset must not be empty
             return UMF_RESULT_ERROR_INVALID_ARGUMENT;
         }
-        *numa_policy = HWLOC_MEMBIND_INTERLEAVE;
         return UMF_RESULT_SUCCESS;
     case UMF_NUMA_MODE_PREFERRED:
-        *numa_policy = HWLOC_MEMBIND_BIND;
         return UMF_RESULT_SUCCESS;
-    case UMF_NUMA_MODE_LOCAL:
-        if (!nodemaskEmpty) {
-            // nodeset must be empty
-            return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    default:
+        assert(0);
+        return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    }
+}
+
+static hwloc_membind_policy_t translate_numa_mode(umf_numa_mode_t mode,
+                                                  int manual) {
+    switch (mode) {
+    case UMF_NUMA_MODE_DEFAULT:
+        return HWLOC_MEMBIND_DEFAULT;
+    case UMF_NUMA_MODE_BIND:
+        return HWLOC_MEMBIND_BIND;
+    case UMF_NUMA_MODE_INTERLEAVE:
+        // In manual mode, we implement interleaving ourselves
+        // by binding memory to specific NUMA nodes.
+        if (manual) {
+            return HWLOC_MEMBIND_BIND;
         }
-        *numa_policy = HWLOC_MEMBIND_BIND;
-        return UMF_RESULT_SUCCESS;
+        return HWLOC_MEMBIND_INTERLEAVE;
+    case UMF_NUMA_MODE_PREFERRED:
+        return HWLOC_MEMBIND_BIND;
+    case UMF_NUMA_MODE_LOCAL:
+        return HWLOC_MEMBIND_BIND;
+    }
+    assert(0);
+    return -1;
+}
+
+// return 1 if the membinding logic is performed on the UMF side
+// return 0 if the membinding logic is performed by the kernel
+static int advancedMembind(umf_os_memory_provider_params_t *in_params) {
+    if (in_params->numa_mode == UMF_NUMA_MODE_INTERLEAVE) {
+        return in_params->part_size > 0;
     }
-    return UMF_RESULT_ERROR_INVALID_ARGUMENT;
+    return 0;
 }
 
 static int getHwlocMembindFlags(umf_numa_mode_t mode) {
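
The net effect of the split above: for UMF_NUMA_MODE_INTERLEAVE a non-zero part_size switches the provider to manual interleaving (the per-part HWLOC_MEMBIND_BIND path), while part_size == 0 keeps kernel-side interleaving (HWLOC_MEMBIND_INTERLEAVE). A minimal standalone sketch of that decision, assuming only a hwloc installation; pick_policy is an illustrative stand-in, not code from this patch:

    #include <assert.h>
    #include <stddef.h>
    #include <hwloc.h>

    // Mirrors advancedMembind() + translate_numa_mode() for the interleave mode.
    static hwloc_membind_policy_t pick_policy(size_t part_size) {
        return part_size > 0 ? HWLOC_MEMBIND_BIND : HWLOC_MEMBIND_INTERLEAVE;
    }

    int main(void) {
        assert(pick_policy(0) == HWLOC_MEMBIND_INTERLEAVE);          // kernel interleaving
        assert(pick_policy(2 * 1024 * 1024) == HWLOC_MEMBIND_BIND);  // manual, 2 MiB parts
        return 0;
    }
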
@@ -193,17 +252,19 @@ static umf_result_t translate_params(umf_os_memory_provider_params_t *in_params,
 
     // NUMA config
     int emptyNodeset = in_params->numa_list_len == 0;
-    result = translate_numa_mode(in_params->numa_mode, emptyNodeset,
-                                 &provider->numa_policy);
+    result = validate_numa_mode(in_params->numa_mode, emptyNodeset);
     if (result != UMF_RESULT_SUCCESS) {
         LOG_ERR("incorrect NUMA mode: %u", in_params->numa_mode);
         return result;
     }
 
+    int manual_mode = advancedMembind(in_params);
+    provider->numa_policy =
+        translate_numa_mode(in_params->numa_mode, manual_mode);
     provider->numa_flags = getHwlocMembindFlags(in_params->numa_mode);
-
+    provider->part_size = in_params->part_size;
     return nodemask_to_hwloc_nodeset(
-        in_params->numa_list, in_params->numa_list_len, &provider->nodeset);
+        in_params->numa_list, in_params->numa_list_len, provider, manual_mode);
 }
 
 static umf_result_t os_initialize(void *params, void **provider) {
@@ -252,7 +313,7 @@ static umf_result_t os_initialize(void *params, void **provider) {
     } else {
         if (hwloc_bitmap_list_snprintf(os_provider->nodeset_str_buf,
                                        NODESET_STR_BUF_LEN,
-                                       os_provider->nodeset)) {
+                                       *os_provider->nodeset)) {
             LOG_INFO("OS provider initialized with NUMA nodes: %s",
                      os_provider->nodeset_str_buf);
         }
@@ -280,7 +341,10 @@ static void os_finalize(void *provider) {
         umf_ba_global_free(os_provider->nodeset_str_buf);
     }
 
-    hwloc_bitmap_free(os_provider->nodeset);
+    for (unsigned i = 0; i < os_provider->nodeset_len; i++) {
+        hwloc_bitmap_free(os_provider->nodeset[i]);
+    }
+    umf_ba_global_free(os_provider->nodeset);
     hwloc_topology_destroy(os_provider->topo);
     umf_ba_global_free(os_provider);
 }
@@ -387,6 +451,15 @@ static int os_mmap_aligned(void *hint_addr, size_t length, size_t alignment,
     return 0;
 }
 
+static int get_membind(os_memory_provider_t *provider, size_t size) {
+    if (provider->nodeset_len == 1) {
+        return 0;
+    }
+
+    size_t s = util_fetch_and_add64(&provider->alloc_size, size);
+
+    return (s / provider->part_size) % provider->nodeset_len;
+}
 static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
                              void **resultPtr) {
     int ret;
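
The index returned by get_membind() is a round-robin over part_size-sized chunks of the running allocation total kept in alloc_size. A self-contained sketch of that arithmetic with illustrative values; part_index is a stand-in for get_membind(), not code from this patch:

    #include <assert.h>
    #include <stddef.h>

    // The cumulative bytes allocated so far, divided into part_size chunks,
    // selects a NUMA node index round-robin across nodeset_len nodes.
    static unsigned part_index(size_t allocated_so_far, size_t part_size,
                               unsigned nodeset_len) {
        return (unsigned)((allocated_so_far / part_size) % nodeset_len);
    }

    int main(void) {
        const size_t part = 2 * 1024 * 1024; // assume part_size = 2 MiB
        // With 3 nodes, successive 2 MiB chunks map to nodes 0, 1, 2, 0, ...
        assert(part_index(0 * part, part, 3) == 0);
        assert(part_index(1 * part, part, 3) == 1);
        assert(part_index(2 * part, part, 3) == 2);
        assert(part_index(3 * part, part, 3) == 0);
        return 0;
    }
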
@@ -434,32 +507,31 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment,
     }
 
     errno = 0;
-    if (hwloc_bitmap_iszero(os_provider->nodeset)) {
-        // Hwloc_set_area_membind fails if empty nodeset is passed so if no node is specified,
-        // just pass all available nodes. For modes where no node is needed, they will be
-        // ignored anyway.
-        hwloc_const_nodeset_t complete_nodeset =
-            hwloc_topology_get_complete_nodeset(os_provider->topo);
-        ret = hwloc_set_area_membind(os_provider->topo, addr, size,
-                                     complete_nodeset, os_provider->numa_policy,
-                                     os_provider->numa_flags);
-    } else {
+    unsigned membind = get_membind(os_provider, size);
+    size_t bind_size =
+        os_provider->nodeset_len == 1 ? size : os_provider->part_size;
+
+    char *bind_addr = (char *)addr;
+    do {
+        size_t s = bind_size < size ? bind_size : size;
         ret = hwloc_set_area_membind(
-            os_provider->topo, addr, size, os_provider->nodeset,
+            os_provider->topo, bind_addr, s, os_provider->nodeset[membind++],
             os_provider->numa_policy, os_provider->numa_flags);
-    }
 
-    if (ret) {
-        os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
-        LOG_PERR("binding memory to NUMA node failed");
-        // TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows - ignore this temporarily
-        if (errno != ENOSYS &&
-            errno != 0) { // ENOSYS - Function not implemented
-            // Do not error out if memory binding is not implemented at all (like in case of WSL on Windows).
-            goto err_unmap;
+        size -= s;
+        bind_addr += s;
+        membind %= os_provider->nodeset_len;
+        if (ret) {
+            os_store_last_native_error(UMF_OS_RESULT_ERROR_BIND_FAILED, errno);
+            LOG_PERR("binding memory to NUMA node failed");
+            // TODO: (errno == 0) when hwloc_set_area_membind() fails on Windows,
+            // ignore this temporarily
+            if (errno != ENOSYS &&
+                errno != 0) { // ENOSYS - Function not implemented
+                // Do not error out if memory binding is not implemented at all
+                // (like in case of WSL on Windows).
+                goto err_unmap;
+            }
         }
-    }
-
+    } while (size > 0);
     *resultPtr = addr;
 
     return UMF_RESULT_SUCCESS;
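
For context, the new field could be exercised roughly as follows once this patch lands. This is a minimal sketch, not taken from the commit: the header paths, umfOsMemoryProviderParamsDefault(), umfOsMemoryProviderOps(), umfMemoryProviderCreate()/Alloc()/Free()/Destroy() are assumed from the public UMF API, and the node list, allocation size, and 2 MiB part_size are made-up example values; only numa_list, numa_list_len, numa_mode, and part_size appear in this diff.

    #include <umf/memory_provider.h>
    #include <umf/providers/provider_os_memory.h>

    int main(void) {
        // Interleave 2 MiB parts across NUMA nodes 0 and 1 (example values).
        unsigned nodes[] = {0, 1};
        umf_os_memory_provider_params_t params = umfOsMemoryProviderParamsDefault();
        params.numa_list = nodes;
        params.numa_list_len = 2;
        params.numa_mode = UMF_NUMA_MODE_INTERLEAVE;
        params.part_size = 2 * 1024 * 1024; // > 0 selects the manual-binding path

        umf_memory_provider_handle_t provider;
        if (umfMemoryProviderCreate(umfOsMemoryProviderOps(), &params,
                                    &provider) != UMF_RESULT_SUCCESS) {
            return 1;
        }

        // An 8 MiB allocation is bound in 2 MiB parts to nodes 0, 1, 0, 1.
        void *ptr = NULL;
        umf_result_t res = umfMemoryProviderAlloc(provider, 8 * 1024 * 1024, 0, &ptr);
        if (res == UMF_RESULT_SUCCESS) {
            umfMemoryProviderFree(provider, ptr, 8 * 1024 * 1024);
        }
        umfMemoryProviderDestroy(provider);
        return res == UMF_RESULT_SUCCESS ? 0 : 1;
    }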