+ // defines MAP_ANONYMOUS
+ #ifndef _GNU_SOURCE
+ #define _GNU_SOURCE
+ #endif
+
#include "ggml-alloc.h"
#include "ggml.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>

+ #ifdef __has_include
+ #if __has_include(<unistd.h>)
+ #include <unistd.h>
+ #if defined(_POSIX_MAPPED_FILES)
+ #include <sys/types.h>
+ #include <sys/mman.h>
+ #endif
+ #endif
+ #endif
+
+ #if defined(_WIN32)
+ #define WIN32_LEAN_AND_MEAN
+ #ifndef NOMINMAX
+ #define NOMINMAX
+ #endif
+ #include <windows.h>
+ #include <memoryapi.h>
+ #endif
+
+
#define UNUSED(x) (void)(x)
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
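Side note on the `_GNU_SOURCE` block above: `MAP_ANONYMOUS` is an extension rather than core POSIX, and libcs disagree on whether they define `MAP_ANONYMOUS`, `MAP_ANON`, or both. A minimal compatibility shim (an illustration only, not part of this patch) looks like:

// illustration: alias whichever anonymous-mapping flag the platform defines
#ifndef MAP_ANONYMOUS
#define MAP_ANONYMOUS MAP_ANON
#endif
#ifndef MAP_ANON
#define MAP_ANON MAP_ANONYMOUS
#endif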
@@ -99,19 +124,24 @@ static void remove_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tens
}
#endif

-
- static size_t ggml_allocator_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
+ static size_t ggml_allocr_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
    return ggml_nbytes(tensor);

    UNUSED(alloc);
}

+ // check if a tensor is allocated by this buffer
+ static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_tensor * tensor) {
+     void * ptr = tensor->data;
+     return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
+ }
+
void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
#ifdef GGML_ALLOCATOR_DEBUG
    GGML_ASSERT(ggml_is_view(tensor) == false); // views generally get data pointer from one of their sources
    GGML_ASSERT(tensor->data == NULL); // avoid allocating tensor which already has memory allocated
#endif
-     size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
+     size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
    size = aligned_offset(NULL, size, alloc->alignment);

    AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
@@ -177,17 +207,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
}

// this is a very naive implementation, but for our case the number of free blocks should be very small
- static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
+ static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
    void * ptr = tensor->data;

-     if (ptr < alloc->data || (char *)ptr >= (char *)alloc->data + alloc->max_size) {
+     if (ggml_allocr_is_own(alloc, tensor) == false) {
        // the tensor was not allocated in this buffer
        // this can happen because the graph allocator will try to free weights and other tensors from different buffers
        // the easiest way to deal with this is just to ignore it
        return;
    }

-     size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
+     size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
    size = aligned_offset(NULL, size, alloc->alignment);
    AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);

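Both the alloc and free paths round the tensor size up via `aligned_offset(NULL, size, alloc->alignment)`, so a freed block is exactly as large as the block that was allocated for it. With a NULL base pointer, that call reduces to rounding up to a multiple of the alignment; a minimal equivalent for a power-of-two alignment (shown for clarity, not taken from this file):

// sketch: round `size` up to the next multiple of a power-of-two `alignment`
static size_t round_up_pow2(size_t size, size_t alignment) {
    return (size + alignment - 1) & ~(alignment - 1);
}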
@@ -281,24 +311,64 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
    return alloc;
}

- // address and size of the buffer when measuring
- // it needs to be large enough to fit all the tensors, but it cannot overlap with other existing buffers
- static void * const MEASURE_BASE_ADDR = (void *) 0x1000;
- #if defined(__ARM_NEON) && !defined(__aarch64__)
- // 32-bit
- // TODO: Use for 32-bit x86 as well
- static const size_t MEASURE_MAX_SIZE = (1ULL<<32) - 1; // 4 GB
+ // OS specific functions to allocate and free uncommitted virtual memory
+ static void * alloc_vmem(size_t size) {
+ #if defined(_WIN32)
+     return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_NOACCESS);
+ #elif defined(_POSIX_MAPPED_FILES)
+     return mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
#else
- // 64-bit
- static const size_t MEASURE_MAX_SIZE = 1ULL<<40; // 1 TB
+     // use a fixed address for other platforms
+     uintptr_t base_addr = (uintptr_t)-size - 0x100;
+     return (void *)base_addr;
#endif
+ }
+
+ static void free_vmem(void * base_addr, size_t size) {
+ #if defined(_WIN32)
+     VirtualFree(base_addr, 0, MEM_RELEASE);
+     UNUSED(size);
+ #elif defined(_POSIX_MAPPED_FILES)
+     munmap(base_addr, size);
+ #else
+     // nothing to do
+     UNUSED(base_addr);
+     UNUSED(size);
+ #endif
+ }
+
+ // allocate uncommitted virtual memory to measure the size of the graph
+ static void alloc_measure_vmem(void ** base_addr, size_t * size) {
+     // 1TB for 64-bit, 1GB for 32-bit
+     *size = sizeof(void *) == 4 ? 1ULL<<30 : 1ULL<<40;
+     do {
+         *base_addr = alloc_vmem(*size);
+         if (*base_addr != NULL) {
+             AT_PRINTF("allocated %.2f GB of virtual memory for measure buffer at %p\n", *size / 1024.0 / 1024.0 / 1024.0, *base_addr);
+             return;
+         }
+         // try again with half the size
+         *size /= 2;
+     } while (*size > 0);
+
+     GGML_ASSERT(!"failed to allocate virtual memory for measure buffer");
+ }
+
+ static void free_measure_vmem(void * base_addr, size_t size) {
+     free_vmem(base_addr, size);
+ }
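These helpers only reserve address space: `MEM_RESERVE` with `PAGE_NOACCESS` on Windows and `PROT_NONE` on POSIX commit no physical memory, which is what makes a 1 TB measure buffer affordable. One caveat worth flagging: `mmap` signals failure with `MAP_FAILED` ((void *)-1) rather than NULL, so the `*base_addr != NULL` retry check above would not catch a failed POSIX reservation. A self-contained demo of the reservation pattern (hypothetical, POSIX-only, assumes `MAP_ANON` is defined):

#include <stdio.h>
#include <sys/mman.h>

int main(void) {
    size_t size = 1ULL << 30; // reserve 1 GB of address space, commit nothing
    void * base = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
    if (base == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    printf("reserved %zu bytes at %p; touching these pages would fault\n", size, base);
    munmap(base, size);
    return 0;
}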

struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
    struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);

+     void * base_addr;
+     size_t size;
+
+     alloc_measure_vmem(&base_addr, &size);
+
    *alloc = (struct ggml_allocr){
-         /*.data          = */ MEASURE_BASE_ADDR,
-         /*.size          = */ MEASURE_MAX_SIZE,
+         /*.data          = */ base_addr,
+         /*.size          = */ size,
        /*.alignment     = */ alignment,
        /*.n_free_blocks = */ 0,
        /*.free_blocks   = */ {{0}},
@@ -318,6 +388,9 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
}

void ggml_allocr_free(struct ggml_allocr * alloc) {
+     if (alloc->measure) {
+         free_measure_vmem(alloc->data, alloc->size);
+     }
    free(alloc);
}

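With the `measure` branch added here, the reservation made in `ggml_allocr_new_measure` is released symmetrically in `ggml_allocr_free`. A usage sketch of the measure-then-allocate pattern this supports (alignment value and graph construction are placeholders):

// sketch: size the compute buffer with a measure pass, then allocate for real
struct ggml_allocr * measure = ggml_allocr_new_measure(32);
size_t buf_size = ggml_allocr_alloc_graph(measure, graph); // graph built elsewhere
ggml_allocr_free(measure); // unmaps the reserved address space

// the graph must be rebuilt here: the measure pass wrote placeholder addresses
void * buf = malloc(buf_size);
struct ggml_allocr * allocr = ggml_allocr_new(buf, buf_size, 32);
ggml_allocr_alloc_graph(allocr, graph);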
@@ -387,8 +460,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
        }

        // if the node's data is external, then we cannot re-use it
-         if ((char *) parent->data < (char *) alloc->data ||
-             (char *) parent->data >= ((char *) alloc->data + alloc->size)) {
+         if (ggml_allocr_is_own(alloc, parent) == false) {
            AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
            continue;
        }
@@ -422,7 +494,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
    }
}

- static size_t ggml_allocator_alloc_graph_tensors_n(
+ static size_t ggml_allocr_alloc_graph_tensors_n(
    struct ggml_allocr * alloc,
    struct ggml_cgraph ** graphs, int n_graphs,
    struct ggml_tensor *** inputs, struct ggml_tensor *** outputs) {
@@ -500,11 +572,10 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
            AT_PRINTF("\n");
        }

-
        // update parents
        // update immediately if there is no parse_seq
        // update only at barriers if there is parse_seq
-         if ((alloc->parse_seq_len==0) || alloc->parse_seq[ind] == -1) {
+         if ((alloc->parse_seq_len == 0) || alloc->parse_seq[ind] == -1) {
            int update_start = alloc->parse_seq_len ? last_barrier_pos : ind;
            int update_end   = alloc->parse_seq_len ? ind : ind + 1;
            for (int i = update_start; i < update_end; i++) {
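For context on the condition above: `parse_seq` is an optional evaluation order for the graph nodes in which `-1` entries act as barriers, so parent bookkeeping (and therefore frees) is deferred until the next barrier and concurrently executed nodes cannot reuse one another's memory. A hedged sketch of installing such an order, assuming the `ggml_allocr_set_parse_seq` declaration from ggml-alloc.h:

// sketch: run nodes 0..2 concurrently, then 3..4; -1 marks a barrier
const int seq[] = { 0, 1, 2, -1, 3, 4, -1 };
ggml_allocr_set_parse_seq(allocr, seq, sizeof(seq) / sizeof(seq[0]));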
@@ -528,12 +599,12 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                        view_src_hn->n_views -= 1;
                        AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
                        if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
-                             ggml_allocator_free_tensor(alloc, view_src);
+                             ggml_allocr_free_tensor(alloc, view_src);
                        }
                    }
                    else {
                        if (parent->data != node->data) {
-                             ggml_allocator_free_tensor(alloc, parent);
+                             ggml_allocr_free_tensor(alloc, parent);
                        }
                    }
                }
@@ -550,7 +621,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
            for (int i = 0; outputs[g][i] != NULL; i++) {
                struct ggml_tensor * output = outputs[g][i];
                AT_PRINTF("output: %s\n", output->name);
-                 ggml_allocator_free_tensor(alloc, output);
+                 ggml_allocr_free_tensor(alloc, output);
            }
        }
    }
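The `inputs`/`outputs` parameters freed in the loop above are per-graph, NULL-terminated tensor lists: inputs are kept live for their graph, and outputs are only freed once the whole graph has been processed. A hypothetical call shape for the single-graph case (the function is static, so this is an internal-use illustration only; `inp`, `out`, and `graph` are placeholders):

// hypothetical: pin `inp` for graph 0 and keep `out` alive until the end
struct ggml_tensor *  g0_inputs[]  = { inp, NULL };
struct ggml_tensor *  g0_outputs[] = { out, NULL };
struct ggml_tensor ** inputs[]     = { g0_inputs };
struct ggml_tensor ** outputs[]    = { g0_outputs };
ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, inputs, outputs);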
@@ -559,5 +630,5 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
}

size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
-     return ggml_allocator_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
+     return ggml_allocr_alloc_graph_tensors_n(alloc, &graph, 1, NULL, NULL);
}