Skip to content

Commit cf9b084

Browse files
authored
ggml-alloc : use virtual memory for measurement (#2973)
* ggml-alloc : use virtual memory for measurement
* compatibility fixes for MAP_ANONYMOUS
* fallback to fixed address for systems without virtual memory
1 parent 47068e5 commit cf9b084

File tree

1 file changed

+97
-26
lines changed

1 file changed

+97
-26
lines changed

ggml-alloc.c

Lines changed: 97 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
// defines MAP_ANONYMOUS
2+
#ifndef _GNU_SOURCE
3+
#define _GNU_SOURCE
4+
#endif
5+
16
#include "ggml-alloc.h"
27
#include "ggml.h"
38
#include <assert.h>
@@ -6,6 +11,26 @@
611
#include <stdlib.h>
712
#include <string.h>
813

14+
#ifdef __has_include
15+
#if __has_include(<unistd.h>)
16+
#include <unistd.h>
17+
#if defined(_POSIX_MAPPED_FILES)
18+
#include <sys/types.h>
19+
#include <sys/mman.h>
20+
#endif
21+
#endif
22+
#endif
23+
24+
#if defined(_WIN32)
25+
#define WIN32_LEAN_AND_MEAN
26+
#ifndef NOMINMAX
27+
#define NOMINMAX
28+
#endif
29+
#include <windows.h>
30+
#include <memoryapi.h>
31+
#endif
32+
33+
934
#define UNUSED(x) (void)(x)
1035
#define MAX(a, b) ((a) > (b) ? (a) : (b))
1136
#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
@@ -99,19 +124,24 @@ static void remove_allocated_tensor(struct ggml_allocr * alloc, struct ggml_tens
99124
}
100125
#endif
101126

102-
103-
static size_t ggml_allocator_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
127+
// Number of bytes needed to hold `tensor`, as reported by ggml_nbytes().
// The allocator argument is accepted but not consulted.
static size_t ggml_allocr_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
    UNUSED(alloc);

    const size_t n_bytes = ggml_nbytes(tensor);
    return n_bytes;
}
108132

133+
// check if a tensor is allocated by this buffer
134+
static bool ggml_allocr_is_own(struct ggml_allocr * alloc, const struct ggml_tensor * tensor) {
135+
void * ptr = tensor->data;
136+
return ptr >= alloc->data && (char *)ptr < (char *)alloc->data + alloc->max_size;
137+
}
138+
109139
void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
110140
#ifdef GGML_ALLOCATOR_DEBUG
111141
GGML_ASSERT(ggml_is_view(tensor) == false); // views generally get data pointer from one of their sources
112142
GGML_ASSERT(tensor->data == NULL); // avoid allocating tensor which already has memory allocated
113143
#endif
114-
size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
144+
size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
115145
size = aligned_offset(NULL, size, alloc->alignment);
116146

117147
AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
@@ -177,17 +207,17 @@ void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor)
177207
}
178208

179209
// this is a very naive implementation, but for our case the number of free blocks should be very small
180-
static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
210+
static void ggml_allocr_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
181211
void * ptr = tensor->data;
182212

183-
if (ptr < alloc->data || (char*)ptr >= (char*)alloc->data + alloc->max_size) {
213+
if (ggml_allocr_is_own(alloc, tensor) == false) {
184214
// the tensor was not allocated in this buffer
185215
// this can happen because the graph allocator will try to free weights and other tensors from different buffers
186216
// the easiest way to deal with this is just to ignore it
187217
return;
188218
}
189219

190-
size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
220+
size_t size = ggml_allocr_get_alloc_size(alloc, tensor);
191221
size = aligned_offset(NULL, size, alloc->alignment);
192222
AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);
193223

@@ -281,24 +311,64 @@ struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment)
281311
return alloc;
282312
}
283313

284-
// address and size of the buffer when measuring
285-
// it needs to be large enough to fit all the tensors, but it cannot overlap with other existing buffers
286-
static void * const MEASURE_BASE_ADDR = (void *) 0x1000;
287-
#if defined(__ARM_NEON) && !defined(__aarch64__)
288-
// 32-bit
289-
// TODO: Use for 32-bit x86 as well
290-
static const size_t MEASURE_MAX_SIZE = (1ULL<<32) - 1; // 4 GB
314+
// OS specific functions to allocate and free uncommitted virtual memory
315+
// Reserve `size` bytes of uncommitted, inaccessible virtual address space.
// Returns the base address of the range, or NULL when the OS refuses the request,
// so callers can test for failure the same way on every platform.
static void * alloc_vmem(size_t size) {
#if defined(_WIN32)
    // VirtualAlloc already reports failure as NULL
    return VirtualAlloc(NULL, size, MEM_RESERVE, PAGE_NOACCESS);
#elif defined(_POSIX_MAPPED_FILES)
    void * ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
    if (ptr == MAP_FAILED) {
        // mmap signals failure with MAP_FAILED ((void *)-1), not NULL;
        // translate it so that the caller's NULL check (and its retry loop) works
        return NULL;
    }
    return ptr;
#else
    // use a fixed address for other platforms: nothing is reserved here, the range
    // is simply placed so that it ends 0x100 bytes below the top of the address space
    uintptr_t base_addr = (uintptr_t)-size - 0x100;
    return (void *)base_addr;
#endif
}
326+
327+
// Give back an address range previously obtained from alloc_vmem().
static void free_vmem(void * base_addr, size_t size) {
#if defined(_WIN32)
    // the size is not passed on: VirtualFree is called with 0 and MEM_RELEASE
    UNUSED(size);
    VirtualFree(base_addr, 0, MEM_RELEASE);
#elif defined(_POSIX_MAPPED_FILES)
    munmap(base_addr, size);
#else
    // the fixed-address fallback never reserved anything, so there is nothing to release
    UNUSED(size);
    UNUSED(base_addr);
#endif
}
339+
340+
// allocate uncommitted virtual memory to measure the size of the graph
341+
// Reserve the address range used by a measure allocator.
// On return *base_addr holds the start of the range and *size its length in bytes.
// The request starts at 1TB on 64-bit targets (1GB on 32-bit) and is halved after every
// failed attempt; if no size can be reserved the function asserts.
static void alloc_measure_vmem(void ** base_addr, size_t * size) {
    const bool is_32bit = sizeof(void *) == 4;
    *size = is_32bit ? 1ULL<<30 : 1ULL<<40;

    for (;;) {
        *base_addr = alloc_vmem(*size);
        if (*base_addr != NULL) {
            AT_PRINTF("allocated %.2f GB of virtual memory for measure buffer at %p\n", *size / 1024.0 / 1024.0 / 1024.0, *base_addr);
            return;
        }

        // the reservation failed: retry with half the size, give up once it reaches zero
        *size /= 2;
        if (*size == 0) {
            break;
        }
    }

    GGML_ASSERT(!"failed to allocate virtual memory for measure buffer");
}
356+
357+
// Release the address range of a measure buffer; forwards directly to free_vmem().
static void free_measure_vmem(void * base_addr, size_t size) {
    free_vmem(base_addr, size);
}
295360

296361
struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
297362
struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);
298363

364+
void * base_addr;
365+
size_t size;
366+
367+
alloc_measure_vmem(&base_addr, &size);
368+
299369
*alloc = (struct ggml_allocr){
300-
/*.data = */ MEASURE_BASE_ADDR,
301-
/*.size = */ MEASURE_MAX_SIZE,
370+
/*.data = */ base_addr,
371+
/*.size = */ size,
302372
/*.alignment = */ alignment,
303373
/*.n_free_blocks = */ 0,
304374
/*.free_blocks = */ {{0}},
@@ -318,6 +388,9 @@ struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
318388
}
319389

320390
void ggml_allocr_free(struct ggml_allocr * alloc) {
391+
if (alloc->measure) {
392+
free_measure_vmem(alloc->data, alloc->size);
393+
}
321394
free(alloc);
322395
}
323396

@@ -387,8 +460,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
387460
}
388461

389462
// if the node's data is external, then we cannot re-use it
390-
if ((char *) parent->data < (char *) alloc->data ||
391-
(char *) parent->data >= ((char *) alloc->data + alloc->size)) {
463+
if (ggml_allocr_is_own(alloc, parent) == false) {
392464
AT_PRINTF("not reusing parent %s for %s as %p is external\n", parent->name, node->name, parent->data);
393465
continue;
394466
}
@@ -422,7 +494,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
422494
}
423495
}
424496

425-
static size_t ggml_allocator_alloc_graph_tensors_n(
497+
static size_t ggml_allocr_alloc_graph_tensors_n(
426498
struct ggml_allocr * alloc,
427499
struct ggml_cgraph ** graphs, int n_graphs,
428500
struct ggml_tensor *** inputs, struct ggml_tensor *** outputs) {
@@ -500,11 +572,10 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
500572
AT_PRINTF("\n");
501573
}
502574

503-
504575
// update parents
505576
// update immediately if there is no parse_seq
506577
// update only at barriers if there is parse_seq
507-
if ((alloc->parse_seq_len==0) || alloc->parse_seq[ind] == -1) {
578+
if ((alloc->parse_seq_len == 0) || alloc->parse_seq[ind] == -1) {
508579
int update_start = alloc->parse_seq_len ? last_barrier_pos : ind;
509580
int update_end = alloc->parse_seq_len ? ind : ind + 1;
510581
for (int i = update_start; i < update_end; i++) {
@@ -528,12 +599,12 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
528599
view_src_hn->n_views -= 1;
529600
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
530601
if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
531-
ggml_allocator_free_tensor(alloc, view_src);
602+
ggml_allocr_free_tensor(alloc, view_src);
532603
}
533604
}
534605
else {
535606
if (parent->data != node->data) {
536-
ggml_allocator_free_tensor(alloc, parent);
607+
ggml_allocr_free_tensor(alloc, parent);
537608
}
538609
}
539610
}
@@ -550,7 +621,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
550621
for (int i = 0; outputs[g][i] != NULL; i++) {
551622
struct ggml_tensor * output = outputs[g][i];
552623
AT_PRINTF("output: %s\n", output->name);
553-
ggml_allocator_free_tensor(alloc, output);
624+
ggml_allocr_free_tensor(alloc, output);
554625
}
555626
}
556627
}
@@ -559,5 +630,5 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
559630
}
560631

561632
// Allocate the tensors of a single graph.
// Wraps the graph in a one-element list and forwards it to
// ggml_allocr_alloc_graph_tensors_n() without explicit input or output lists,
// returning that function's result.
size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
    struct ggml_cgraph * graphs[1] = { graph };

    return ggml_allocr_alloc_graph_tensors_n(alloc, graphs, 1, NULL, NULL);
}

0 commit comments

Comments
 (0)