ggml : allocate graphs in a context #2392

Merged: 8 commits, Jul 26, 2023
ggml.c (171 changes: 100 additions & 71 deletions)
@@ -4071,8 +4071,8 @@ bool ggml_is_numa(void) {
 ////////////////////////////////////////////////////////////////////////////////
 
 void ggml_print_object(const struct ggml_object * obj) {
-    GGML_PRINT(" - ggml_object: offset = %zu, size = %zu, next = %p\n",
-            obj->offs, obj->size, (const void *) obj->next);
+    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
+            obj->type, obj->offs, obj->size, (const void *) obj->next);
 }
 
 void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4212,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
 }
 
 size_t ggml_tensor_overhead(void) {
-    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16;
+    return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
 }
 
 bool ggml_is_transposed(const struct ggml_tensor * tensor) {
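Note: the 16 bytes of alignment slack are gone because the new central allocator, ggml_new_object() below, now pads every object size to GGML_MEM_ALIGN itself.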
@@ -4383,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
         return NULL;
     }
 
-    const size_t mem_size = (params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1);
+    const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN);
 
     *ctx = (struct ggml_context) {
         /*.mem_size           =*/ mem_size,
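Note: a caller-provided mem_buffer is now taken at its stated size; only buffers that ggml allocates itself are rounded up to GGML_MEM_ALIGN (rounding up a user buffer could claim memory past its end).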
@@ -4472,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
     struct ggml_object * obj = ctx->objects_begin;
 
     while (obj != NULL) {
-        struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
 
-        const size_t size = ggml_nbytes(tensor);
+            const size_t size = ggml_nbytes(tensor);
 
-        if (max_size < size) {
-            max_size = size;
+            if (max_size < size) {
+                max_size = size;
+            }
         }
 
         obj = obj->next;
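Since graphs and work buffers now live in the same object list as tensors, ggml_get_max_tensor_size() (and ggml_get_tensor(), further down) first checks obj->type and only treats GGML_OBJECT_TENSOR entries as tensors.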
@@ -4509,90 +4511,87 @@ static void ggml_scratch_load(struct ggml_context * ctx) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-static struct ggml_tensor * ggml_new_tensor_impl(
-        struct ggml_context * ctx,
-        enum ggml_type type,
-        int n_dims,
-        const int64_t* ne,
-        void* data) {
+static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
     // always insert objects at the end of the context's memory pool
     struct ggml_object * obj_cur = ctx->objects_end;
 
     const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
     const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
     const size_t cur_end  = cur_offs + cur_size;
 
-    size_t size_needed = 0;
-
-    if (data == NULL && !ctx->no_alloc) {
-        size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
-        for (int i = 1; i < n_dims; i++) {
-            size_needed *= ne[i];
-        }
-        // align to GGML_MEM_ALIGN
-        size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
-    }
+    // align to GGML_MEM_ALIGN
+    size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
 
     char * const mem_buffer = ctx->mem_buffer;
     struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
 
-    if (ctx->scratch.data == NULL || data != NULL) {
-        size_needed += GGML_TENSOR_SIZE;
-
-        if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
-            assert(false);
-            return NULL;
-        }
+    if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
+        GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
+                __func__, cur_end + size_needed, ctx->mem_size);
+        assert(false);
+        return NULL;
+    }
 
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = size_needed,
-            .next = NULL,
-        };
-    } else {
-        if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
-            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
-                    __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
-            assert(false);
-            return NULL;
-        }
-
-        if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
-            GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
-                    __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
-            assert(false);
-            return NULL;
-        }
-
-        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
-
-        *obj_new = (struct ggml_object) {
-            .offs = cur_end + GGML_OBJECT_SIZE,
-            .size = GGML_TENSOR_SIZE,
-            .next = NULL,
-        };
-
-        //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
-
-        ctx->scratch.offs += size_needed;
-    }
+    *obj_new = (struct ggml_object) {
+        .offs = cur_end + GGML_OBJECT_SIZE,
+        .size = size_needed,
+        .next = NULL,
+        .type = type,
+    };
+
+    ggml_assert_aligned(mem_buffer + obj_new->offs);
 
     if (obj_cur != NULL) {
         obj_cur->next = obj_new;
     } else {
         // this is the first object in this context
         ctx->objects_begin = obj_new;
     }
 
     ctx->objects_end = obj_new;
 
     //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
 
-    struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
+    return obj_new;
+}
 
-    ggml_assert_aligned(result);
+static struct ggml_tensor * ggml_new_tensor_impl(
+        struct ggml_context * ctx,
+        enum ggml_type type,
+        int n_dims,
+        const int64_t* ne,
+        void* data) {
+
+    size_t data_size = 0;
+
+    if (data == NULL && !ctx->no_alloc) {
+        data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
+        for (int i = 1; i < n_dims; i++) {
+            data_size *= ne[i];
+        }
+    }
+
+    if (ctx->scratch.data != NULL && data == NULL) {
+        // allocate tensor data in the scratch buffer
+        if (ctx->scratch.offs + data_size > ctx->scratch.size) {
+            GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
+                    __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
+            assert(false);
+            return NULL;
+        }
+
+        data = (char * const) ctx->scratch.data + ctx->scratch.offs;
+
+        ctx->scratch.offs += data_size;
+
+        data_size = 0;
+    }
+
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
+
+    // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
+
+    struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs);
 
     *result = (struct ggml_tensor) {
         /*.type         =*/ type,
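The accounting in ggml_new_object() is easy to restate in isolation. Below is a minimal, self-contained sketch, not part of the PR: MEM_ALIGN, PAD, and pool_bytes_for are stand-in names for GGML_MEM_ALIGN, GGML_PAD, and the arithmetic above.

#include <stddef.h>

#define MEM_ALIGN 16                              /* stand-in for GGML_MEM_ALIGN */
#define PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))  /* same mask trick as GGML_PAD */

/* bytes consumed from the context's pool by one allocation: a fixed object
   header, then the payload padded so the next header starts aligned */
static size_t pool_bytes_for(size_t header_size, size_t payload_size) {
    return header_size + PAD(payload_size, MEM_ALIGN);
}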
@@ -5026,9 +5025,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
     char * const mem_buffer = ctx->mem_buffer;
 
     while (obj != NULL) {
-        struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
-        if (strcmp(cur->name, name) == 0) {
-            return cur;
+        if (obj->type == GGML_OBJECT_TENSOR) {
+            struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+            if (strcmp(cur->name, name) == 0) {
+                return cur;
+            }
         }
 
         obj = obj->next;
@@ -15829,6 +15830,35 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep) {
     return result;
 }
 
+struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
+    struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
+
+    *cgraph = (struct ggml_cgraph) {
+        /*.n_nodes      =*/ 0,
+        /*.n_leafs      =*/ 0,
+        /*.nodes        =*/ { NULL },
+        /*.grads        =*/ { NULL },
+        /*.leafs        =*/ { NULL },
+        /*.hash_table   =*/ { NULL },
+        /*.perf_runs    =*/ 0,
+        /*.perf_cycles  =*/ 0,
+        /*.perf_time_us =*/ 0,
+    };
+
+    return cgraph;
+}
+
+struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
+    struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
+    ggml_build_forward_impl(cgraph, tensor, false);
+    return cgraph;
+}
+
+size_t ggml_graph_overhead(void) {
+    return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
+}
+
 //
 // thread data
 //
@@ -16544,10 +16574,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
 void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
-    struct ggml_tensor * buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
-    GGML_ASSERT(buf);
+    struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
 
-    cplan.work_data = buf->data;
+    cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
     ggml_graph_compute(cgraph, &cplan);
 }
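Taken together, the new entry points keep the tensors, the graph, and the compute work buffer in a single context. A minimal usage sketch, not taken from this PR; the tensor shapes, the 16 MB data budget, and n_threads = 4 are arbitrary choices for illustration:

#include "ggml.h"

void example(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 8*ggml_tensor_overhead() + ggml_graph_overhead() + 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    // allocates the cgraph as a GGML_OBJECT_GRAPH object inside ctx
    struct ggml_cgraph * gf = ggml_build_forward_ctx(ctx, c);

    // allocates the work buffer as a GGML_OBJECT_WORK_BUFFER object inside ctx
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 4);

    ggml_free(ctx);
}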
ggml.h (21 changes: 19 additions & 2 deletions)
@@ -208,6 +208,7 @@
 
 #define GGML_UNUSED(x) (void)(x)
 
+#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
 
 #define GGML_ASSERT(x) \
     do { \
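GGML_PAD(x, n) rounds x up to the next multiple of n; the mask trick requires n to be a power of two. A few concrete values, written as compile-time checks for illustration (these asserts are not in the header):

_Static_assert(GGML_PAD( 0, 16) ==  0, "");
_Static_assert(GGML_PAD( 1, 16) == 16, "");
_Static_assert(GGML_PAD(16, 16) == 16, "");
_Static_assert(GGML_PAD(17, 16) == 32, "");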
@@ -396,14 +397,22 @@ extern "C" {
         GGML_UNARY_OP_SILU,
     };
 
+    enum ggml_object_type {
+        GGML_OBJECT_TENSOR,
+        GGML_OBJECT_GRAPH,
+        GGML_OBJECT_WORK_BUFFER
+    };
+
     // ggml object
     struct ggml_object {
         size_t offs;
         size_t size;
 
         struct ggml_object * next;
 
-        char padding[8];
+        enum ggml_object_type type;
+
+        char padding[4];
     };
 
     static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
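A note on the layout, assuming a typical LP64 target where the enum occupies 4 bytes: offs, size, and next take 8 bytes each, so the 4-byte enum plus 4 bytes of explicit padding keep sizeof(struct ggml_object) at 32, a multiple of the usual 16-byte GGML_MEM_ALIGN; that way obj->offs = cur_end + GGML_OBJECT_SIZE in ggml_new_object() stays aligned. An illustrative check (not in the header):

_Static_assert(sizeof(struct ggml_object) % 16 == 0,
        "ggml_object must not break GGML_MEM_ALIGN");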
@@ -424,7 +433,7 @@ extern "C" {
         enum ggml_op op;
 
         // op params - allocated as int32_t for alignment
-        int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(uint32_t)];
+        int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
 
         bool is_param;
 
@@ -485,6 +494,8 @@ extern "C" {
         int64_t perf_time_us;
     };
 
+    static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
+
     // scratch buffer
     struct ggml_scratch {
         size_t offs;
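GGML_GRAPH_SIZE is the full sizeof(struct ggml_cgraph), fixed-size node/grad/leaf arrays and hash table included, so ggml_graph_overhead() (defined in ggml.c above) is a statically known but sizeable per-graph budget.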
@@ -1391,11 +1402,17 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * tensor);
 
+
     GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
 
     GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
     GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
 
+    // graph allocation in a context
+    GGML_API struct ggml_cgraph * ggml_new_graph        (struct ggml_context * ctx);
+    GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+    GGML_API size_t ggml_graph_overhead(void);
+
     // ggml_graph_plan() has to be called before ggml_graph_compute()
     // when plan.work_size > 0, caller must allocate memory for plan.work_data
     GGML_API struct ggml_cplan ggml_graph_plan   (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);