@@ -4071,8 +4071,8 @@ bool ggml_is_numa(void) {
4071
4071
////////////////////////////////////////////////////////////////////////////////
4072
4072
4073
4073
// Print a one-line summary of a context object: its type, payload offset,
// payload size and the address of the next object in the list.
void ggml_print_object(const struct ggml_object * obj) {
    // the enum's underlying integer type is implementation-defined, so cast it
    // to int to match the %d conversion exactly
    GGML_PRINT(" - ggml_object: type = %d, offset = %zu, size = %zu, next = %p\n",
            (int) obj->type, obj->offs, obj->size, (const void *) obj->next);
}
4077
4077
4078
4078
void ggml_print_objects(const struct ggml_context * ctx) {
@@ -4212,7 +4212,7 @@ enum ggml_type ggml_ftype_to_ggml_type(enum ggml_ftype ftype) {
4212
4212
}
4213
4213
4214
4214
size_t ggml_tensor_overhead(void) {
4215
- return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE + 16 ;
4215
+ return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
4216
4216
}
4217
4217
4218
4218
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
@@ -4383,7 +4383,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
4383
4383
return NULL;
4384
4384
}
4385
4385
4386
- const size_t mem_size = ( params.mem_size + GGML_MEM_ALIGN - 1) & ~(GGML_MEM_ALIGN - 1 );
4386
+ const size_t mem_size = params.mem_buffer ? params.mem_size : GGML_PAD(params.mem_size, GGML_MEM_ALIGN );
4387
4387
4388
4388
*ctx = (struct ggml_context) {
4389
4389
/*.mem_size =*/ mem_size,
@@ -4472,12 +4472,14 @@ size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
4472
4472
struct ggml_object * obj = ctx->objects_begin;
4473
4473
4474
4474
while (obj != NULL) {
4475
- struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
4475
+ if (obj->type == GGML_OBJECT_TENSOR) {
4476
+ struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
4476
4477
4477
- const size_t size = ggml_nbytes(tensor);
4478
+ const size_t size = ggml_nbytes(tensor);
4478
4479
4479
- if (max_size < size) {
4480
- max_size = size;
4480
+ if (max_size < size) {
4481
+ max_size = size;
4482
+ }
4481
4483
}
4482
4484
4483
4485
obj = obj->next;
@@ -4509,90 +4511,87 @@ static void ggml_scratch_load(struct ggml_context * ctx) {
4509
4511
4510
4512
////////////////////////////////////////////////////////////////////////////////
4511
4513
4512
- static struct ggml_tensor * ggml_new_tensor_impl(
4513
- struct ggml_context * ctx,
4514
- enum ggml_type type,
4515
- int n_dims,
4516
- const int64_t* ne,
4517
- void* data) {
4514
+ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml_object_type type, size_t size) {
4518
4515
// always insert objects at the end of the context's memory pool
4519
4516
struct ggml_object * obj_cur = ctx->objects_end;
4520
4517
4521
4518
const size_t cur_offs = obj_cur == NULL ? 0 : obj_cur->offs;
4522
4519
const size_t cur_size = obj_cur == NULL ? 0 : obj_cur->size;
4523
4520
const size_t cur_end = cur_offs + cur_size;
4524
4521
4525
- size_t size_needed = 0;
4526
-
4527
- if (data == NULL && !ctx->no_alloc) {
4528
- size_needed += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
4529
- for (int i = 1; i < n_dims; i++) {
4530
- size_needed *= ne[i];
4531
- }
4532
- // align to GGML_MEM_ALIGN
4533
- size_needed = ((size_needed + GGML_MEM_ALIGN - 1)/GGML_MEM_ALIGN)*GGML_MEM_ALIGN;
4534
- }
4522
+ // align to GGML_MEM_ALIGN
4523
+ size_t size_needed = GGML_PAD(size, GGML_MEM_ALIGN);
4535
4524
4536
4525
char * const mem_buffer = ctx->mem_buffer;
4537
4526
struct ggml_object * const obj_new = (struct ggml_object *)(mem_buffer + cur_end);
4538
4527
4539
- if (ctx->scratch.data == NULL || data != NULL) {
4540
- size_needed += GGML_TENSOR_SIZE;
4528
+ if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size) {
4529
+ GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
4530
+ __func__, cur_end + size_needed, ctx->mem_size);
4531
+ assert(false);
4532
+ return NULL;
4533
+ }
4541
4534
4542
- if (cur_end + size_needed + GGML_OBJECT_SIZE > ctx->mem_size ) {
4543
- GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n" ,
4544
- __func__, cur_end + size_needed + GGML_OBJECT_SIZE, ctx->mem_size);
4545
- assert(false);
4546
- return NULL;
4547
- }
4535
+ *obj_new = (struct ggml_object ) {
4536
+ .offs = cur_end + GGML_OBJECT_SIZE ,
4537
+ .size = size_needed,
4538
+ .next = NULL,
4539
+ .type = type,
4540
+ };
4548
4541
4549
- *obj_new = (struct ggml_object) {
4550
- .offs = cur_end + GGML_OBJECT_SIZE,
4551
- .size = size_needed,
4552
- .next = NULL,
4553
- };
4542
+ ggml_assert_aligned(mem_buffer + obj_new->offs);
4543
+
4544
+ if (obj_cur != NULL) {
4545
+ obj_cur->next = obj_new;
4554
4546
} else {
4555
- if (ctx->scratch.offs + size_needed > ctx->scratch.size) {
4556
- GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
4557
- __func__, ctx->scratch.offs + size_needed, ctx->scratch.size);
4558
- assert(false);
4559
- return NULL;
4547
+ // this is the first object in this context
4548
+ ctx->objects_begin = obj_new;
4549
+ }
4550
+
4551
+ ctx->objects_end = obj_new;
4552
+
4553
+ //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
4554
+
4555
+ return obj_new;
4556
+ }
4557
+
4558
+ static struct ggml_tensor * ggml_new_tensor_impl(
4559
+ struct ggml_context * ctx,
4560
+ enum ggml_type type,
4561
+ int n_dims,
4562
+ const int64_t* ne,
4563
+ void* data) {
4564
+
4565
+ size_t data_size = 0;
4566
+
4567
+ if (data == NULL && !ctx->no_alloc) {
4568
+ data_size += GGML_TYPE_SIZE[type]*(ne[0]/GGML_BLCK_SIZE[type]);
4569
+ for (int i = 1; i < n_dims; i++) {
4570
+ data_size *= ne[i];
4560
4571
}
4572
+ }
4561
4573
4562
- if (cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE > ctx->mem_size) {
4563
- GGML_PRINT("%s: not enough space in the context's memory pool (needed %zu, available %zu)\n",
4564
- __func__, cur_end + GGML_TENSOR_SIZE + GGML_OBJECT_SIZE, ctx->mem_size);
4574
+ if (ctx->scratch.data != NULL && data == NULL) {
4575
+ // allocate tensor data in the scratch buffer
4576
+ if (ctx->scratch.offs + data_size > ctx->scratch.size) {
4577
+ GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
4578
+ __func__, ctx->scratch.offs + data_size, ctx->scratch.size);
4565
4579
assert(false);
4566
4580
return NULL;
4567
4581
}
4568
4582
4569
4583
data = (char * const) ctx->scratch.data + ctx->scratch.offs;
4570
4584
4571
- *obj_new = (struct ggml_object) {
4572
- .offs = cur_end + GGML_OBJECT_SIZE,
4573
- .size = GGML_TENSOR_SIZE,
4574
- .next = NULL,
4575
- };
4576
-
4577
- //printf("scratch offs = %zu, size_needed = %zu\n", ctx->scratch.offs, size_needed);
4585
+ ctx->scratch.offs += data_size;
4578
4586
4579
- ctx->scratch.offs += size_needed ;
4587
+ data_size = 0 ;
4580
4588
}
4581
4589
4582
- if (obj_cur != NULL) {
4583
- obj_cur->next = obj_new;
4584
- } else {
4585
- // this is the first object in this context
4586
- ctx->objects_begin = obj_new;
4587
- }
4588
-
4589
- ctx->objects_end = obj_new;
4590
-
4591
- //printf("%s: inserted new object at %zu, size = %zu\n", __func__, cur_end, obj_new->size);
4590
+ struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
4592
4591
4593
- struct ggml_tensor * const result = (struct ggml_tensor *)(mem_buffer + obj_new->offs);
4592
+ // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
4594
4593
4595
- ggml_assert_aligned( result);
4594
+ struct ggml_tensor * const result = (struct ggml_tensor *)((char *)ctx->mem_buffer + obj_new->offs );
4596
4595
4597
4596
*result = (struct ggml_tensor) {
4598
4597
/*.type =*/ type,
@@ -5026,9 +5025,11 @@ struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * nam
5026
5025
char * const mem_buffer = ctx->mem_buffer;
5027
5026
5028
5027
while (obj != NULL) {
5029
- struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
5030
- if (strcmp(cur->name, name) == 0) {
5031
- return cur;
5028
+ if (obj->type == GGML_OBJECT_TENSOR) {
5029
+ struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
5030
+ if (strcmp(cur->name, name) == 0) {
5031
+ return cur;
5032
+ }
5032
5033
}
5033
5034
5034
5035
obj = obj->next;
@@ -15829,6 +15830,35 @@ struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cg
15829
15830
return result;
15830
15831
}
15831
15832
15833
+ struct ggml_cgraph * ggml_new_graph(struct ggml_context * ctx) {
15834
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_GRAPH, GGML_GRAPH_SIZE);
15835
+ struct ggml_cgraph * cgraph = (struct ggml_cgraph *) ((char *) ctx->mem_buffer + obj->offs);
15836
+
15837
+ *cgraph = (struct ggml_cgraph) {
15838
+ /*.n_nodes =*/ 0,
15839
+ /*.n_leafs =*/ 0,
15840
+ /*.nodes =*/ { NULL },
15841
+ /*.grads =*/ { NULL },
15842
+ /*.leafs =*/ { NULL },
15843
+ /*.hash_table =*/ { NULL },
15844
+ /*.perf_runs =*/ 0,
15845
+ /*.perf_cycles =*/ 0,
15846
+ /*.perf_time_us =*/ 0,
15847
+ };
15848
+
15849
+ return cgraph;
15850
+ }
15851
+
15852
+ struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor) {
15853
+ struct ggml_cgraph * cgraph = ggml_new_graph(ctx);
15854
+ ggml_build_forward_impl(cgraph, tensor, false);
15855
+ return cgraph;
15856
+ }
15857
+
15858
+ size_t ggml_graph_overhead(void) {
15859
+ return GGML_OBJECT_SIZE + GGML_PAD(GGML_GRAPH_SIZE, GGML_MEM_ALIGN);
15860
+ }
15861
+
15832
15862
//
15833
15863
// thread data
15834
15864
//
@@ -16544,10 +16574,9 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
16544
16574
void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
16545
16575
struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
16546
16576
16547
- struct ggml_tensor * buf = ggml_new_tensor_1d(ctx, GGML_TYPE_I8, cplan.work_size);
16548
- GGML_ASSERT(buf);
16577
+ struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_WORK_BUFFER, cplan.work_size);
16549
16578
16550
- cplan.work_data = buf->data ;
16579
+ cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs ;
16551
16580
16552
16581
ggml_graph_compute(cgraph, &cplan);
16553
16582
}
0 commit comments