@@ -18620,11 +18620,13 @@ struct gguf_tensor_info {
18620
18620
18621
18621
uint32_t n_dims;
18622
18622
uint32_t ne[GGML_MAX_DIMS];
18623
- uint32_t n_elms; // TODO: is this needed?
18624
18623
18625
18624
enum ggml_type type;
18626
18625
18627
18626
uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
18627
+
18628
+ // for writing
18629
+ const struct ggml_tensor * tensor;
18628
18630
};
18629
18631
18630
18632
struct gguf_context {
@@ -18635,27 +18637,27 @@ struct gguf_context {
18635
18637
18636
18638
size_t alignment;
18637
18639
size_t offset; // offset of `data` from beginning of file
18638
- size_t size_data; // size of `data` in bytes
18640
+ size_t size; // size of `data` in bytes
18639
18641
18640
18642
//uint8_t * padding;
18641
- uint8_t * data;
18643
+ void * data;
18642
18644
};
18643
18645
18644
// Read exactly `size` bytes from `file` into `dst`, advancing `*offset` by
// the number of bytes actually consumed (even on a short read).
// Returns true only if the full `size` bytes were read.
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
    const size_t got = fread(dst, 1, size, file);

    *offset += got;

    return got == size;
}
18649
18651
18650
- static bool gguf_fread_str(struct gguf_str * p, FILE * file , size_t * offset) {
18652
+ static bool gguf_fread_str(FILE * file, struct gguf_str * p , size_t * offset) {
18651
18653
p->n = 0;
18652
18654
p->data = NULL;
18653
18655
18654
18656
bool ok = true;
18655
18657
18656
18658
// TODO: how to avoid mallocs for strings?
18657
- ok = ok && gguf_fread_el(&p->n, sizeof(p->n), file , offset); p->data = calloc(p->n + 1, 1);
18658
- ok = ok && gguf_fread_el( p->data, p->n, file, offset);
18659
+ ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
18660
+ ok = ok && gguf_fread_el(file, p->data, p->n, offset);
18659
18661
18660
18662
return ok;
18661
18663
}
@@ -18673,7 +18675,7 @@ struct gguf_context * gguf_init_empty(void) {
18673
18675
18674
18676
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
18675
18677
ctx->offset = 0;
18676
- ctx->size_data = 0;
18678
+ ctx->size = 0;
18677
18679
18678
18680
ctx->data = NULL;
18679
18681
@@ -18693,7 +18695,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18693
18695
18694
18696
// check the magic before making allocations
18695
18697
{
18696
- gguf_fread_el(&magic, sizeof(magic), file , &offset);
18698
+ gguf_fread_el(file, &magic, sizeof(magic), &offset);
18697
18699
18698
18700
if (magic != GGUF_MAGIC) {
18699
18701
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
@@ -18714,9 +18716,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18714
18716
ctx->infos = NULL;
18715
18717
ctx->data = NULL;
18716
18718
18717
- ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
18718
- ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file , &offset);
18719
- ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
18719
+ ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
18720
+ ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
18721
+ ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
18720
18722
18721
18723
if (!ok) {
18722
18724
fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -18735,26 +18737,26 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18735
18737
18736
18738
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
18737
18739
18738
- ok = ok && gguf_fread_str(&kv->key, file, &offset);
18739
- //ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file , &offset);
18740
- ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
18740
+ ok = ok && gguf_fread_str(file, &kv->key, &offset);
18741
+ //ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
18742
+ ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
18741
18743
18742
18744
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
18743
18745
18744
18746
switch (kv->type) {
18745
- case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
18746
- case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
18747
- case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
18748
- case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
18749
- case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
18750
- case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
18751
- case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file , &offset); break;
18752
- case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
18753
- case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
18747
+ case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
18748
+ case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
18749
+ case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
18750
+ case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
18751
+ case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
18752
+ case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
18753
+ case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
18754
+ case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
18755
+ case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
18754
18756
case GGUF_TYPE_ARRAY:
18755
18757
{
18756
- ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file , &offset);
18757
- ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
18758
+ ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
18759
+ ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
18758
18760
18759
18761
switch (kv->value.arr.type) {
18760
18762
case GGUF_TYPE_UINT8:
@@ -18767,13 +18769,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18767
18769
case GGUF_TYPE_BOOL:
18768
18770
{
18769
18771
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
18770
- ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file , &offset);
18772
+ ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
18771
18773
} break;
18772
18774
case GGUF_TYPE_STRING:
18773
18775
{
18774
18776
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
18775
18777
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
18776
- ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file , &offset);
18778
+ ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
18777
18779
}
18778
18780
} break;
18779
18781
case GGUF_TYPE_ARRAY:
@@ -18807,14 +18809,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18807
18809
info->ne[j] = 1;
18808
18810
}
18809
18811
18810
- ok = ok && gguf_fread_str(&info->name, file, &offset);
18811
- ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
18812
+ ok = ok && gguf_fread_str(file, &info->name, &offset);
18813
+ ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
18812
18814
for (uint32_t j = 0; j < info->n_dims; ++j) {
18813
- ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file , &offset);
18815
+ ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
18814
18816
}
18815
- //ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
18816
- ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
18817
- ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
18817
+ ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
18818
+ ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
18818
18819
18819
18820
if (!ok) {
18820
18821
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
@@ -18847,7 +18848,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18847
18848
18848
18849
// compute the total size of the data section, taking into account the alignment
18849
18850
{
18850
- ctx->size_data = 0;
18851
+ ctx->size = 0;
18851
18852
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18852
18853
struct gguf_tensor_info * info = &ctx->infos[i];
18853
18854
@@ -18867,7 +18868,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18867
18868
18868
18869
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
18869
18870
18870
- ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
18871
+ ctx->size += GGML_PAD(size_cur, ctx->alignment);
18871
18872
}
18872
18873
}
18873
18874
@@ -18881,7 +18882,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18881
18882
const size_t mem_size =
18882
18883
params.no_alloc ?
18883
18884
(ctx->header.n_tensors )*ggml_tensor_overhead() :
18884
- (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data ;
18885
+ (ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size ;
18885
18886
18886
18887
struct ggml_init_params pdata = {
18887
18888
.mem_size = mem_size,
@@ -18896,12 +18897,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18896
18897
struct ggml_tensor * data = NULL;
18897
18898
18898
18899
if (params.no_alloc == false) {
18899
- data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size_data );
18900
+ data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size );
18900
18901
18901
18902
ok = ok && data != NULL;
18902
18903
18903
18904
// read the binary blob with the tensor data
18904
- ok = ok && gguf_fread_el(data->data, ctx->size_data, file , &offset);
18905
+ ok = ok && gguf_fread_el(file, data->data, ctx->size , &offset);
18905
18906
18906
18907
if (!ok) {
18907
18908
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
@@ -19274,15 +19275,136 @@ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tenso
19274
19275
for (int i = 0; i < tensor->n_dims; i++) {
19275
19276
ctx->infos[idx].ne[i] = tensor->ne[i];
19276
19277
}
19277
- //ctx->infos[idx].n_elms = tensor->n_elms;
19278
19278
19279
- ctx->infos[idx].type = tensor->type;
19279
+ ctx->infos[idx].type = tensor->type;
19280
+ ctx->infos[idx].offset = 0;
19281
+ ctx->infos[idx].tensor = tensor;
19280
19282
19281
- ctx->infos[idx].offset = -1; // set later;
19283
+ if (ctx->header.n_tensors > 0) {
19284
+ ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ggml_nbytes(tensor), ctx->alignment);
19285
+ }
19282
19286
19283
19287
ctx->header.n_tensors++;
19284
19288
}
19285
19289
19290
+ static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
19291
+ fwrite(&val->n, sizeof(val->n), 1, file);
19292
+ fwrite(val->data, sizeof(char), val->n, file);
19293
+ }
19294
+
19295
// Write `size` raw bytes from `val` to `file`. Write errors are not
// reported, matching the surrounding writer helpers.
static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
    fwrite(val, 1, size, file);
}
19298
+
19299
+ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
19300
+ FILE * file = fopen(fname, "wb");
19301
+ if (!file) {
19302
+ GGML_ASSERT(false && "failed to open file for writing");
19303
+ }
19304
+
19305
+ // write header
19306
+ fwrite(&ctx->header, sizeof(struct gguf_header), 1, file);
19307
+
19308
+ // write key-value pairs
19309
+ for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
19310
+ struct gguf_kv * kv = &ctx->kv[i];
19311
+
19312
+ gguf_fwrite_str(file, &kv->key);
19313
+ gguf_fwrite_el (file, &kv->type, sizeof(kv->type));
19314
+
19315
+ switch (kv->type) {
19316
+ case GGUF_TYPE_UINT8: gguf_fwrite_el (file, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
19317
+ case GGUF_TYPE_INT8: gguf_fwrite_el (file, &kv->value.int8, sizeof(kv->value.int8) ); break;
19318
+ case GGUF_TYPE_UINT16: gguf_fwrite_el (file, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
19319
+ case GGUF_TYPE_INT16: gguf_fwrite_el (file, &kv->value.int16, sizeof(kv->value.int16) ); break;
19320
+ case GGUF_TYPE_UINT32: gguf_fwrite_el (file, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
19321
+ case GGUF_TYPE_INT32: gguf_fwrite_el (file, &kv->value.int32, sizeof(kv->value.int32) ); break;
19322
+ case GGUF_TYPE_FLOAT32: gguf_fwrite_el (file, &kv->value.float32, sizeof(kv->value.float32)); break;
19323
+ case GGUF_TYPE_BOOL: gguf_fwrite_el (file, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
19324
+ case GGUF_TYPE_STRING: gguf_fwrite_str(file, &kv->value.str ); break;
19325
+ case GGUF_TYPE_ARRAY:
19326
+ {
19327
+ gguf_fwrite_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type));
19328
+ gguf_fwrite_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n) );
19329
+
19330
+ switch (kv->value.arr.type) {
19331
+ case GGUF_TYPE_UINT8:
19332
+ case GGUF_TYPE_INT8:
19333
+ case GGUF_TYPE_UINT16:
19334
+ case GGUF_TYPE_INT16:
19335
+ case GGUF_TYPE_UINT32:
19336
+ case GGUF_TYPE_INT32:
19337
+ case GGUF_TYPE_FLOAT32:
19338
+ case GGUF_TYPE_BOOL:
19339
+ {
19340
+ gguf_fwrite_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
19341
+ } break;
19342
+ case GGUF_TYPE_STRING:
19343
+ {
19344
+ for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
19345
+ gguf_fwrite_str(file, &((struct gguf_str *) kv->value.arr.data)[j]);
19346
+ }
19347
+ } break;
19348
+ case GGUF_TYPE_ARRAY:
19349
+ case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
19350
+ };
19351
+ } break;
19352
+ case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
19353
+ };
19354
+ }
19355
+
19356
+ // write tensor infos
19357
+ for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
19358
+ struct gguf_tensor_info * info = &ctx->infos[i];
19359
+
19360
+ gguf_fwrite_str(file, &info->name);
19361
+ gguf_fwrite_el (file, &info->n_dims, sizeof(info->n_dims));
19362
+ for (uint32_t j = 0; j < info->n_dims; ++j) {
19363
+ gguf_fwrite_el(file, &info->ne[j], sizeof(info->ne[j]));
19364
+ }
19365
+ gguf_fwrite_el (file, &info->type, sizeof(info->type));
19366
+ gguf_fwrite_el (file, &info->offset, sizeof(info->offset));
19367
+ }
19368
+
19369
+ // we require the data section to be aligned, so take into account any padding
19370
+ {
19371
+ const size_t offset = ftell(file);
19372
+ const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
19373
+
19374
+ if (offset_pad != offset) {
19375
+ uint8_t pad = 0;
19376
+ for (size_t i = 0; i < offset_pad - offset; ++i) {
19377
+ gguf_fwrite_el(file, &pad, sizeof(pad));
19378
+ }
19379
+ }
19380
+ }
19381
+
19382
+ size_t offset = 0;
19383
+
19384
+ // write tensor data
19385
+ for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
19386
+ struct gguf_tensor_info * info = &ctx->infos[i];
19387
+
19388
+ const size_t size = ggml_nbytes(info->tensor);
19389
+ const size_t size_pad = GGML_PAD(size, ctx->alignment);
19390
+
19391
+ gguf_fwrite_el(file, info->tensor->data, size);
19392
+
19393
+ if (size_pad != size) {
19394
+ uint8_t pad = 0;
19395
+ for (size_t j = 0; j < size_pad - size; ++j) {
19396
+ gguf_fwrite_el(file, &pad, sizeof(pad));
19397
+ }
19398
+ }
19399
+
19400
+ GGML_ASSERT(offset == info->offset);
19401
+
19402
+ offset += size_pad;
19403
+ }
19404
+
19405
+ fclose(file);
19406
+ }
19407
+
19286
19408
////////////////////////////////////////////////////////////////////////////////
19287
19409
19288
19410
int ggml_cpu_has_avx(void) {
0 commit comments