Skip to content

Commit 85ebfb8

Browse files
committed
gguf : write to file API (not tested)
1 parent 5cb9d9a commit 85ebfb8

File tree

3 files changed

+167
-43
lines changed

3 files changed

+167
-43
lines changed

convert-llama-h5-to-gguf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ def count_model_parts(dir_model: str) -> int:
132132
toktype = 1 # default to normal token type
133133
if tokenizer.is_unknown(i): toktype = 2
134134
if tokenizer.is_control(i): toktype = 3
135-
135+
136136
# TODO: How to determine if a token is user defined?
137137
# ref: https://github.com/google/sentencepiece/blob/master/src/sentencepiece_model.proto
138138
# if tokenizer.is_user_defined(i): toktype = 4

ggml.c

Lines changed: 164 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18620,11 +18620,13 @@ struct gguf_tensor_info {
1862018620

1862118621
uint32_t n_dims;
1862218622
uint32_t ne[GGML_MAX_DIMS];
18623-
uint32_t n_elms; // TODO: is this needed?
1862418623

1862518624
enum ggml_type type;
1862618625

1862718626
uint64_t offset; // offset from start of `data`, must be a multiple of `ALIGNMENT`
18627+
18628+
// for writing
18629+
const struct ggml_tensor * tensor;
1862818630
};
1862918631

1863018632
struct gguf_context {
@@ -18635,27 +18637,27 @@ struct gguf_context {
1863518637

1863618638
size_t alignment;
1863718639
size_t offset; // offset of `data` from beginning of file
18638-
size_t size_data; // size of `data` in bytes
18640+
size_t size; // size of `data` in bytes
1863918641

1864018642
//uint8_t * padding;
18641-
uint8_t * data;
18643+
void * data;
1864218644
};
1864318645

// Read exactly `size` bytes from `file` into `dst`.
// `*offset` is advanced by the number of bytes actually read, which may be
// short on EOF or I/O error. Returns true iff all `size` bytes were read.
static bool gguf_fread_el(FILE * file, void * dst, size_t size, size_t * offset) {
    const size_t n_read = fread(dst, 1, size, file);
    *offset += n_read;
    return n_read == size;
}
1864918651

18650-
static bool gguf_fread_str(struct gguf_str * p, FILE * file, size_t * offset) {
18652+
static bool gguf_fread_str(FILE * file, struct gguf_str * p, size_t * offset) {
1865118653
p->n = 0;
1865218654
p->data = NULL;
1865318655

1865418656
bool ok = true;
1865518657

1865618658
// TODO: how to avoid mallocs for strings?
18657-
ok = ok && gguf_fread_el(&p->n, sizeof(p->n), file, offset); p->data = calloc(p->n + 1, 1);
18658-
ok = ok && gguf_fread_el( p->data, p->n, file, offset);
18659+
ok = ok && gguf_fread_el(file, &p->n, sizeof(p->n), offset); p->data = calloc(p->n + 1, 1);
18660+
ok = ok && gguf_fread_el(file, p->data, p->n, offset);
1865918661

1866018662
return ok;
1866118663
}
@@ -18673,7 +18675,7 @@ struct gguf_context * gguf_init_empty(void) {
1867318675

1867418676
ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
1867518677
ctx->offset = 0;
18676-
ctx->size_data = 0;
18678+
ctx->size = 0;
1867718679

1867818680
ctx->data = NULL;
1867918681

@@ -18693,7 +18695,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1869318695

1869418696
// check the magic before making allocations
1869518697
{
18696-
gguf_fread_el(&magic, sizeof(magic), file, &offset);
18698+
gguf_fread_el(file, &magic, sizeof(magic), &offset);
1869718699

1869818700
if (magic != GGUF_MAGIC) {
1869918701
fprintf(stderr, "%s: invalid magic number %08x\n", __func__, magic);
@@ -18714,9 +18716,9 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1871418716
ctx->infos = NULL;
1871518717
ctx->data = NULL;
1871618718

18717-
ok = ok && gguf_fread_el(&ctx->header.version, sizeof(ctx->header.version), file, &offset);
18718-
ok = ok && gguf_fread_el(&ctx->header.n_tensors, sizeof(ctx->header.n_tensors), file, &offset);
18719-
ok = ok && gguf_fread_el(&ctx->header.n_kv, sizeof(ctx->header.n_kv), file, &offset);
18719+
ok = ok && gguf_fread_el(file, &ctx->header.version, sizeof(ctx->header.version), &offset);
18720+
ok = ok && gguf_fread_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors), &offset);
18721+
ok = ok && gguf_fread_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv), &offset);
1872018722

1872118723
if (!ok) {
1872218724
fprintf(stderr, "%s: failed to read header\n", __func__);
@@ -18735,26 +18737,26 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1873518737

1873618738
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
1873718739

18738-
ok = ok && gguf_fread_str(&kv->key, file, &offset);
18739-
//ok = ok && gguf_fread_el (&kv->n_bytes, sizeof(kv->n_bytes), file, &offset);
18740-
ok = ok && gguf_fread_el (&kv->type, sizeof(kv->type), file, &offset);
18740+
ok = ok && gguf_fread_str(file, &kv->key, &offset);
18741+
//ok = ok && gguf_fread_el (file, &kv->n_bytes, sizeof(kv->n_bytes), &offset);
18742+
ok = ok && gguf_fread_el (file, &kv->type, sizeof(kv->type), &offset);
1874118743

1874218744
//fprintf(stderr, "%s: reading kv with key %s\n", __func__, kv->key.data);
1874318745

1874418746
switch (kv->type) {
18745-
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (&kv->value.uint8, sizeof(kv->value.uint8), file, &offset); break;
18746-
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (&kv->value.int8, sizeof(kv->value.int8), file, &offset); break;
18747-
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (&kv->value.uint16, sizeof(kv->value.uint16), file, &offset); break;
18748-
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (&kv->value.int16, sizeof(kv->value.int16), file, &offset); break;
18749-
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (&kv->value.uint32, sizeof(kv->value.uint32), file, &offset); break;
18750-
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (&kv->value.int32, sizeof(kv->value.int32), file, &offset); break;
18751-
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (&kv->value.float32, sizeof(kv->value.float32), file, &offset); break;
18752-
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
18753-
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
18747+
case GGUF_TYPE_UINT8: ok = ok && gguf_fread_el (file, &kv->value.uint8, sizeof(kv->value.uint8), &offset); break;
18748+
case GGUF_TYPE_INT8: ok = ok && gguf_fread_el (file, &kv->value.int8, sizeof(kv->value.int8), &offset); break;
18749+
case GGUF_TYPE_UINT16: ok = ok && gguf_fread_el (file, &kv->value.uint16, sizeof(kv->value.uint16), &offset); break;
18750+
case GGUF_TYPE_INT16: ok = ok && gguf_fread_el (file, &kv->value.int16, sizeof(kv->value.int16), &offset); break;
18751+
case GGUF_TYPE_UINT32: ok = ok && gguf_fread_el (file, &kv->value.uint32, sizeof(kv->value.uint32), &offset); break;
18752+
case GGUF_TYPE_INT32: ok = ok && gguf_fread_el (file, &kv->value.int32, sizeof(kv->value.int32), &offset); break;
18753+
case GGUF_TYPE_FLOAT32: ok = ok && gguf_fread_el (file, &kv->value.float32, sizeof(kv->value.float32), &offset); break;
18754+
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (file, &kv->value.bool_, sizeof(kv->value.bool_), &offset); break;
18755+
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(file, &kv->value.str, &offset); break;
1875418756
case GGUF_TYPE_ARRAY:
1875518757
{
18756-
ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
18757-
ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
18758+
ok = ok && gguf_fread_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type), &offset);
18759+
ok = ok && gguf_fread_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n), &offset);
1875818760

1875918761
switch (kv->value.arr.type) {
1876018762
case GGUF_TYPE_UINT8:
@@ -18767,13 +18769,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1876718769
case GGUF_TYPE_BOOL:
1876818770
{
1876918771
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
18770-
ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
18772+
ok = ok && gguf_fread_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], &offset);
1877118773
} break;
1877218774
case GGUF_TYPE_STRING:
1877318775
{
1877418776
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
1877518777
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
18776-
ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
18778+
ok = ok && gguf_fread_str(file, &((struct gguf_str *) kv->value.arr.data)[j], &offset);
1877718779
}
1877818780
} break;
1877918781
case GGUF_TYPE_ARRAY:
@@ -18807,14 +18809,13 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1880718809
info->ne[j] = 1;
1880818810
}
1880918811

18810-
ok = ok && gguf_fread_str(&info->name, file, &offset);
18811-
ok = ok && gguf_fread_el (&info->n_dims, sizeof(info->n_dims), file, &offset);
18812+
ok = ok && gguf_fread_str(file, &info->name, &offset);
18813+
ok = ok && gguf_fread_el (file, &info->n_dims, sizeof(info->n_dims), &offset);
1881218814
for (uint32_t j = 0; j < info->n_dims; ++j) {
18813-
ok = ok && gguf_fread_el(&info->ne[j], sizeof(info->ne[j]), file, &offset);
18815+
ok = ok && gguf_fread_el(file, &info->ne[j], sizeof(info->ne[j]), &offset);
1881418816
}
18815-
//ok = ok && gguf_fread_el (&info->n_elms, sizeof(info->n_elms), file, &offset);
18816-
ok = ok && gguf_fread_el (&info->type, sizeof(info->type), file, &offset);
18817-
ok = ok && gguf_fread_el (&info->offset, sizeof(info->offset), file, &offset);
18817+
ok = ok && gguf_fread_el (file, &info->type, sizeof(info->type), &offset);
18818+
ok = ok && gguf_fread_el (file, &info->offset, sizeof(info->offset), &offset);
1881818819

1881918820
if (!ok) {
1882018821
fprintf(stderr, "%s: failed to read tensor info\n", __func__);
@@ -18847,7 +18848,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1884718848

1884818849
// compute the total size of the data section, taking into account the alignment
1884918850
{
18850-
ctx->size_data = 0;
18851+
ctx->size = 0;
1885118852
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
1885218853
struct gguf_tensor_info * info = &ctx->infos[i];
1885318854

@@ -18867,7 +18868,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1886718868

1886818869
const size_t size_cur = (ne*ggml_type_size(info->type))/ggml_blck_size(info->type);
1886918870

18870-
ctx->size_data += GGML_PAD(size_cur, ctx->alignment);
18871+
ctx->size += GGML_PAD(size_cur, ctx->alignment);
1887118872
}
1887218873
}
1887318874

@@ -18881,7 +18882,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1888118882
const size_t mem_size =
1888218883
params.no_alloc ?
1888318884
(ctx->header.n_tensors )*ggml_tensor_overhead() :
18884-
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size_data;
18885+
(ctx->header.n_tensors + 1)*ggml_tensor_overhead() + ctx->size;
1888518886

1888618887
struct ggml_init_params pdata = {
1888718888
.mem_size = mem_size,
@@ -18896,12 +18897,12 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1889618897
struct ggml_tensor * data = NULL;
1889718898

1889818899
if (params.no_alloc == false) {
18899-
data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size_data);
18900+
data = ggml_new_tensor_1d(ctx_data, GGML_TYPE_I8, ctx->size);
1890018901

1890118902
ok = ok && data != NULL;
1890218903

1890318904
// read the binary blob with the tensor data
18904-
ok = ok && gguf_fread_el(data->data, ctx->size_data, file, &offset);
18905+
ok = ok && gguf_fread_el(file, data->data, ctx->size, &offset);
1890518906

1890618907
if (!ok) {
1890718908
fprintf(stderr, "%s: failed to read tensor data\n", __func__);
@@ -19274,15 +19275,136 @@ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tenso
1927419275
for (int i = 0; i < tensor->n_dims; i++) {
1927519276
ctx->infos[idx].ne[i] = tensor->ne[i];
1927619277
}
19277-
//ctx->infos[idx].n_elms = tensor->n_elms;
1927819278

19279-
ctx->infos[idx].type = tensor->type;
19279+
ctx->infos[idx].type = tensor->type;
19280+
ctx->infos[idx].offset = 0;
19281+
ctx->infos[idx].tensor = tensor;
1928019282

19281-
ctx->infos[idx].offset = -1; // set later;
19283+
if (ctx->header.n_tensors > 0) {
19284+
ctx->infos[idx].offset = ctx->infos[idx - 1].offset + GGML_PAD(ggml_nbytes(tensor), ctx->alignment);
19285+
}
1928219286

1928319287
ctx->header.n_tensors++;
1928419288
}
1928519289

19290+
// Serialize a gguf string: the length field `val->n` followed by exactly
// `val->n` raw bytes (no NUL terminator is written).
// NOTE(review): fwrite results are ignored — short writes go undetected.
static void gguf_fwrite_str(FILE * file, const struct gguf_str * val) {
    fwrite(&val->n, sizeof(val->n), 1, file);
    fwrite(val->data, 1, val->n, file);
}
19294+
// Dump `size` raw bytes starting at `val` to `file`.
// NOTE(review): the fwrite result is ignored — short writes go undetected.
static void gguf_fwrite_el(FILE * file, const void * val, size_t size) {
    fwrite(val, 1, size, file);
}
19298+
19299+
void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
19300+
FILE * file = fopen(fname, "wb");
19301+
if (!file) {
19302+
GGML_ASSERT(false && "failed to open file for writing");
19303+
}
19304+
19305+
// write header
19306+
fwrite(&ctx->header, sizeof(struct gguf_header), 1, file);
19307+
19308+
// write key-value pairs
19309+
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
19310+
struct gguf_kv * kv = &ctx->kv[i];
19311+
19312+
gguf_fwrite_str(file, &kv->key);
19313+
gguf_fwrite_el (file, &kv->type, sizeof(kv->type));
19314+
19315+
switch (kv->type) {
19316+
case GGUF_TYPE_UINT8: gguf_fwrite_el (file, &kv->value.uint8, sizeof(kv->value.uint8) ); break;
19317+
case GGUF_TYPE_INT8: gguf_fwrite_el (file, &kv->value.int8, sizeof(kv->value.int8) ); break;
19318+
case GGUF_TYPE_UINT16: gguf_fwrite_el (file, &kv->value.uint16, sizeof(kv->value.uint16) ); break;
19319+
case GGUF_TYPE_INT16: gguf_fwrite_el (file, &kv->value.int16, sizeof(kv->value.int16) ); break;
19320+
case GGUF_TYPE_UINT32: gguf_fwrite_el (file, &kv->value.uint32, sizeof(kv->value.uint32) ); break;
19321+
case GGUF_TYPE_INT32: gguf_fwrite_el (file, &kv->value.int32, sizeof(kv->value.int32) ); break;
19322+
case GGUF_TYPE_FLOAT32: gguf_fwrite_el (file, &kv->value.float32, sizeof(kv->value.float32)); break;
19323+
case GGUF_TYPE_BOOL: gguf_fwrite_el (file, &kv->value.bool_, sizeof(kv->value.bool_) ); break;
19324+
case GGUF_TYPE_STRING: gguf_fwrite_str(file, &kv->value.str ); break;
19325+
case GGUF_TYPE_ARRAY:
19326+
{
19327+
gguf_fwrite_el(file, &kv->value.arr.type, sizeof(kv->value.arr.type));
19328+
gguf_fwrite_el(file, &kv->value.arr.n, sizeof(kv->value.arr.n) );
19329+
19330+
switch (kv->value.arr.type) {
19331+
case GGUF_TYPE_UINT8:
19332+
case GGUF_TYPE_INT8:
19333+
case GGUF_TYPE_UINT16:
19334+
case GGUF_TYPE_INT16:
19335+
case GGUF_TYPE_UINT32:
19336+
case GGUF_TYPE_INT32:
19337+
case GGUF_TYPE_FLOAT32:
19338+
case GGUF_TYPE_BOOL:
19339+
{
19340+
gguf_fwrite_el(file, kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
19341+
} break;
19342+
case GGUF_TYPE_STRING:
19343+
{
19344+
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
19345+
gguf_fwrite_str(file, &((struct gguf_str *) kv->value.arr.data)[j]);
19346+
}
19347+
} break;
19348+
case GGUF_TYPE_ARRAY:
19349+
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
19350+
};
19351+
} break;
19352+
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
19353+
};
19354+
}
19355+
19356+
// write tensor infos
19357+
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
19358+
struct gguf_tensor_info * info = &ctx->infos[i];
19359+
19360+
gguf_fwrite_str(file, &info->name);
19361+
gguf_fwrite_el (file, &info->n_dims, sizeof(info->n_dims));
19362+
for (uint32_t j = 0; j < info->n_dims; ++j) {
19363+
gguf_fwrite_el(file, &info->ne[j], sizeof(info->ne[j]));
19364+
}
19365+
gguf_fwrite_el (file, &info->type, sizeof(info->type));
19366+
gguf_fwrite_el (file, &info->offset, sizeof(info->offset));
19367+
}
19368+
19369+
// we require the data section to be aligned, so take into account any padding
19370+
{
19371+
const size_t offset = ftell(file);
19372+
const size_t offset_pad = GGML_PAD(offset, ctx->alignment);
19373+
19374+
if (offset_pad != offset) {
19375+
uint8_t pad = 0;
19376+
for (size_t i = 0; i < offset_pad - offset; ++i) {
19377+
gguf_fwrite_el(file, &pad, sizeof(pad));
19378+
}
19379+
}
19380+
}
19381+
19382+
size_t offset = 0;
19383+
19384+
// write tensor data
19385+
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
19386+
struct gguf_tensor_info * info = &ctx->infos[i];
19387+
19388+
const size_t size = ggml_nbytes(info->tensor);
19389+
const size_t size_pad = GGML_PAD(size, ctx->alignment);
19390+
19391+
gguf_fwrite_el(file, info->tensor->data, size);
19392+
19393+
if (size_pad != size) {
19394+
uint8_t pad = 0;
19395+
for (size_t j = 0; j < size_pad - size; ++j) {
19396+
gguf_fwrite_el(file, &pad, sizeof(pad));
19397+
}
19398+
}
19399+
19400+
GGML_ASSERT(offset == info->offset);
19401+
19402+
offset += size_pad;
19403+
}
19404+
19405+
fclose(file);
19406+
}
19407+
1928619408
////////////////////////////////////////////////////////////////////////////////
1928719409

1928819410
int ggml_cpu_has_avx(void) {

ggml.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1790,6 +1790,8 @@ extern "C" {
17901790

17911791
GGML_API void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor);
17921792

1793+
GGML_API void gguf_write_to_file(struct gguf_context * ctx, const char * fname);
1794+
17931795
//
17941796
// system info
17951797
//

0 commit comments

Comments
 (0)