Skip to content

Commit c9b2f7f

Browse files
committed
gguf : fixes + simplify example + add ggml_nbytes_pad()
1 parent 4463965 commit c9b2f7f

File tree

3 files changed

+17
-93
lines changed

3 files changed

+17
-93
lines changed

examples/gguf/gguf.cpp

Lines changed: 4 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -21,91 +21,6 @@ static std::string to_string(const T & val) {
2121
return ss.str();
2222
}
2323

24-
void gguf_ex_write_str(std::ofstream & fout, const std::string & val) {
25-
const int32_t n = val.size();
26-
fout.write((const char *) &n, sizeof(n));
27-
fout.write(val.c_str(), n);
28-
}
29-
30-
void gguf_ex_write_i32(std::ofstream & fout, int32_t val) {
31-
fout.write((const char *) &val, sizeof(val));
32-
}
33-
34-
void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
35-
fout.write((const char *) &val, sizeof(val));
36-
}
37-
38-
template<typename T>
39-
void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
40-
gguf_ex_write_str(fout, key);
41-
fout.write((const char *) &type, sizeof(type));
42-
fout.write((const char *) &val, sizeof(val));
43-
44-
fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), to_string(val).c_str());
45-
}
46-
47-
template<>
48-
void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
49-
gguf_ex_write_str(fout, key);
50-
fout.write((const char *) &type, sizeof(type));
51-
52-
const int32_t n = val.size();
53-
fout.write((const char *) &n, sizeof(n));
54-
fout.write(val.c_str(), n);
55-
56-
fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
57-
}
58-
59-
template<typename T>
60-
void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
61-
gguf_ex_write_str(fout, key);
62-
{
63-
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
64-
fout.write((const char *) &tarr, sizeof(tarr));
65-
}
66-
67-
const int32_t n = val.size();
68-
fout.write((const char *) &type, sizeof(type));
69-
fout.write((const char *) &n, sizeof(n));
70-
fout.write((const char *) val.data(), n * sizeof(T));
71-
72-
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
73-
for (int i = 0; i < n; ++i) {
74-
fprintf(stdout, "%s", to_string(val[i]).c_str());
75-
if (i < n - 1) {
76-
fprintf(stdout, ", ");
77-
}
78-
}
79-
fprintf(stdout, "]\n");
80-
}
81-
82-
template<>
83-
void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
84-
gguf_ex_write_str(fout, key);
85-
{
86-
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
87-
fout.write((const char *) &tarr, sizeof(tarr));
88-
}
89-
90-
const int32_t n = val.size();
91-
fout.write((const char *) &type, sizeof(type));
92-
fout.write((const char *) &n, sizeof(n));
93-
for (int i = 0; i < n; ++i) {
94-
const int32_t nstr = val[i].size();
95-
fout.write((const char *) &nstr, sizeof(nstr));
96-
fout.write(val[i].c_str(), nstr);
97-
}
98-
99-
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
100-
for (int i = 0; i < n; ++i) {
101-
fprintf(stdout, "%s", val[i].c_str());
102-
if (i < n - 1) {
103-
fprintf(stdout, ", ");
104-
}
105-
}
106-
fprintf(stdout, "]\n");
107-
}
108-
10924
bool gguf_ex_write(const std::string & fname) {
11025
struct gguf_context * ctx = gguf_init_empty();
11126

@@ -118,11 +33,11 @@ bool gguf_ex_write(const std::string & fname) {
11833
gguf_set_val_i32 (ctx, "some.parameter.int32", -0x12345679);
11934
gguf_set_val_f32 (ctx, "some.parameter.float32", 0.123456789f);
12035
gguf_set_val_bool(ctx, "some.parameter.bool", true);
121-
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
36+
gguf_set_val_str (ctx, "some.parameter.string", "hello world");
12237

123-
//gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
124-
//gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
125-
//gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" });
38+
gguf_set_arr_data(ctx, "some.parameter.arr.i16", GGUF_TYPE_INT16, std::vector<int16_t>{ 1, 2, 3, 4, }.data(), 4);
39+
gguf_set_arr_data(ctx, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, std::vector<float>{ 3.145f, 2.718f, 1.414f, }.data(), 3);
40+
gguf_set_arr_str (ctx, "some.parameter.arr.str", std::vector<const char *>{ "hello", "world", "!" }.data(), 3);
12641
}
12742

12843
struct ggml_init_params params = {

ggml.c

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -213,10 +213,10 @@ inline static void * ggml_aligned_malloc(size_t size) {
213213
error_desc = "insufficient memory";
214214
break;
215215
}
216-
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n",
217-
__func__, error_desc, size/(1024.0*1024.0));
216+
GGML_PRINT("%s: %s (attempted to allocate %6.2f MB)\n", __func__, error_desc, size/(1024.0*1024.0));
218217
return NULL;
219218
}
219+
220220
return aligned_memory;
221221
}
222222
#define GGML_ALIGNED_MALLOC(size) ggml_aligned_malloc(size)
@@ -4109,7 +4109,11 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
41094109
//
41104110
// is enough, but just in case, adding the second part
41114111

4112-
return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
4112+
return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
4113+
}
4114+
4115+
size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
4116+
return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
41134117
}
41144118

41154119
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
@@ -19271,6 +19275,10 @@ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tenso
1927119275
ctx->infos[idx].name.n = strlen(tensor->name) + 1;
1927219276
ctx->infos[idx].name.data = strdup(tensor->name);
1927319277

19278+
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
19279+
ctx->infos[idx].ne[i] = 1;
19280+
}
19281+
1927419282
ctx->infos[idx].n_dims = tensor->n_dims;
1927519283
for (int i = 0; i < tensor->n_dims; i++) {
1927619284
ctx->infos[idx].ne[i] = tensor->ne[i];
@@ -19305,8 +19313,8 @@ void gguf_write_to_file(struct gguf_context * ctx, const char * fname) {
1930519313
// write header
1930619314
gguf_fwrite_el(file, &ctx->header.magic, sizeof(ctx->header.magic));
1930719315
gguf_fwrite_el(file, &ctx->header.version, sizeof(ctx->header.version));
19308-
gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
1930919316
gguf_fwrite_el(file, &ctx->header.n_tensors, sizeof(ctx->header.n_tensors));
19317+
gguf_fwrite_el(file, &ctx->header.n_kv, sizeof(ctx->header.n_kv));
1931019318

1931119319
// write key-value pairs
1931219320
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {

ggml.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -566,6 +566,7 @@ extern "C" {
566566
GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
567567
GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
568568
GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
569+
GGML_API size_t ggml_nbytes_pad (const struct ggml_tensor * tensor); // same as ggml_nbytes() but padded to GGML_MEM_ALIGN
569570
GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
570571

571572
GGML_API int ggml_blck_size (enum ggml_type type);

0 commit comments

Comments
 (0)