Skip to content

Commit d2b6ca1

Browse files
committed
gguf : add array support
1 parent d89533d commit d2b6ca1

File tree

3 files changed

+135
-20
lines changed

3 files changed

+135
-20
lines changed

examples/gguf/gguf.cpp

Lines changed: 73 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
2929
}
3030

3131
template<typename T>
32-
void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
32+
void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
3333
gguf_ex_write_str(fout, key);
3434
fout.write((const char *) &type, sizeof(type));
3535
fout.write((const char *) &val, sizeof(val));
@@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu
3838
}
3939

4040
template<>
41-
void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
41+
void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
4242
gguf_ex_write_str(fout, key);
4343
fout.write((const char *) &type, sizeof(type));
4444

4545
const int32_t n = val.size();
4646
fout.write((const char *) &n, sizeof(n));
4747
fout.write(val.c_str(), n);
48+
49+
fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
50+
}
51+
52+
template<typename T>
53+
void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
54+
gguf_ex_write_str(fout, key);
55+
{
56+
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
57+
fout.write((const char *) &tarr, sizeof(tarr));
58+
}
59+
60+
const int32_t n = val.size();
61+
fout.write((const char *) &type, sizeof(type));
62+
fout.write((const char *) &n, sizeof(n));
63+
fout.write((const char *) val.data(), n * sizeof(T));
64+
65+
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
66+
for (int i = 0; i < n; ++i) {
67+
fprintf(stdout, "%s", to_string(val[i]).c_str());
68+
if (i < n - 1) {
69+
fprintf(stdout, ", ");
70+
}
71+
}
72+
fprintf(stdout, "]\n");
73+
}
74+
75+
template<>
76+
void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
77+
gguf_ex_write_str(fout, key);
78+
{
79+
const enum gguf_type tarr = GGUF_TYPE_ARRAY;
80+
fout.write((const char *) &tarr, sizeof(tarr));
81+
}
82+
83+
const int32_t n = val.size();
84+
fout.write((const char *) &type, sizeof(type));
85+
fout.write((const char *) &n, sizeof(n));
86+
for (int i = 0; i < n; ++i) {
87+
const int32_t nstr = val[i].size();
88+
fout.write((const char *) &nstr, sizeof(nstr));
89+
fout.write(val[i].c_str(), nstr);
90+
}
91+
92+
fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
93+
for (int i = 0; i < n; ++i) {
94+
fprintf(stdout, "%s", val[i].c_str());
95+
if (i < n - 1) {
96+
fprintf(stdout, ", ");
97+
}
98+
}
99+
fprintf(stdout, "]\n");
48100
}
49101

50102
bool gguf_ex_write(const std::string & fname) {
@@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) {
60112
fout.write((const char *) &version, sizeof(version));
61113
}
62114

115+
// NOTE: these have to match the output below!
63116
const int n_tensors = 10;
64-
const int n_kv = 9;
117+
const int n_kv = 12;
65118

66119
fout.write((const char*) &n_tensors, sizeof(n_tensors));
67120
fout.write((const char*) &n_kv, sizeof(n_kv));
@@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) {
70123

71124
// kv data
72125
{
73-
gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
74-
gguf_ex_write_param< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
75-
gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
76-
gguf_ex_write_param< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
77-
gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
78-
gguf_ex_write_param< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
126+
gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8", GGUF_TYPE_UINT8, 0x12);
127+
gguf_ex_write_val< int8_t>(fout, "some.parameter.int8", GGUF_TYPE_INT8, -0x13);
128+
gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16", GGUF_TYPE_UINT16, 0x1234);
129+
gguf_ex_write_val< int16_t>(fout, "some.parameter.int16", GGUF_TYPE_INT16, -0x1235);
130+
gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32", GGUF_TYPE_UINT32, 0x12345678);
131+
gguf_ex_write_val< int32_t>(fout, "some.parameter.int32", GGUF_TYPE_INT32, -0x12345679);
79132

80-
gguf_ex_write_param<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
81-
gguf_ex_write_param<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
133+
gguf_ex_write_val<float> (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
134+
gguf_ex_write_val<bool> (fout, "some.parameter.bool", GGUF_TYPE_BOOL, true);
82135

83-
gguf_ex_write_param<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
136+
gguf_ex_write_val<std::string>(fout, "some.parameter.string", GGUF_TYPE_STRING, "hello world");
137+
138+
gguf_ex_write_arr<int16_t> (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16, { 1, 2, 3, 4, });
139+
gguf_ex_write_arr<float> (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
140+
gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING, { "hello", "world", "!" });
84141
}
85142

86143
uint64_t offset_tensor = 0;
@@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) {
203260
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
204261

205262
for (int i = 0; i < n_tensors; ++i) {
206-
const char * name = gguf_get_tensor_name(ctx, i);
263+
const char * name = gguf_get_tensor_name (ctx, i);
207264
const size_t offset = gguf_get_tensor_offset(ctx, i);
208265

209266
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
210267
}
211268
}
212269

270+
gguf_free(ctx);
271+
213272
return true;
214273
}
215274

@@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) {
248307
fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
249308

250309
for (int i = 0; i < n_tensors; ++i) {
251-
const char * name = gguf_get_tensor_name(ctx, i);
310+
const char * name = gguf_get_tensor_name (ctx, i);
252311
const size_t offset = gguf_get_tensor_offset(ctx, i);
253312

254313
fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);

ggml.c

Lines changed: 59 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
36983698
};
36993699
static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
37003700

3701-
37023701
static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
37033702
[GGML_TYPE_F32] = "f32",
37043703
[GGML_TYPE_F16] = "f16",
@@ -18302,7 +18301,19 @@ struct gguf_str {
1830218301
char * data;
1830318302
};
1830418303

18305-
union gguf_value;
18304+
static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
18305+
[GGUF_TYPE_UINT8] = sizeof(uint8_t),
18306+
[GGUF_TYPE_INT8] = sizeof(int8_t),
18307+
[GGUF_TYPE_UINT16] = sizeof(uint16_t),
18308+
[GGUF_TYPE_INT16] = sizeof(int16_t),
18309+
[GGUF_TYPE_UINT32] = sizeof(uint32_t),
18310+
[GGUF_TYPE_INT32] = sizeof(int32_t),
18311+
[GGUF_TYPE_FLOAT32] = sizeof(float),
18312+
[GGUF_TYPE_BOOL] = sizeof(bool),
18313+
[GGUF_TYPE_STRING] = sizeof(struct gguf_str),
18314+
[GGUF_TYPE_ARRAY] = 0, // undefined
18315+
};
18316+
static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10");
1830618317

1830718318
union gguf_value {
1830818319
uint8_t uint8;
@@ -18320,7 +18331,7 @@ union gguf_value {
1832018331
enum gguf_type type;
1832118332

1832218333
uint32_t n;
18323-
union gguf_value * arr;
18334+
void * data;
1832418335
} arr;
1832518336
};
1832618337

@@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1845718468
case GGUF_TYPE_BOOL: ok = ok && gguf_fread_el (&kv->value.bool_, sizeof(kv->value.bool_), file, &offset); break;
1845818469
case GGUF_TYPE_STRING: ok = ok && gguf_fread_str(&kv->value.str, file, &offset); break;
1845918470
case GGUF_TYPE_ARRAY:
18460-
GGML_ASSERT("gguf: array type not implemented");
18461-
break;
18471+
{
18472+
ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
18473+
ok = ok && gguf_fread_el(&kv->value.arr.n, sizeof(kv->value.arr.n), file, &offset);
18474+
18475+
switch (kv->value.arr.type) {
18476+
case GGUF_TYPE_UINT8:
18477+
case GGUF_TYPE_INT8:
18478+
case GGUF_TYPE_UINT16:
18479+
case GGUF_TYPE_INT16:
18480+
case GGUF_TYPE_UINT32:
18481+
case GGUF_TYPE_INT32:
18482+
case GGUF_TYPE_FLOAT32:
18483+
case GGUF_TYPE_BOOL:
18484+
{
18485+
kv->value.arr.data = malloc(kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type]);
18486+
ok = ok && gguf_fread_el(kv->value.arr.data, kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type], file, &offset);
18487+
} break;
18488+
case GGUF_TYPE_STRING:
18489+
{
18490+
kv->value.arr.data = malloc(kv->value.arr.n * sizeof(struct gguf_str));
18491+
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
18492+
ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
18493+
}
18494+
} break;
18495+
case GGUF_TYPE_ARRAY:
18496+
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
18497+
};
18498+
} break;
18499+
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
1846218500
};
1846318501

1846418502
if (!ok) {
@@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
1862918667
ggml_set_no_alloc(ctx_data, params.no_alloc);
1863018668
}
1863118669

18670+
fclose(file);
18671+
1863218672
return ctx;
1863318673
}
1863418674

@@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) {
1865118691
free(kv->value.str.data);
1865218692
}
1865318693
}
18694+
18695+
if (kv->type == GGUF_TYPE_ARRAY) {
18696+
if (kv->value.arr.data) {
18697+
if (kv->value.arr.type == GGUF_TYPE_STRING) {
18698+
for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
18699+
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
18700+
if (str->data) {
18701+
free(str->data);
18702+
}
18703+
}
18704+
}
18705+
free(kv->value.arr.data);
18706+
}
18707+
}
1865418708
}
1865518709

1865618710
GGML_ALIGNED_FREE(ctx->header.kv);

ggml.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1631,6 +1631,7 @@ extern "C" {
16311631
GGUF_TYPE_BOOL = 7,
16321632
GGUF_TYPE_STRING = 8,
16331633
GGUF_TYPE_ARRAY = 9,
1634+
GGUF_TYPE_COUNT, // marks the end of the enum
16341635
};
16351636

16361637
struct gguf_context;
@@ -1664,7 +1665,8 @@ extern "C" {
16641665
GGML_API float gguf_get_val_f32 (struct gguf_context * ctx, int i);
16651666
GGML_API bool gguf_get_val_bool(struct gguf_context * ctx, int i);
16661667
GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
1667-
// TODO: arr
1668+
GGML_API int gguf_get_arr_n (struct gguf_context * ctx, int i);
1669+
GGML_API void gguf_get_arr_data(struct gguf_context * ctx, int i, void * data);
16681670

16691671
GGML_API int gguf_get_n_tensors (struct gguf_context * ctx);
16701672
GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);

0 commit comments

Comments
 (0)