@@ -18613,8 +18613,6 @@ struct gguf_header {
18613
18613
uint32_t version;
18614
18614
uint32_t n_tensors;
18615
18615
uint32_t n_kv;
18616
-
18617
- struct gguf_kv * kv;
18618
18616
};
18619
18617
18620
18618
struct gguf_tensor_info {
@@ -18630,7 +18628,9 @@ struct gguf_tensor_info {
18630
18628
};
18631
18629
18632
18630
struct gguf_context {
18633
- struct gguf_header header;
18631
+ struct gguf_header header;
18632
+
18633
+ struct gguf_kv * kv;
18634
18634
struct gguf_tensor_info * infos;
18635
18635
18636
18636
size_t alignment;
@@ -18660,6 +18660,26 @@ static bool gguf_fread_str(struct gguf_str * p, FILE * file, size_t * offset) {
18660
18660
return ok;
18661
18661
}
18662
18662
18663
+ struct gguf_context * gguf_init_empty(void) {
18664
+ struct gguf_context * ctx = GGML_ALIGNED_MALLOC(sizeof(struct gguf_context));
18665
+
18666
+ ctx->header.magic = GGUF_MAGIC;
18667
+ ctx->header.version = GGUF_VERSION;
18668
+ ctx->header.n_tensors = 0;
18669
+ ctx->header.n_kv = 0;
18670
+
18671
+ ctx->kv = NULL;
18672
+ ctx->infos = NULL;
18673
+
18674
+ ctx->alignment = GGUF_DEFAULT_ALIGNMENT;
18675
+ ctx->offset = 0;
18676
+ ctx->size_data = 0;
18677
+
18678
+ ctx->data = NULL;
18679
+
18680
+ return ctx;
18681
+ }
18682
+
18663
18683
struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
18664
18684
FILE * file = fopen(fname, "rb");
18665
18685
if (!file) {
@@ -18689,8 +18709,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18689
18709
// read the header
18690
18710
{
18691
18711
ctx->header.magic = magic;
18692
- ctx->header.kv = NULL;
18693
18712
18713
+ ctx->kv = NULL;
18694
18714
ctx->infos = NULL;
18695
18715
ctx->data = NULL;
18696
18716
@@ -18708,10 +18728,10 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18708
18728
18709
18729
// read the kv pairs
18710
18730
{
18711
- ctx->header. kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
18731
+ ctx->kv = GGML_ALIGNED_MALLOC(ctx->header.n_kv * sizeof(struct gguf_kv));
18712
18732
18713
18733
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
18714
- struct gguf_kv * kv = &ctx->header. kv[i];
18734
+ struct gguf_kv * kv = &ctx->kv[i];
18715
18735
18716
18736
//fprintf(stderr, "%s: reading kv %d\n", __func__, i);
18717
18737
@@ -18757,7 +18777,7 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18757
18777
}
18758
18778
} break;
18759
18779
case GGUF_TYPE_ARRAY:
18760
- case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
18780
+ case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
18761
18781
};
18762
18782
} break;
18763
18783
case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
@@ -18827,7 +18847,6 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
18827
18847
18828
18848
// compute the total size of the data section, taking into account the alignment
18829
18849
{
18830
-
18831
18850
ctx->size_data = 0;
18832
18851
for (uint32_t i = 0; i < ctx->header.n_tensors; ++i) {
18833
18852
struct gguf_tensor_info * info = &ctx->infos[i];
@@ -18944,10 +18963,10 @@ void gguf_free(struct gguf_context * ctx) {
18944
18963
return;
18945
18964
}
18946
18965
18947
- if (ctx->header. kv) {
18966
+ if (ctx->kv) {
18948
18967
// free string memory - not great..
18949
18968
for (uint32_t i = 0; i < ctx->header.n_kv; ++i) {
18950
- struct gguf_kv * kv = &ctx->header. kv[i];
18969
+ struct gguf_kv * kv = &ctx->kv[i];
18951
18970
18952
18971
if (kv->key.data) {
18953
18972
free(kv->key.data);
@@ -18974,7 +18993,7 @@ void gguf_free(struct gguf_context * ctx) {
18974
18993
}
18975
18994
}
18976
18995
18977
- GGML_ALIGNED_FREE(ctx->header. kv);
18996
+ GGML_ALIGNED_FREE(ctx->kv);
18978
18997
}
18979
18998
18980
18999
if (ctx->infos) {
@@ -19014,8 +19033,9 @@ int gguf_get_n_kv(struct gguf_context * ctx) {
19014
19033
19015
19034
int gguf_find_key(struct gguf_context * ctx, const char * key) {
19016
19035
// return -1 if key not found
19036
+ int keyfound = -1;
19037
+
19017
19038
const int n_kv = gguf_get_n_kv(ctx);
19018
- int keyfound = -1;
19019
19039
19020
19040
for (int i = 0; i < n_kv; ++i) {
19021
19041
if (strcmp(key, gguf_get_key(ctx, i)) == 0) {
@@ -19028,69 +19048,69 @@ int gguf_find_key(struct gguf_context * ctx, const char * key) {
19028
19048
}
19029
19049
19030
19050
const char * gguf_get_key(struct gguf_context * ctx, int i) {
19031
- return ctx->header. kv[i].key.data;
19051
+ return ctx->kv[i].key.data;
19032
19052
}
19033
19053
19034
19054
enum gguf_type gguf_get_kv_type(struct gguf_context * ctx, int i) {
19035
- return ctx->header. kv[i].type;
19055
+ return ctx->kv[i].type;
19036
19056
}
19037
19057
19038
19058
enum gguf_type gguf_get_arr_type(struct gguf_context * ctx, int i) {
19039
- return ctx->header. kv[i].value.arr.type;
19059
+ return ctx->kv[i].value.arr.type;
19040
19060
}
19041
19061
19042
19062
int32_t gguf_get_arr_i32(struct gguf_context * ctx, int key_id, int i) {
19043
- return ((int32_t *) ctx->header. kv[key_id].value.arr.data)[i];
19063
+ return ((int32_t *) ctx->kv[key_id].value.arr.data)[i];
19044
19064
}
19045
19065
19046
19066
float gguf_get_arr_f32(struct gguf_context * ctx, int key_id, int i) {
19047
- return ((float *) ctx->header. kv[key_id].value.arr.data)[i];
19067
+ return ((float *) ctx->kv[key_id].value.arr.data)[i];
19048
19068
}
19049
19069
19050
19070
const char * gguf_get_arr_str(struct gguf_context * ctx, int key_id, int i) {
19051
- struct gguf_kv * kv = &ctx->header. kv[key_id];
19071
+ struct gguf_kv * kv = &ctx->kv[key_id];
19052
19072
struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[i];
19053
19073
return str->data;
19054
19074
}
19055
19075
19056
19076
int gguf_get_arr_n(struct gguf_context * ctx, int i) {
19057
- return ctx->header. kv[i].value.arr.n;
19077
+ return ctx->kv[i].value.arr.n;
19058
19078
}
19059
19079
19060
19080
uint8_t gguf_get_val_u8(struct gguf_context * ctx, int i) {
19061
- return ctx->header. kv[i].value.uint8;
19081
+ return ctx->kv[i].value.uint8;
19062
19082
}
19063
19083
19064
19084
int8_t gguf_get_val_i8(struct gguf_context * ctx, int i) {
19065
- return ctx->header. kv[i].value.int8;
19085
+ return ctx->kv[i].value.int8;
19066
19086
}
19067
19087
19068
19088
uint16_t gguf_get_val_u16(struct gguf_context * ctx, int i) {
19069
- return ctx->header. kv[i].value.uint16;
19089
+ return ctx->kv[i].value.uint16;
19070
19090
}
19071
19091
19072
19092
int16_t gguf_get_val_i16(struct gguf_context * ctx, int i) {
19073
- return ctx->header. kv[i].value.int16;
19093
+ return ctx->kv[i].value.int16;
19074
19094
}
19075
19095
19076
19096
uint32_t gguf_get_val_u32(struct gguf_context * ctx, int i) {
19077
- return ctx->header. kv[i].value.uint32;
19097
+ return ctx->kv[i].value.uint32;
19078
19098
}
19079
19099
19080
19100
int32_t gguf_get_val_i32(struct gguf_context * ctx, int i) {
19081
- return ctx->header. kv[i].value.int32;
19101
+ return ctx->kv[i].value.int32;
19082
19102
}
19083
19103
19084
19104
float gguf_get_val_f32(struct gguf_context * ctx, int i) {
19085
- return ctx->header. kv[i].value.float32;
19105
+ return ctx->kv[i].value.float32;
19086
19106
}
19087
19107
19088
19108
bool gguf_get_val_bool(struct gguf_context * ctx, int i) {
19089
- return ctx->header. kv[i].value.bool_;
19109
+ return ctx->kv[i].value.bool_;
19090
19110
}
19091
19111
19092
19112
const char * gguf_get_val_str (struct gguf_context * ctx, int i) {
19093
- return ctx->header. kv[i].value.str.data;
19113
+ return ctx->kv[i].value.str.data;
19094
19114
}
19095
19115
19096
19116
int gguf_get_n_tensors(struct gguf_context * ctx) {
@@ -19105,6 +19125,164 @@ char * gguf_get_tensor_name(struct gguf_context * ctx, int i) {
19105
19125
return ctx->infos[i].name.data;
19106
19126
}
19107
19127
19128
+ // returns the index
19129
+ static int gguf_get_or_add_key(struct gguf_context * ctx, const char * key) {
19130
+ const int idx = gguf_find_key(ctx, key);
19131
+ if (idx >= 0) {
19132
+ return idx;
19133
+ }
19134
+
19135
+ const int n_kv = gguf_get_n_kv(ctx);
19136
+
19137
+ ctx->kv = realloc(ctx->kv, (n_kv + 1) * sizeof(struct gguf_kv));
19138
+ ctx->kv[n_kv].key.n = strlen(key) + 1;
19139
+ ctx->kv[n_kv].key.data = strdup(key);
19140
+ ctx->header.n_kv++;
19141
+
19142
+ return n_kv;
19143
+ }
19144
+
19145
+ void gguf_set_val_u8(struct gguf_context * ctx, const char * key, uint8_t val) {
19146
+ const int idx = gguf_get_or_add_key(ctx, key);
19147
+
19148
+ ctx->kv[idx].type = GGUF_TYPE_UINT8;
19149
+ ctx->kv[idx].value.uint8 = val;
19150
+ }
19151
+
19152
+ void gguf_set_val_i8(struct gguf_context * ctx, const char * key, int8_t val) {
19153
+ const int idx = gguf_get_or_add_key(ctx, key);
19154
+
19155
+ ctx->kv[idx].type = GGUF_TYPE_INT8;
19156
+ ctx->kv[idx].value.int8 = val;
19157
+ }
19158
+
19159
+ void gguf_set_val_u16(struct gguf_context * ctx, const char * key, uint16_t val) {
19160
+ const int idx = gguf_get_or_add_key(ctx, key);
19161
+
19162
+ ctx->kv[idx].type = GGUF_TYPE_UINT16;
19163
+ ctx->kv[idx].value.uint16 = val;
19164
+ }
19165
+
19166
+ void gguf_set_val_i16(struct gguf_context * ctx, const char * key, int16_t val) {
19167
+ const int idx = gguf_get_or_add_key(ctx, key);
19168
+
19169
+ ctx->kv[idx].type = GGUF_TYPE_INT16;
19170
+ ctx->kv[idx].value.int16 = val;
19171
+ }
19172
+
19173
+ void gguf_set_val_u32(struct gguf_context * ctx, const char * key, uint32_t val) {
19174
+ const int idx = gguf_get_or_add_key(ctx, key);
19175
+
19176
+ ctx->kv[idx].type = GGUF_TYPE_UINT32;
19177
+ ctx->kv[idx].value.uint32 = val;
19178
+ }
19179
+
19180
+ void gguf_set_val_i32(struct gguf_context * ctx, const char * key, int32_t val) {
19181
+ const int idx = gguf_get_or_add_key(ctx, key);
19182
+
19183
+ ctx->kv[idx].type = GGUF_TYPE_INT32;
19184
+ ctx->kv[idx].value.int32 = val;
19185
+ }
19186
+
19187
+ void gguf_set_val_f32(struct gguf_context * ctx, const char * key, float val) {
19188
+ const int idx = gguf_get_or_add_key(ctx, key);
19189
+
19190
+ ctx->kv[idx].type = GGUF_TYPE_FLOAT32;
19191
+ ctx->kv[idx].value.float32 = val;
19192
+ }
19193
+
19194
+ void gguf_set_val_bool(struct gguf_context * ctx, const char * key, bool val) {
19195
+ const int idx = gguf_get_or_add_key(ctx, key);
19196
+
19197
+ ctx->kv[idx].type = GGUF_TYPE_BOOL;
19198
+ ctx->kv[idx].value.bool_ = val;
19199
+ }
19200
+
19201
+ void gguf_set_val_str(struct gguf_context * ctx, const char * key, const char * val) {
19202
+ const int idx = gguf_get_or_add_key(ctx, key);
19203
+
19204
+ ctx->kv[idx].type = GGUF_TYPE_STRING;
19205
+ ctx->kv[idx].value.str.n = strlen(val) + 1;
19206
+ ctx->kv[idx].value.str.data = strdup(val);
19207
+ }
19208
+
19209
+ void gguf_set_arr_data(struct gguf_context * ctx, const char * key, enum gguf_type type, const void * data, int n) {
19210
+ const int idx = gguf_get_or_add_key(ctx, key);
19211
+
19212
+ ctx->kv[idx].type = GGUF_TYPE_ARRAY;
19213
+ ctx->kv[idx].value.arr.type = type;
19214
+ ctx->kv[idx].value.arr.n = n;
19215
+ ctx->kv[idx].value.arr.data = malloc(n*GGUF_TYPE_SIZE[type]);
19216
+ memcpy(ctx->kv[idx].value.arr.data, data, n*GGUF_TYPE_SIZE[type]);
19217
+ }
19218
+
19219
+ void gguf_set_arr_str(struct gguf_context * ctx, const char * key, const char ** data, int n) {
19220
+ const int idx = gguf_get_or_add_key(ctx, key);
19221
+
19222
+ ctx->kv[idx].type = GGUF_TYPE_ARRAY;
19223
+ ctx->kv[idx].value.arr.type = GGUF_TYPE_STRING;
19224
+ ctx->kv[idx].value.arr.n = n;
19225
+ ctx->kv[idx].value.arr.data = malloc(n*sizeof(struct gguf_str));
19226
+ for (int i = 0; i < n; i++) {
19227
+ struct gguf_str * str = &((struct gguf_str *)ctx->kv[idx].value.arr.data)[i];
19228
+ str->n = strlen(data[i]) + 1;
19229
+ str->data = strdup(data[i]);
19230
+ }
19231
+ }
19232
+
19233
+ // set or add KV pairs from another context
19234
+ void gguf_set_kv(struct gguf_context * ctx, struct gguf_context * src) {
19235
+ for (uint32_t i = 0; i < src->header.n_kv; i++) {
19236
+ switch (src->kv[i].type) {
19237
+ case GGUF_TYPE_UINT8: gguf_set_val_u8 (ctx, src->kv[i].key.data, src->kv[i].value.uint8); break;
19238
+ case GGUF_TYPE_INT8: gguf_set_val_i8 (ctx, src->kv[i].key.data, src->kv[i].value.int8); break;
19239
+ case GGUF_TYPE_UINT16: gguf_set_val_u16 (ctx, src->kv[i].key.data, src->kv[i].value.uint16); break;
19240
+ case GGUF_TYPE_INT16: gguf_set_val_i16 (ctx, src->kv[i].key.data, src->kv[i].value.int16); break;
19241
+ case GGUF_TYPE_UINT32: gguf_set_val_u32 (ctx, src->kv[i].key.data, src->kv[i].value.uint32); break;
19242
+ case GGUF_TYPE_INT32: gguf_set_val_i32 (ctx, src->kv[i].key.data, src->kv[i].value.int32); break;
19243
+ case GGUF_TYPE_FLOAT32: gguf_set_val_f32 (ctx, src->kv[i].key.data, src->kv[i].value.float32); break;
19244
+ case GGUF_TYPE_BOOL: gguf_set_val_bool(ctx, src->kv[i].key.data, src->kv[i].value.bool_); break;
19245
+ case GGUF_TYPE_STRING: gguf_set_val_str (ctx, src->kv[i].key.data, src->kv[i].value.str.data); break;
19246
+ case GGUF_TYPE_ARRAY:
19247
+ {
19248
+ if (src->kv[i].value.arr.type == GGUF_TYPE_STRING) {
19249
+ const char ** data = malloc(src->kv[i].value.arr.n*sizeof(char *));
19250
+ for (uint32_t j = 0; j < src->kv[i].value.arr.n; j++) {
19251
+ data[j] = ((struct gguf_str *)src->kv[i].value.arr.data)[j].data;
19252
+ }
19253
+ gguf_set_arr_str(ctx, src->kv[i].key.data, data, src->kv[i].value.arr.n);
19254
+ free(data);
19255
+ } if (src->kv[i].value.arr.type == GGUF_TYPE_ARRAY) {
19256
+ GGML_ASSERT(false && "nested arrays not supported");
19257
+ } else {
19258
+ gguf_set_arr_data(ctx, src->kv[i].key.data, src->kv[i].value.arr.type, src->kv[i].value.arr.data, src->kv[i].value.arr.n);
19259
+ }
19260
+ } break;
19261
+ case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type"); break;
19262
+ }
19263
+ }
19264
+ }
19265
+
19266
+ void gguf_add_tensor(struct gguf_context * ctx, const struct ggml_tensor * tensor) {
19267
+ const int idx = ctx->header.n_tensors;
19268
+ ctx->infos = realloc(ctx->infos, (idx + 1)*sizeof(struct gguf_tensor_info));
19269
+
19270
+ ctx->infos[idx].name.n = strlen(tensor->name) + 1;
19271
+ ctx->infos[idx].name.data = strdup(tensor->name);
19272
+
19273
+ ctx->infos[idx].n_dims = tensor->n_dims;
19274
+ for (int i = 0; i < tensor->n_dims; i++) {
19275
+ ctx->infos[idx].ne[i] = tensor->ne[i];
19276
+ }
19277
+ //ctx->infos[idx].n_elms = tensor->n_elms;
19278
+
19279
+ ctx->infos[idx].type = tensor->type;
19280
+
19281
+ ctx->infos[idx].offset = -1; // set later;
19282
+
19283
+ ctx->header.n_tensors++;
19284
+ }
19285
+
19108
19286
////////////////////////////////////////////////////////////////////////////////
19109
19287
19110
19288
int ggml_cpu_has_avx(void) {
0 commit comments