gguf : add array support

ggerganov · ggerganov · commit d2b6ca13ad25 · 2023-07-27T14:53:07.000+03:00
diff --git a/examples/gguf/gguf.cpp b/examples/gguf/gguf.cpp
@@ -29,7 +29,7 @@ void gguf_ex_write_u64(std::ofstream & fout, size_t val) {
 }
 
 template<typename T>
-void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
+void gguf_ex_write_val(std::ofstream & fout, const std::string & key, enum gguf_type type, const T & val) {
     gguf_ex_write_str(fout, key);
     fout.write((const char *) &type, sizeof(type));
     fout.write((const char *) &val,  sizeof(val));
@@ -38,13 +38,65 @@ void gguf_ex_write_param(std::ofstream & fout, const std::string & key, enum ggu
 }
 
 template<>
-void gguf_ex_write_param<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
+void gguf_ex_write_val<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::string & val) {
     gguf_ex_write_str(fout, key);
     fout.write((const char *) &type, sizeof(type));
 
     const int32_t n = val.size();
     fout.write((const char *) &n, sizeof(n));
     fout.write(val.c_str(), n);
+    // the value went out as an int32 length followed by the raw bytes; echo the pair for inspection
+    fprintf(stdout, "%s: write param: %s = %s\n", __func__, key.c_str(), val.c_str());
+}
+
+// Writes an array-valued KV pair: key, GGUF_TYPE_ARRAY tag, element type, int32 count, raw element bytes.
+template<typename T>
+void gguf_ex_write_arr(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<T> & val) {
+    const enum gguf_type kv_type = GGUF_TYPE_ARRAY;
+    const int32_t        count   = val.size();
+
+    gguf_ex_write_str(fout, key);
+    fout.write((const char *) &kv_type, sizeof(kv_type));
+    fout.write((const char *) &type,    sizeof(type));
+    fout.write((const char *) &count,   sizeof(count));
+    fout.write((const char *) val.data(), count * sizeof(T));
+
+    // echo the array to stdout as a comma-separated list
+    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
+    for (int idx = 0; idx < count; ++idx) {
+        if (idx > 0) {
+            fprintf(stdout, ", ");
+        }
+        fprintf(stdout, "%s", to_string(val[idx]).c_str());
+    }
+    fprintf(stdout, "]\n");
+}
+
+// std::string specialization: elements have no fixed size, so each one is written as an int32 length plus its bytes.
+template<>
+void gguf_ex_write_arr<std::string>(std::ofstream & fout, const std::string & key, enum gguf_type type, const std::vector<std::string> & val) {
+    const enum gguf_type kv_type = GGUF_TYPE_ARRAY;
+    const int32_t        count   = val.size();
+
+    gguf_ex_write_str(fout, key);
+    fout.write((const char *) &kv_type, sizeof(kv_type));
+    fout.write((const char *) &type,    sizeof(type));
+    fout.write((const char *) &count,   sizeof(count));
+    for (int idx = 0; idx < count; ++idx) {
+        const int32_t len = val[idx].size();
+        fout.write((const char *) &len, sizeof(len));
+        fout.write(val[idx].c_str(), len);
+    }
+
+    // echo the array to stdout as a comma-separated list
+    fprintf(stdout, "%s: write param: %s = [", __func__, key.c_str());
+    for (int idx = 0; idx < count; ++idx) {
+        if (idx > 0) {
+            fprintf(stdout, ", ");
+        }
+        fprintf(stdout, "%s", val[idx].c_str());
+    }
+    fprintf(stdout, "]\n");
 }
 
 bool gguf_ex_write(const std::string & fname) {
@@ -60,8 +112,9 @@ bool gguf_ex_write(const std::string & fname) {
         fout.write((const char *) &version, sizeof(version));
     }
 
+    // NOTE: these have to match the output below!
     const int n_tensors = 10;
-    const int n_kv = 9;
+    const int n_kv      = 12;
 
     fout.write((const char*) &n_tensors, sizeof(n_tensors));
     fout.write((const char*) &n_kv, sizeof(n_kv));
@@ -70,17 +123,21 @@ bool gguf_ex_write(const std::string & fname) {
 
     // kv data
     {
-        gguf_ex_write_param< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,   0x12);
-        gguf_ex_write_param<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,   -0x13);
-        gguf_ex_write_param<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,  0x1234);
-        gguf_ex_write_param< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,  -0x1235);
-        gguf_ex_write_param<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,  0x12345678);
-        gguf_ex_write_param< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,  -0x12345679);
+        gguf_ex_write_val< uint8_t>(fout, "some.parameter.uint8",   GGUF_TYPE_UINT8,   0x12);
+        gguf_ex_write_val<  int8_t>(fout, "some.parameter.int8",    GGUF_TYPE_INT8,   -0x13);
+        gguf_ex_write_val<uint16_t>(fout, "some.parameter.uint16",  GGUF_TYPE_UINT16,  0x1234);
+        gguf_ex_write_val< int16_t>(fout, "some.parameter.int16",   GGUF_TYPE_INT16,  -0x1235);
+        gguf_ex_write_val<uint32_t>(fout, "some.parameter.uint32",  GGUF_TYPE_UINT32,  0x12345678);
+        gguf_ex_write_val< int32_t>(fout, "some.parameter.int32",   GGUF_TYPE_INT32,  -0x12345679);
 
-        gguf_ex_write_param<float>   (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
-        gguf_ex_write_param<bool>    (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true);
+        gguf_ex_write_val<float>   (fout, "some.parameter.float32", GGUF_TYPE_FLOAT32, 0.123456789f);
+        gguf_ex_write_val<bool>    (fout, "some.parameter.bool",    GGUF_TYPE_BOOL,    true);
 
-        gguf_ex_write_param<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world");
+        gguf_ex_write_val<std::string>(fout, "some.parameter.string",  GGUF_TYPE_STRING,  "hello world");
+
+        gguf_ex_write_arr<int16_t>    (fout, "some.parameter.arr.i16", GGUF_TYPE_INT16,   { 1, 2, 3, 4, });
+        gguf_ex_write_arr<float>      (fout, "some.parameter.arr.f32", GGUF_TYPE_FLOAT32, { 3.145f, 2.718f, 1.414f, });
+        gguf_ex_write_arr<std::string>(fout, "some.parameter.arr.str", GGUF_TYPE_STRING,  { "hello", "world", "!" });
     }
 
     uint64_t offset_tensor = 0;
@@ -203,13 +260,15 @@ bool gguf_ex_read_0(const std::string & fname) {
         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
 
         for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name(ctx, i);
+            const char * name   = gguf_get_tensor_name  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
         }
     }
 
+    gguf_free(ctx);
+
     return true;
 }
 
@@ -248,7 +307,7 @@ bool gguf_ex_read_1(const std::string & fname) {
         fprintf(stdout, "%s: n_tensors: %d\n", __func__, n_tensors);
 
         for (int i = 0; i < n_tensors; ++i) {
-            const char * name = gguf_get_tensor_name(ctx, i);
+            const char * name   = gguf_get_tensor_name  (ctx, i);
             const size_t offset = gguf_get_tensor_offset(ctx, i);
 
             fprintf(stdout, "%s: tensor[%d]: name = %s, offset = %zu\n", __func__, i, name, offset);
diff --git a/ggml.c b/ggml.c
@@ -3698,7 +3698,6 @@ static const size_t GGML_TYPE_SIZE[GGML_TYPE_COUNT] = {
 };
 static_assert(GGML_TYPE_COUNT == 19, "GGML_TYPE_SIZE is outdated");
 
-
 static const char * GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
     [GGML_TYPE_F32]  = "f32",
     [GGML_TYPE_F16]  = "f16",
@@ -18302,7 +18301,19 @@ struct gguf_str {
     char * data;
 };
 
-union gguf_value;
+static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = { // size in bytes of one value, indexed by enum gguf_type
+    [GGUF_TYPE_UINT8]   = sizeof(uint8_t),
+    [GGUF_TYPE_INT8]    = sizeof(int8_t),
+    [GGUF_TYPE_UINT16]  = sizeof(uint16_t),
+    [GGUF_TYPE_INT16]   = sizeof(int16_t),
+    [GGUF_TYPE_UINT32]  = sizeof(uint32_t),
+    [GGUF_TYPE_INT32]   = sizeof(int32_t),
+    [GGUF_TYPE_FLOAT32] = sizeof(float),
+    [GGUF_TYPE_BOOL]    = sizeof(bool),
+    [GGUF_TYPE_STRING]  = sizeof(struct gguf_str), // size of the descriptor struct, not of the characters it points to
+    [GGUF_TYPE_ARRAY]   = 0, // undefined
+};
+static_assert(GGUF_TYPE_COUNT == 10, "GGUF_TYPE_COUNT != 10"); // the table lists 10 types; revisit it when the enum grows
 
 union gguf_value {
     uint8_t  uint8;
@@ -18320,7 +18331,7 @@ union gguf_value {
         enum gguf_type type;
 
         uint32_t n;
-        union gguf_value * arr;
+        void * data;
     } arr;
 };
 
@@ -18457,8 +18468,35 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
                 case GGUF_TYPE_BOOL:    ok = ok && gguf_fread_el (&kv->value.bool_,   sizeof(kv->value.bool_),   file, &offset); break;
                 case GGUF_TYPE_STRING:  ok = ok && gguf_fread_str(&kv->value.str,                                file, &offset); break;
                 case GGUF_TYPE_ARRAY:
-                                        GGML_ASSERT("gguf: array type not implemented");
-                                        break;
+                    {
+                        kv->value.arr.data = NULL; // gguf_free() frees any non-NULL data: never leave a stale pointer behind
+                        ok = ok && gguf_fread_el(&kv->value.arr.type, sizeof(kv->value.arr.type), file, &offset);
+                        ok = ok && gguf_fread_el(&kv->value.arr.n,    sizeof(kv->value.arr.n),    file, &offset);
+                        if (!ok) { break; } // element type/count are unreliable: leave the kv-type switch without allocating
+                        switch (kv->value.arr.type) {
+                            case GGUF_TYPE_UINT8:   case GGUF_TYPE_INT8:
+                            case GGUF_TYPE_UINT16:  case GGUF_TYPE_INT16:
+                            case GGUF_TYPE_UINT32:  case GGUF_TYPE_INT32:
+                            case GGUF_TYPE_FLOAT32: case GGUF_TYPE_BOOL:
+                                {
+                                    const size_t size = kv->value.arr.n * GGUF_TYPE_SIZE[kv->value.arr.type];
+                                    kv->value.arr.data = malloc(size);
+                                    ok = ok && (kv->value.arr.data != NULL || size == 0) && gguf_fread_el(kv->value.arr.data, size, file, &offset);
+                                } break;
+                            case GGUF_TYPE_STRING:
+                                {
+                                    // calloc zeroes every gguf_str, so a partially read array can still be released by gguf_free()
+                                    kv->value.arr.data = calloc(kv->value.arr.n, sizeof(struct gguf_str));
+                                    ok = ok && (kv->value.arr.data != NULL || kv->value.arr.n == 0);
+                                    for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                                        ok = ok && gguf_fread_str(&((struct gguf_str *) kv->value.arr.data)[j], file, &offset);
+                                    }
+                                } break;
+                            case GGUF_TYPE_ARRAY:
+                            case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
+                        };
+                    } break;
+                case GGUF_TYPE_COUNT: GGML_ASSERT(false && "invalid type");
             };
 
             if (!ok) {
@@ -18629,6 +18667,8 @@ struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_p
         ggml_set_no_alloc(ctx_data, params.no_alloc);
     }
 
+    fclose(file);
+
     return ctx;
 }
 
@@ -18651,6 +18691,20 @@ void gguf_free(struct gguf_context * ctx) {
                     free(kv->value.str.data);
                 }
             }
+
+            if (kv->type == GGUF_TYPE_ARRAY) {
+                if (kv->value.arr.data) {
+                    if (kv->value.arr.type == GGUF_TYPE_STRING) {
+                        for (uint32_t j = 0; j < kv->value.arr.n; ++j) {
+                            struct gguf_str * str = &((struct gguf_str *) kv->value.arr.data)[j];
+                            if (str->data) {
+                                free(str->data);
+                            }
+                        }
+                    }
+                    free(kv->value.arr.data);
+                }
+            }
         }
 
         GGML_ALIGNED_FREE(ctx->header.kv);
diff --git a/ggml.h b/ggml.h
@@ -1631,6 +1631,7 @@ extern "C" {
         GGUF_TYPE_BOOL    = 7,
         GGUF_TYPE_STRING  = 8,
         GGUF_TYPE_ARRAY   = 9,
+        GGUF_TYPE_COUNT,       // marks the end of the enum
     };
 
     struct gguf_context;
@@ -1664,7 +1665,8 @@ extern "C" {
     GGML_API float        gguf_get_val_f32 (struct gguf_context * ctx, int i);
     GGML_API bool         gguf_get_val_bool(struct gguf_context * ctx, int i);
     GGML_API const char * gguf_get_val_str (struct gguf_context * ctx, int i);
-    // TODO: arr
+    GGML_API int          gguf_get_arr_n   (struct gguf_context * ctx, int i);
+    GGML_API void         gguf_get_arr_data(struct gguf_context * ctx, int i, void * data);
 
     GGML_API int    gguf_get_n_tensors    (struct gguf_context * ctx);
     GGML_API size_t gguf_get_tensor_offset(struct gguf_context * ctx, int i);