ggml-org
diff --git a/‎Makefile
Lines changed: 5 additions & 0 deletions b/‎Makefile
Lines changed: 5 additions & 0 deletions
diff --git a/‎examples/CMakeLists.txt
Lines changed: 1 addition & 0 deletions b/‎examples/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎examples/gguf-hash/CMakeLists.txt
Lines changed: 7 additions & 0 deletions b/‎examples/gguf-hash/CMakeLists.txt
Lines changed: 7 additions & 0 deletions
diff --git a/‎examples/gguf-hash/README.md
Lines changed: 21 additions & 0 deletions b/‎examples/gguf-hash/README.md
Lines changed: 21 additions & 0 deletions
diff --git a/‎examples/gguf-hash/gguf-hash.cpp
Lines changed: 188 additions & 0 deletions b/‎examples/gguf-hash/gguf-hash.cpp
Lines changed: 188 additions & 0 deletions
@@ -14,6 +14,7 @@ BUILD_TARGETS = \
 	llama-finetune \
 	llama-gbnf-validator \
 	llama-gguf \
+	llama-gguf-hash \
 	llama-gguf-split \
 	llama-gritlm \
 	llama-imatrix \
@@ -919,6 +920,10 @@ llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
+# Build the llama-gguf-hash example: the first recipe line compiles
+# gguf-hash.cpp to its object file, the second links that object together
+# with the remaining prerequisites (headers and the source itself filtered out).
+llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
+
 llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
 
@@ -23,6 +23,7 @@ else()
     add_subdirectory(export-lora)
     add_subdirectory(finetune)
     add_subdirectory(gbnf-validator)
+    add_subdirectory(gguf-hash)
     add_subdirectory(gguf-split)
     add_subdirectory(gguf)
     add_subdirectory(gritlm)
 
@@ -0,0 +1,7 @@
+# Build script for the llama-gguf-hash example executable.
+set(TARGET llama-gguf-hash)
+# Compile the SHA-1 and xxHash sources listed here as OBJECT libraries so they
+# can be linked into the executable below.
+add_library(sha1 OBJECT sha1.c sha1.h)
+add_library(xxhash OBJECT xxhash.c xxhash.h)
+add_executable(${TARGET} gguf-hash.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+# Link against ggml, the two hash libraries and the platform thread library.
+target_link_libraries(${TARGET} PRIVATE ggml sha1 xxhash ${CMAKE_THREAD_LIBS_INIT})
+# gguf-hash.cpp is built as C++11 or newer.
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
@@ -0,0 +1,21 @@
+## GGUF hash Example
+
+CLI to hash GGUF files.
+
+**Command line options:**
+
+- `--xxhash`: use xxhash (default)
+- `--sha1`: use sha1
+
+### Compile Example
+
+```
+cmake -B build
+make -C build llama-gguf-hash
+./build/bin/llama-gguf-hash test.gguf
+```
+
+### Crypto/Hash Libraries Used
+
+- https://github.com/clibs/sha1/
+- https://github.com/Cyan4973/xxHash
@@ -0,0 +1,188 @@
+#include "ggml.h"
+
+#include "stdlib.h"   /* abort() */
+#include <cstddef>
+#include "sha1.h"
+#include "xxhash.h"
+
+#include <cstdio>
+#include <string>
+#include <stdexcept>
+#include <algorithm>
+
+#include <string.h>
+
+
+// Command-line options accepted by the gguf-hash tool.
+struct hash_params {
+    std::string input;          // path of the GGUF file to hash (positional argument)
+    bool        xxhash{false};  // true when xxhash output is requested
+    bool        sha1{false};    // true when sha1 output is requested
+};
+
+// Print the command-line help text to stdout.
+//   executable: program name shown in the usage line (callers pass argv[0]).
+static void hash_print_usage(const char * executable) {
+    printf("\n");
+    printf("usage: %s [options] GGUF_IN\n", executable);
+    printf("\n");
+    printf("Hash a GGUF file");
+    printf("\n");
+    printf("options:\n");
+    printf("  -h, --help              show this help message and exit\n");
+    printf("      --xxhash            use xxhash\n");
+    printf("      --sha1              use sha1\n");
+    printf("\n");
+}
+
+// Parse the command line into `params`.
+//
+// Leading option arguments ("--name" or the short help flag "-h") are consumed
+// first; the first argument after them becomes params.input. Underscores in
+// long options are treated as dashes. When neither --xxhash nor --sha1 is
+// given, xxhash is selected. On -h/--help the usage text is printed and the
+// process exits with status 0.
+//
+// Throws std::invalid_argument on an unknown option or when no input file
+// follows the options.
+static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
+    std::string arg;
+    const std::string arg_prefix = "--";
+
+    int arg_idx = 1;
+    for (; arg_idx < argc; arg_idx++) {
+        // "-h" has to be recognised as an option here; if only "--" arguments
+        // were accepted, "-h" would end up being used as the input file name.
+        const bool is_long_opt = strncmp(argv[arg_idx], arg_prefix.c_str(), arg_prefix.size()) == 0;
+        if (!is_long_opt && strcmp(argv[arg_idx], "-h") != 0) {
+            break;
+        }
+
+        arg = argv[arg_idx];
+        if (is_long_opt) {
+            // accept --some_option as an alias of --some-option
+            std::replace(arg.begin(), arg.end(), '_', '-');
+        }
+
+        if (arg == "-h" || arg == "--help") {
+            hash_print_usage(argv[0]);
+            exit(0);
+        }
+
+        if (arg == "--xxhash") {
+            params.xxhash = true;
+        } else if (arg == "--sha1") {
+            params.sha1 = true;
+        } else {
+            throw std::invalid_argument("error: unknown argument: " + arg);
+        }
+    }
+
+    if (!params.xxhash && !params.sha1) {
+        // By default, if no switch argument is provided, assume xxhash
+        params.xxhash = true;
+    }
+
+    if (argc - arg_idx < 1) {
+        throw std::invalid_argument("error: bad arguments");
+    }
+
+    params.input = argv[arg_idx++];
+}
+
+// Front end for hash_params_parse_ex() that handles argument errors itself:
+// when parsing throws std::invalid_argument, the message is written to stderr,
+// the usage text is printed and the process exits with EXIT_FAILURE.
+// Returns true when parsing completed.
+static bool hash_params_parse(int argc, const char ** argv, hash_params & params) {
+    try {
+        hash_params_parse_ex(argc, argv, params);
+    } catch (const std::invalid_argument & err) {
+        fprintf(stderr, "%s\n", err.what());
+        hash_print_usage(argv[0]);
+        exit(EXIT_FAILURE);
+    }
+
+    return true;
+}
+
+// Render a 20-byte SHA-1 digest as 40 lowercase hex characters plus a NUL.
+//   digest: at least 20 readable bytes
+//   hex:    at least 41 writable bytes
+static void hash_sha1_to_hex(const unsigned char * digest, char * hex) {
+    for (int i = 0; i < 20; i++) {
+        // each step writes two hex digits and a terminator (3 bytes)
+        snprintf(hex + 2*i, 3, "%02x", (unsigned int) digest[i]);
+    }
+}
+
+// Hash the tensor data of the GGUF file named by hash_params.input.
+//
+// For every tensor, in tensor index order, one line is printed per enabled
+// algorithm ("<algo>  <hex>  <file>:<tensor>"). Afterwards one line per enabled
+// algorithm is printed for the whole model ("<algo>  <hex>  <file>"), computed
+// by feeding the data of all tensors, in the same order, into a running hash.
+// Only tensor data is hashed.
+//
+// Returns true on success and false when the file could not be loaded.
+// Failures inside the xxhash state API call abort().
+static bool gguf_hash(const hash_params & hash_params) {
+    const std::string & fname = hash_params.input;
+    struct ggml_context * ctx_data = NULL;
+
+    struct gguf_init_params params = {
+        /*.no_alloc = */ false,
+        /*.ctx      = */ &ctx_data,
+    };
+
+    // Load the file before any hashing state exists, so that a load failure
+    // has nothing to clean up.
+    struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+    if (ctx == NULL) {
+        fprintf(stderr, "%s: failed to load '%s'\n", __func__, fname.c_str());
+        return false;
+    }
+
+    // xxhash init
+    XXH64_state_t* xxhash_model_hash_state = NULL;
+    if (hash_params.xxhash) {
+        xxhash_model_hash_state = XXH64_createState();
+        if (xxhash_model_hash_state==NULL) {
+            abort();
+        }
+
+        XXH64_hash_t const seed = 0;
+        if (XXH64_reset(xxhash_model_hash_state, seed) == XXH_ERROR) {
+            abort();
+        }
+    }
+
+    // sha1 init
+    SHA1_CTX sha1_model_hash_ctx;
+    if (hash_params.sha1) {
+        SHA1Init(&sha1_model_hash_ctx);
+    }
+
+    const int n_tensors = gguf_get_n_tensors(ctx);
+    for (int i = 0; i < n_tensors; ++i) {
+        const char * name = gguf_get_tensor_name(ctx, i);
+        struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
+        auto n_bytes = ggml_nbytes(cur);
+        auto *raw_data = cur->data;
+
+        if (hash_params.xxhash) {
+
+            // Per Layer Hash
+            XXH64_hash_t hash = XXH64(raw_data, n_bytes, 0);
+            // cast + %llx: "%lx" is only 32 bits wide on some platforms
+            printf("xxhash  %016llx  %s:%s\n", (unsigned long long) hash, fname.c_str(), name);
+
+            // Overall Model Hash
+            if (XXH64_update(xxhash_model_hash_state, raw_data, n_bytes) == XXH_ERROR) {
+                abort();
+            }
+        }
+
+        if (hash_params.sha1) {
+            // NOTE(review): n_bytes is a size_t; confirm that the length
+            // parameter of SHA1()/SHA1Update() in sha1.h can represent tensors
+            // larger than 4 GiB.
+
+            // Per Layer Hash
+            char result[21];
+            SHA1( result, (const char *)raw_data, n_bytes);
+
+            char hex_result[41];
+            hash_sha1_to_hex((const unsigned char *) result, hex_result);
+
+            printf("sha1    %s  %s:%s\n", hex_result, fname.c_str(), name);
+
+            // Overall Model Hash
+            SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
+        }
+    }
+
+    if (hash_params.xxhash) {
+        XXH64_hash_t const hash = XXH64_digest(xxhash_model_hash_state);
+        printf("xxhash  %016llx  %s\n", (unsigned long long) hash, fname.c_str());
+
+        // release the state obtained from XXH64_createState()
+        XXH64_freeState(xxhash_model_hash_state);
+    }
+
+    if (hash_params.sha1) {
+        unsigned char result[21];
+        SHA1Final(result, &sha1_model_hash_ctx);
+
+        char hex_result[41];
+        hash_sha1_to_hex(result, hex_result);
+
+        printf("sha1    %s  %s\n", hex_result, fname.c_str());
+    }
+
+    ggml_free(ctx_data);
+    gguf_free(ctx);
+
+    return true;
+}
+
+// Entry point: parse the command line, hash the requested GGUF file and
+// report the outcome through the process exit status.
+int main(int argc, const char ** argv) {
+    hash_params params;
+    hash_params_parse(argc, argv, params);
+
+    // A failed hash run must not look like success to scripts calling the tool.
+    if (!gguf_hash(params)) {
+        return EXIT_FAILURE;
+    }
+
+    return 0;
+}