Skip to content

Commit 0dbd834

Browse files
committed
gguf-hash: add --uuid option to c implementation for model ID
1 parent a410d23 commit 0dbd834

File tree

3 files changed

+90
-3
lines changed

3 files changed

+90
-3
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ BUILD_TARGETS = \
1414
llama-finetune \
1515
llama-gbnf-validator \
1616
llama-gguf \
17-
llama-gguf-hash
17+
llama-gguf-hash \
1818
llama-gguf-split \
1919
llama-gritlm \
2020
llama-imatrix \
@@ -920,8 +920,14 @@ llama-gguf: examples/gguf/gguf.cpp ggml.o $(OBJS)
920920
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
921921
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
922922

923-
llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
924-
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
923+
xxhash.o: examples/gguf-hash/deps/xxhash/xxhash.c examples/gguf-hash/deps/xxhash/xxhash.h
924+
$(CXX) $(CXXFLAGS) -c $< -o $@
925+
926+
sha1.o: examples/gguf-hash/deps/sha1/sha1.c examples/gguf-hash/deps/sha1/sha1.h
927+
$(CXX) $(CXXFLAGS) -c $< -o $@
928+
929+
llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp ggml.o llama.o xxhash.o sha1.o $(COMMON_DEPS) $(OBJS)
930+
$(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<)
925931
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
926932

927933
llama-gguf-split: examples/gguf-split/gguf-split.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)

examples/gguf-hash/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ CLI to hash GGUF files.
66

77
- `--xxhash`: use xxhash (default)
88
- `--sha1`: use sha1
9+
- `--uuid`: generate a UUIDv5 from the model's tensor data
910

1011
### Compile Example
1112

@@ -16,6 +17,7 @@ make -C build llama-gguf-hash VERBOSE=1
1617
./build/bin/llama-gguf-hash test.gguf
1718
./build/bin/llama-gguf-hash --xxhash test.gguf
1819
./build/bin/llama-gguf-hash --sha1 test.gguf
20+
./build/bin/llama-gguf-hash --uuid test.gguf
1921
```
2022

2123
### Crypto/Hash Libraries Used

examples/gguf-hash/gguf-hash.cpp

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,15 @@
1616
#include "sha256/sha256.h"
1717
#endif
1818

19+
// uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
20+
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
21+
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
22+
1923
struct hash_params {
2024
std::string input;
2125
bool xxhash = false;
2226
bool sha1 = false;
27+
bool uuid = false;
2328
#ifdef SHA256
2429
bool sha256 = false;
2530
#endif
@@ -36,6 +41,7 @@ static void hash_print_usage(const char * executable) {
3641
printf(" -h, --help show this help message and exit\n");
3742
printf(" --xxhash use xxhash\n");
3843
printf(" --sha1 use sha1\n");
44+
printf(" --uuid use uuid\n");
3945
#ifdef SHA256
4046
printf(" --sha256 use sha256\n");
4147
#endif
@@ -69,6 +75,11 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
6975
params.sha1 = true;
7076
}
7177

78+
if (arg == "--uuid") {
79+
arg_found = true;
80+
params.uuid = true;
81+
}
82+
7283
#ifdef SHA256
7384
if (arg == "--sha256") {
7485
arg_found = true;
@@ -83,6 +94,7 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
8394

8495
if (!params.xxhash
8596
&& !params.sha1
97+
&& !params.uuid
8698
#ifdef SHA256
8799
&& !params.sha256
88100
#endif
@@ -254,11 +266,78 @@ static bool gguf_hash(const hash_params & hash_params) {
254266
return true;
255267
}
256268

269+
// Build a version-5 UUID out of a SHA-1 digest.
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
//
// sha1_digest: 20-byte digest; only its leading 16 bytes are consumed.
// uuid:        receives the 16 UUID bytes.
//
// The caller must already have hashed the namespace together with the name;
// this routine only truncates the digest and stamps the version/variant fields.
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
    // The UUID starts out as the first 16 bytes of the digest.
    for (int pos = 0; pos != 16; ++pos) {
        uuid[pos] = sha1_digest[pos];
    }

    // Version field: high nibble of byte 6 becomes 5, low nibble is kept.
    uuid[6] = (unsigned char) ((uuid[6] & 0x0F) | 0x50);

    // Variant field: top two bits of byte 8 become 0b10, low six bits are kept.
    uuid[8] = (unsigned char) ((uuid[8] & 0x3F) | 0x80);
}
284+
285+
static bool gguf_uuid(const hash_params & hash_params) {
286+
if (!hash_params.uuid) {
287+
return true;
288+
}
289+
290+
const std::string & fname = hash_params.input;
291+
struct ggml_context * ctx_data = NULL;
292+
293+
struct gguf_init_params params = {
294+
/*.no_alloc = */ false,
295+
/*.ctx = */ &ctx_data,
296+
};
297+
298+
// sha1 init
299+
SHA1_CTX sha1_model_hash_ctx;
300+
SHA1Init(&sha1_model_hash_ctx);
301+
302+
unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
303+
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
304+
305+
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
306+
const int n_tensors = gguf_get_n_tensors(ctx);
307+
for (int i = 0; i < n_tensors; ++i) {
308+
const char * name = gguf_get_tensor_name(ctx, i);
309+
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
310+
auto n_bytes = ggml_nbytes(cur);
311+
auto *raw_data = cur->data;
312+
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
313+
}
314+
315+
unsigned char result[21];
316+
SHA1Final(result, &sha1_model_hash_ctx);
317+
318+
unsigned char uuid[16];
319+
generate_uuidv5(result, uuid);
320+
321+
char string_buffer[37] = {0};
322+
sprintf(string_buffer, "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
323+
uuid[0], uuid[1], uuid[2], uuid[3],
324+
uuid[4], uuid[5], uuid[6], uuid[7],
325+
uuid[8], uuid[9], uuid[10], uuid[11],
326+
uuid[12], uuid[13], uuid[14], uuid[15]);
327+
printf("UUIDv5 %s %s\n", string_buffer, fname.c_str());
328+
329+
ggml_free(ctx_data);
330+
gguf_free(ctx);
331+
332+
return true;
333+
}
334+
257335
int main(int argc, const char ** argv) {
258336
hash_params params;
259337
hash_params_parse(argc, argv, params);
260338

261339
gguf_hash(params);
340+
gguf_uuid(params);
262341

263342
return 0;
264343
}

0 commit comments

Comments
 (0)