16
16
#include " sha256/sha256.h"
17
17
#endif
18
18
19
// Namespace UUID used as the seed for model UUIDv5 generation.
// Derived with: uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
//
// The same value is provided in two forms that must stay in sync:
//   UUID_NAMESPACE_LLAMA_CPP      canonical 36-character textual form
//   UUID_NAMESPACE_LLAMA_CPP_HEX  the 16 raw bytes, usable as an array initializer list
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
22
+
19
23
struct hash_params {
20
24
std::string input;
21
25
bool xxhash = false ;
22
26
bool sha1 = false ;
27
+ bool uuid = false ;
23
28
#ifdef SHA256
24
29
bool sha256 = false ;
25
30
#endif
@@ -36,6 +41,7 @@ static void hash_print_usage(const char * executable) {
36
41
printf (" -h, --help show this help message and exit\n " );
37
42
printf (" --xxhash use xxhash\n " );
38
43
printf (" --sha1 use sha1\n " );
44
+ printf (" --uuid use uuid\n " );
39
45
#ifdef SHA256
40
46
printf (" --sha256 use sha256\n " );
41
47
#endif
@@ -69,6 +75,11 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
69
75
params.sha1 = true ;
70
76
}
71
77
78
+ if (arg == " --uuid" ) {
79
+ arg_found = true ;
80
+ params.uuid = true ;
81
+ }
82
+
72
83
#ifdef SHA256
73
84
if (arg == " --sha256" ) {
74
85
arg_found = true ;
@@ -83,6 +94,7 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & par
83
94
84
95
if (!params.xxhash
85
96
&& !params.sha1
97
+ && !params.uuid
86
98
#ifdef SHA256
87
99
&& !params.sha256
88
100
#endif
@@ -254,11 +266,78 @@ static bool gguf_hash(const hash_params & hash_params) {
254
266
return true ;
255
267
}
256
268
269
// Builds a version-5 UUID from a SHA-1 digest.
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
//
// sha1_digest: SHA-1 digest; only its first 16 bytes are used. The caller is
//              expected to have hashed the namespace bytes before the payload.
// uuid:        output buffer receiving the 16 UUID bytes.
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
    // The UUID starts out as the leading 128 bits of the digest.
    for (int i = 0; i < 16; ++i) {
        uuid[i] = sha1_digest[i];
    }

    // Version field: replace the high nibble of byte 6 with 5.
    uuid[6] = static_cast<unsigned char>((uuid[6] & 0x0F) | 0x50);

    // Variant field: force the two top bits of byte 8 to 0b10.
    uuid[8] = static_cast<unsigned char>((uuid[8] & 0x3F) | 0x80);
}
284
+
285
+ static bool gguf_uuid (const hash_params & hash_params) {
286
+ if (!hash_params.uuid ) {
287
+ return true ;
288
+ }
289
+
290
+ const std::string & fname = hash_params.input ;
291
+ struct ggml_context * ctx_data = NULL ;
292
+
293
+ struct gguf_init_params params = {
294
+ /* .no_alloc = */ false ,
295
+ /* .ctx = */ &ctx_data,
296
+ };
297
+
298
+ // sha1 init
299
+ SHA1_CTX sha1_model_hash_ctx;
300
+ SHA1Init (&sha1_model_hash_ctx);
301
+
302
+ unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
303
+ SHA1Update ( &sha1_model_hash_ctx, (unsigned char const *)uuidv5_namespace, sizeof (uuidv5_namespace));
304
+
305
+ struct gguf_context * ctx = gguf_init_from_file (fname.c_str (), params);
306
+ const int n_tensors = gguf_get_n_tensors (ctx);
307
+ for (int i = 0 ; i < n_tensors; ++i) {
308
+ const char * name = gguf_get_tensor_name (ctx, i);
309
+ struct ggml_tensor * cur = ggml_get_tensor (ctx_data, name);
310
+ auto n_bytes = ggml_nbytes (cur);
311
+ auto *raw_data = cur->data ;
312
+ SHA1Update ( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
313
+ }
314
+
315
+ unsigned char result[21 ];
316
+ SHA1Final (result, &sha1_model_hash_ctx);
317
+
318
+ unsigned char uuid[16 ];
319
+ generate_uuidv5 (result, uuid);
320
+
321
+ char string_buffer[37 ] = {0 };
322
+ sprintf (string_buffer, " %02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x" ,
323
+ uuid[0 ], uuid[1 ], uuid[2 ], uuid[3 ],
324
+ uuid[4 ], uuid[5 ], uuid[6 ], uuid[7 ],
325
+ uuid[8 ], uuid[9 ], uuid[10 ], uuid[11 ],
326
+ uuid[12 ], uuid[13 ], uuid[14 ], uuid[15 ]);
327
+ printf (" UUIDv5 %s %s\n " , string_buffer, fname.c_str ());
328
+
329
+ ggml_free (ctx_data);
330
+ gguf_free (ctx);
331
+
332
+ return true ;
333
+ }
334
+
257
335
int main (int argc, const char ** argv) {
258
336
hash_params params;
259
337
hash_params_parse (argc, argv, params);
260
338
261
339
gguf_hash (params);
340
+ gguf_uuid (params);
262
341
263
342
return 0 ;
264
343
}
0 commit comments