1
1
#include " llama.h"
2
- #include " ggml.h"
3
2
#include " common.h"
4
3
5
4
#include < algorithm>
6
5
#include < cmath>
7
- #include < cstdint>
8
6
#include < cstdlib>
9
7
#include < fstream>
10
- #include < ios>
11
8
#include < string>
12
9
#include < vector>
13
10
14
11
#include < stdio.h>
15
- #include < fcntl.h>
16
12
#include < string.h>
13
+ #include < climits>
14
+ #include < stdexcept>
15
+
16
+ #if defined(_WIN32)
17
+ #include < windows.h>
18
+ #ifndef PATH_MAX
19
+ #define PATH_MAX MAX_PATH
20
+ #endif
21
+ #include < io.h>
22
+ #endif
17
23
18
24
enum split_operation : uint8_t {
19
25
SPLIT_OP_SPLIT,
20
26
SPLIT_OP_MERGE,
21
27
};
22
28
23
- static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = " general.split" ;
24
- static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = " general.split_count" ;
25
-
26
- static const int SPLIT_FILENAME_MAX = 256 ;
27
-
28
- static const char * const SPLIT_FILENAME_FORMAT = " %s-%05d-of-%05d.gguf" ;
29
+ static const char * const LLM_KV_GENERAL_SPLIT_I_SPLIT = " split.no" ;
30
+ static const char * const LLM_KV_GENERAL_SPLIT_N_SPLIT = " split.count" ;
31
+ static const char * const LLM_KV_GENERAL_SPLIT_N_TENSORS = " split.tensors.count" ;
29
32
30
33
struct split_params {
31
34
split_operation operation = SPLIT_OP_SPLIT;
@@ -116,13 +119,13 @@ static bool split_params_parse(int argc, const char ** argv, split_params & para
116
119
try {
117
120
if (!split_params_parse_ex (argc, argv, params)) {
118
121
split_print_usage (argv[0 ]);
119
- exit (1 );
122
+ exit (EXIT_FAILURE );
120
123
}
121
124
}
122
125
catch (const std::invalid_argument & ex) {
123
126
fprintf (stderr, " %s\n " , ex.what ());
124
127
split_print_usage (argv[0 ]);
125
- exit (1 );
128
+ exit (EXIT_FAILURE );
126
129
}
127
130
return result;
128
131
}
@@ -134,12 +137,6 @@ static void zeros(std::ofstream & file, size_t n) {
134
137
}
135
138
}
136
139
137
- static std::string split_file_name (const std::string & path, int i_split, int n_split) {
138
- char f_split[SPLIT_FILENAME_MAX] = {0 };
139
- snprintf (f_split, sizeof (f_split), SPLIT_FILENAME_FORMAT, path.c_str (), i_split + 1 , n_split);
140
- return std::string (f_split);
141
- }
142
-
143
140
struct split_strategy {
144
141
const split_params params;
145
142
std::ifstream & f_input;
@@ -180,19 +177,21 @@ struct split_strategy {
180
177
if (i_split == 0 ) {
181
178
gguf_set_kv (ctx_out, ctx_gguf);
182
179
}
183
- gguf_set_val_u8 (ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
184
- gguf_set_val_u8 (ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
180
+ gguf_set_val_u16 (ctx_out, LLM_KV_GENERAL_SPLIT_I_SPLIT, i_split);
181
+ gguf_set_val_u16 (ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, n_split);
182
+ gguf_set_val_i32 (ctx_out, LLM_KV_GENERAL_SPLIT_N_TENSORS,n_tensors);
185
183
186
184
// populate the original tensors, so we get an initial metadata
187
185
for (int i = i_split * params.n_split_tensors ; i < n_tensors && i < (i_split + 1 ) * params.n_split_tensors ; ++i) {
188
186
struct ggml_tensor * meta = ggml_get_tensor (ctx_meta, gguf_get_tensor_name (ctx_gguf, i));
189
187
gguf_add_tensor (ctx_out, meta);
190
188
}
191
189
192
- auto split_name = split_file_name (params.output , i_split, n_split);
190
+ char split_path[PATH_MAX] = {0 };
191
+ llama_split_path (split_path, sizeof (split_path), params.output .c_str (), i_split, n_split);
193
192
194
- fprintf (stderr, " %s: %s ..." , __func__, split_name. c_str () );
195
- fout = std::ofstream (split_name , std::ios::binary);
193
+ fprintf (stderr, " %s: %s ..." , __func__, split_path );
194
+ fout = std::ofstream (split_path , std::ios::binary);
196
195
fout.exceptions (std::ofstream::failbit); // fail fast on write errors
197
196
198
197
auto meta_size = gguf_get_meta_size (ctx_out);
@@ -250,19 +249,23 @@ static void gguf_split(const split_params & split_params) {
250
249
std::ifstream f_input (split_params.input .c_str (), std::ios::binary);
251
250
if (!f_input.is_open ()) {
252
251
fprintf (stderr, " %s: failed to open input GGUF from %s\n " , __func__, split_params.input .c_str ());
253
- exit (1 );
252
+ exit (EXIT_FAILURE );
254
253
}
255
254
256
255
auto * ctx_gguf = gguf_init_from_file (split_params.input .c_str (), params);
257
256
if (!ctx_gguf) {
258
257
fprintf (stderr, " %s: failed to load input GGUF from %s\n " , __func__, split_params.input .c_str ());
259
- exit (1 );
258
+ exit (EXIT_FAILURE );
260
259
}
261
260
262
261
split_strategy strategy (split_params, f_input, ctx_gguf, ctx_meta);
262
+
263
+ char first_split_path[PATH_MAX] = {0 };
264
+ llama_split_path (first_split_path, sizeof (first_split_path),
265
+ split_params.output .c_str (), strategy.i_split , strategy.n_split );
263
266
fprintf (stderr, " %s: %s -> %s (%d tensors per file)\n " ,
264
267
__func__, split_params.input .c_str (),
265
- split_file_name (split_params. output , strategy. i_split , strategy. n_split ). c_str () ,
268
+ first_split_path ,
266
269
split_params.n_split_tensors );
267
270
268
271
strategy.split_start ();
@@ -298,7 +301,9 @@ static void gguf_merge(const split_params & split_params) {
298
301
std::vector<ggml_context *> ctx_metas;
299
302
std::vector<gguf_context *> ctx_ggufs;
300
303
301
- std::string split_prefix;
304
+ char split_path[PATH_MAX] = {0 };
305
+ strncpy (split_path, split_params.input .c_str (), sizeof (split_path) - 1 );
306
+ char split_prefix[PATH_MAX] = {0 };
302
307
303
308
// First pass to find KV and tensors metadata
304
309
for (int i_split = 0 ; i_split < n_split; i_split++) {
@@ -309,16 +314,15 @@ static void gguf_merge(const split_params & split_params) {
309
314
/* .ctx = */ &ctx_meta,
310
315
};
311
316
312
- auto split_name = split_params.input ;
313
317
if (i_split > 0 ) {
314
- split_name = split_file_name ( split_prefix, i_split, n_split);
318
+ llama_split_path (split_path, sizeof (split_path), split_prefix, i_split, n_split);
315
319
}
316
- fprintf (stderr, " %s: reading metadata %s ..." , __func__, split_name. c_str () );
320
+ fprintf (stderr, " %s: reading metadata %s ..." , __func__, split_path );
317
321
318
- auto * ctx_gguf = gguf_init_from_file (split_name. c_str () , params);
322
+ auto * ctx_gguf = gguf_init_from_file (split_path , params);
319
323
if (!ctx_gguf) {
320
324
fprintf (stderr, " \n %s: failed to load input GGUF from %s\n " , __func__, split_params.input .c_str ());
321
- exit (1 );
325
+ exit (EXIT_FAILURE );
322
326
}
323
327
ctx_ggufs.push_back (ctx_gguf);
324
328
ctx_metas.push_back (ctx_meta);
@@ -331,65 +335,43 @@ static void gguf_merge(const split_params & split_params) {
331
335
__func__,
332
336
LLM_KV_GENERAL_SPLIT_N_SPLIT);
333
337
gguf_free (ctx_gguf);
338
+ ggml_free (ctx_meta);
334
339
gguf_free (ctx_out);
335
340
fout.close ();
336
- exit (1 );
341
+ exit (EXIT_FAILURE );
337
342
}
338
343
339
- n_split = gguf_get_val_u8 (ctx_gguf, key_n_split);
344
+ n_split = gguf_get_val_u16 (ctx_gguf, key_n_split);
340
345
if (n_split < 1 ) {
341
346
fprintf (stderr,
342
347
" \n %s: input file does not contain a valid split count %d\n " ,
343
348
__func__,
344
349
n_split);
345
350
gguf_free (ctx_gguf);
351
+ ggml_free (ctx_meta);
346
352
gguf_free (ctx_out);
347
353
fout.close ();
348
- exit (1 );
354
+ exit (EXIT_FAILURE );
349
355
}
350
356
351
- // Do not trigger merge if we try to merge again the output
352
- gguf_set_val_u8 (ctx_out, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0 );
353
-
354
- // Set metadata from the first split
355
- gguf_set_kv (ctx_out, ctx_gguf);
356
- }
357
-
358
- // Verify the file naming
359
- {
360
- int i_split_file = 0 ;
361
- int n_split_file = 0 ;
362
- const char * i_split_format = " -00000-of-00000.gguf" ;
363
-
364
- if (split_name.size () < strlen (i_split_format)) {
365
- fprintf (stderr, " \n %s: unexpected input file name: %s\n " , __func__, split_params.input .c_str ());
366
- for (auto * _ctx_gguf : ctx_ggufs) {
367
- gguf_free (_ctx_gguf);
368
- }
357
+ // Verify the file naming and extract split_prefix
358
+ if (!llama_split_prefix (split_prefix, split_path, strlen (split_path), i_split, n_split)) {
359
+ fprintf (stderr, " \n %s: unexpected input file name: %s"
360
+ " i_split=%d"
361
+ " n_split=%d\n " , __func__,
362
+ split_path, i_split, n_split);
363
+ gguf_free (ctx_gguf);
364
+ ggml_free (ctx_meta);
369
365
gguf_free (ctx_out);
370
366
fout.close ();
371
- exit (1 );
367
+ exit (EXIT_FAILURE );
372
368
}
373
369
374
- split_prefix = split_name.substr (0 , split_name.size () - strlen (i_split_format));
375
-
376
- const char * split_name_c_str = split_name.c_str ();
377
- int n_part = sscanf (&split_name_c_str[0 ] + split_prefix.size (), " -%d-of-%d" , &i_split_file, &n_split_file);
370
+ // Do not trigger merge if we try to merge again the output
371
+ gguf_set_val_u16 (ctx_gguf, LLM_KV_GENERAL_SPLIT_N_SPLIT, 0 );
378
372
379
- if (n_part != 2 || i_split_file - 1 != i_split || n_split_file != n_split) {
380
- fprintf (stderr, " \n %s: unexpected input file name: %s"
381
- " i_split=%d i_split_file=%d"
382
- " n_split=%d n_split_file=%d\n " , __func__,
383
- split_params.input .c_str (),
384
- i_split, i_split_file,
385
- n_split, n_split_file);
386
- for (auto * _ctx_gguf : ctx_ggufs) {
387
- gguf_free (_ctx_gguf);
388
- }
389
- gguf_free (ctx_out);
390
- fout.close ();
391
- exit (1 );
392
- }
373
+ // Set metadata from the first split
374
+ gguf_set_kv (ctx_out, ctx_gguf);
393
375
}
394
376
395
377
auto n_tensors = gguf_get_n_tensors (ctx_gguf);
@@ -411,18 +393,19 @@ static void gguf_merge(const split_params & split_params) {
411
393
412
394
// Write tensors data
413
395
for (int i_split = 0 ; i_split < n_split; i_split++) {
414
- auto split_name = split_file_name ( split_prefix, i_split, n_split);
415
- std::ifstream f_input (split_name. c_str () , std::ios::binary);
396
+ llama_split_path (split_path, sizeof (split_path), split_prefix, i_split, n_split);
397
+ std::ifstream f_input (split_path , std::ios::binary);
416
398
if (!f_input.is_open ()) {
417
- fprintf (stderr, " %s: failed to open input GGUF from %s\n " , __func__, split_name.c_str ());
418
- for (auto * _ctx_gguf : ctx_ggufs) {
419
- gguf_free (_ctx_gguf);
399
+ fprintf (stderr, " %s: failed to open input GGUF from %s\n " , __func__, split_path);
400
+ for (uint32_t i = 0 ; i < ctx_ggufs.size (); i++) {
401
+ gguf_free (ctx_ggufs[i]);
402
+ ggml_free (ctx_metas[i]);
420
403
}
421
404
gguf_free (ctx_out);
422
405
fout.close ();
423
- exit (1 );
406
+ exit (EXIT_FAILURE );
424
407
}
425
- fprintf (stderr, " %s: writing tensors %s ..." , __func__, split_name. c_str () );
408
+ fprintf (stderr, " %s: writing tensors %s ..." , __func__, split_path );
426
409
427
410
auto * ctx_gguf = ctx_ggufs[i_split];
428
411
auto * ctx_meta = ctx_metas[i_split];
@@ -481,8 +464,8 @@ int main(int argc, const char ** argv) {
481
464
break ;
482
465
case SPLIT_OP_MERGE: gguf_merge (params);
483
466
break ;
484
- default :split_print_usage (argv[0 ]);
485
- exit (1 );
467
+ default : split_print_usage (argv[0 ]);
468
+ exit (EXIT_FAILURE );
486
469
}
487
470
488
471
return 0 ;
0 commit comments