@@ -364,44 +364,18 @@ static size_t llama_calc_tensor_size(const std::vector<uint32_t> & ne, enum ggml
     return size / ggml_blck_size(type);
 }
 
-struct llama_load_tensor_shard {
-    std::vector<uint32_t> ne;
-    size_t size;
-    enum ggml_type type;
-    size_t file_off;
-
-    void calc_size() {
-        size = llama_calc_tensor_size(ne, type);
-    }
-};
-
 struct llama_load_tensor {
-    llama_load_tensor_shard first_shard;
-
     std::string name;
     enum ggml_type type = GGML_TYPE_F32;
     std::vector<uint32_t> ne;
+    size_t file_off;
     size_t size;
     struct ggml_tensor * ggml_tensor = NULL;
     uint8_t * data;
 
     llama_load_tensor(const std::string & name) : name(name) {}
 
     void calc_all() {
-        calc_type();
-        calc_ne();
-        calc_size();
-    }
-
-    void calc_type() {
-        type = first_shard.type;
-    }
-
-    void calc_ne() {
-        ne = first_shard.ne;
-    }
-
-    void calc_size() {
         size = llama_calc_tensor_size(ne, type);
     }
 };
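With the shard struct gone, a tensor's type, shape, file offset, and size live directly on llama_load_tensor, and calc_all() collapses to the size computation. A minimal standalone sketch of the flattened record (the size function below is a stub that assumes f32 elements; it is not the real llama_calc_tensor_size):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <vector>

    // Stub standing in for llama_calc_tensor_size: pretend every element is f32.
    static size_t sketch_tensor_size(const std::vector<uint32_t> & ne) {
        size_t n = sizeof(float);
        for (uint32_t d : ne) { n *= d; }
        return n;
    }

    struct tensor_sketch {
        std::string           name;
        std::vector<uint32_t> ne;
        size_t                file_off = 0;  // stored directly, no first_shard hop
        size_t                size     = 0;

        void calc_all() { size = sketch_tensor_size(ne); }
    };

    int main() {
        tensor_sketch t;
        t.name     = "tok_embeddings.weight";
        t.ne       = {4096, 32000};
        t.file_off = 1024;
        t.calc_all();
        printf("%s: %zu bytes at offset %zu\n", t.name.c_str(), t.size, t.file_off);
    }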
@@ -491,17 +465,17 @@ struct llama_file_loader {
     }
     void read_tensor_metadata(llama_load_tensors_map & tensors_map) {
         while (file.tell() < file.size) {
-            llama_load_tensor_shard shard;
             uint32_t n_dims = file.read_u32();
             uint32_t name_len = file.read_u32();
-            shard.type = (enum ggml_type) file.read_u32();
-            shard.ne.resize(n_dims);
-            file.read_raw(shard.ne.data(), sizeof(shard.ne[0]) * n_dims);
+            ggml_type type = (enum ggml_type) file.read_u32();
+            std::vector<uint32_t> ne;
+            ne.resize(n_dims);
+            file.read_raw(ne.data(), sizeof(ne[0]) * n_dims);
             std::string name = file.read_string(name_len);
             if (n_dims < 1 || n_dims > 2) {
                 throw std::runtime_error(format("llama.cpp: tensor '%s' should not be %u-dimensional", name.c_str(), n_dims));
             }
-            switch (shard.type) {
+            switch (type) {
                 case GGML_TYPE_F32:
                 case GGML_TYPE_F16:
                 case GGML_TYPE_Q4_0:
@@ -516,7 +490,7 @@ struct llama_file_loader {
                 case GGML_TYPE_Q6_K:
                     break;
                 default: {
-                    throw std::runtime_error(format("unrecognized tensor type %u\n", shard.type));
+                    throw std::runtime_error(format("unrecognized tensor type %u\n", type));
                 }
             }
 
@@ -525,11 +499,6 @@ struct llama_file_loader {
                 file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR);
             }
 
-            shard.file_off = file.tell();
-
-            shard.calc_size();
-            file.seek(shard.size, SEEK_CUR);
-
             auto it = tensors_map.name_to_idx.find(name);
             size_t idx;
             if (it != tensors_map.name_to_idx.end()) {
@@ -539,7 +508,14 @@ struct llama_file_loader {
                 idx = tensors_map.tensors.size() - 1;
                 tensors_map.name_to_idx.emplace(name, idx);
             }
-            tensors_map.tensors.at(idx).first_shard = shard;
+            auto & tensor = tensors_map.tensors.at(idx);
+
+            tensor.ne = ne;
+            tensor.type = type;
+            tensor.file_off = file.tell();
+
+            tensor.calc_all();
+            file.seek(tensor.size, SEEK_CUR);
         }
     }
 };
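Taken together, the loop now parses one self-describing record per tensor. As the reads above imply (for this pre-GGUF ggml container), each record is laid out roughly as:

    uint32 n_dims | uint32 name_len | uint32 type
    uint32 ne[n_dims] | char name[name_len]
    padding to the next 32-byte boundary | raw tensor data (tensor.size bytes)

The padding seek, file.seek(-static_cast<ptrdiff_t>(file.tell()) & 31, SEEK_CUR), relies on (-offset) & 31 being the distance to the next multiple of 32. A standalone check of that identity (illustrative, not from the commit):

    #include <cstdio>

    // (-offset) & 31 == bytes needed to reach the next 32-byte boundary.
    static long pad_to_32(long offset) { return (-offset) & 31; }

    int main() {
        printf("%ld %ld %ld\n", pad_to_32(100), pad_to_32(128), pad_to_32(1));
        // prints: 28 0 31  (100 + 28 = 128; 128 already aligned; 1 + 31 = 32)
    }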
@@ -633,7 +609,7 @@ struct llama_model_loader {
 
     bool alignment_prevents_mmap() {
         for (const llama_load_tensor & lt : tensors_map.tensors) {
-            if (lt.first_shard.file_off & 3) {
+            if (lt.file_off & 3) {
                 return true;
            }
        }
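For non-negative offsets, lt.file_off & 3 equals lt.file_off % 4, so the loop returns true as soon as any tensor's data does not start on a 4-byte boundary, which disables mmap for the whole file. A tiny illustration with made-up offsets:

    #include <cstdio>

    int main() {
        unsigned long offs[] = {4096, 4098, 37};
        for (unsigned long o : offs) {
            // low two bits set means not 4-byte aligned, so no mmap
            printf("%lu -> %s\n", o, (o & 3) ? "misaligned" : "ok");
        }
    }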
@@ -646,7 +622,7 @@ struct llama_model_loader {
             throw std::runtime_error(std::string("missing tok_embeddings.weight"));
         }
         const llama_load_tensor & lt = tensors_map.tensors.at(it->second);
-        return file_loader->hparams.n_embd / lt.first_shard.ne.at(0);
+        return file_loader->hparams.n_embd / lt.ne.at(0);
     }
 
     void calc_sizes(size_t * ctx_size_p, size_t * mmapped_size_p) const {
@@ -768,10 +744,10 @@ struct llama_model_loader {
 
     void load_data_for(llama_load_tensor & lt) {
         if (use_mmap) {
-            lt.data = (uint8_t *) mapping->addr + lt.first_shard.file_off;
+            lt.data = (uint8_t *) mapping->addr + lt.file_off;
         } else {
             llama_file & file = file_loader->file;
-            file.seek(lt.first_shard.file_off, SEEK_SET);
+            file.seek(lt.file_off, SEEK_SET);
             file.read_raw(lt.data, lt.size);
         }
 
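load_data_for() keeps its two paths, now reading lt.file_off directly: under mmap the tensor's data pointer simply aliases the mapping at file_off (zero copy), while the fallback seeks and reads the bytes into memory the loader already allocated. A rough sketch of that distinction, with a byte array standing in for the mapping (the names here are illustrative, not llama.cpp API):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    static uint8_t mapping[4096];  // stand-in for the mmap'd model file

    int main() {
        const size_t file_off = 64;
        const size_t size     = 16;
        const bool   use_mmap = true;

        uint8_t   buf[16];
        uint8_t * data = nullptr;
        if (use_mmap) {
            data = mapping + file_off;              // zero copy: point into the map
        } else {
            // stands in for file.seek(file_off, SEEK_SET) + file.read_raw(buf, size)
            memcpy(buf, mapping + file_off, size);
            data = buf;
        }
        printf("tensor data at %p (%zu bytes)\n", (void *) data, size);
    }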