@@ -700,11 +700,11 @@ struct gguf_file_saver {
700
700
size_t info_offset;
701
701
size_t tensor_offset = 0 ;
702
702
703
// Constructor: builds `file` from `fname` with mode "wb", keeps the loader
// pointer `fl`, logs the output file name to stderr, then calls write_header()
// followed by the key-value writer.
// In this hunk the `new_ftype` parameter is removed and the call to
// write_hparams(new_ftype) is replaced by write_kv(), which takes no arguments.
- gguf_file_saver (const char * fname, gguf_file_loader * fl, enum llama_ftype new_ftype )
703
+ gguf_file_saver (const char * fname, gguf_file_loader * fl)
704
704
: file(fname, " wb" ), fl(fl) {
705
705
fprintf (stderr, " llama.cpp: saving model to %s\n " , fname);
706
706
// write_header() is called before the key-value writer below
write_header ();
707
- write_hparams (new_ftype );
707
+ write_kv ( );
708
708
}
709
709
710
710
void write_header () {
@@ -743,75 +743,38 @@ struct gguf_file_saver {
743
743
file.write_arr <float >(key, type, data);
744
744
}
745
745
746
- void write_hparams (enum llama_ftype new_ftype) {
746
+ // re-write the key-value section from the loaded file
747
+ void write_kv () {
747
748
const int32_t n_kv = gguf_get_n_kv (fl->gguf_ctx );
748
749
for (int i = 0 ; i < n_kv; ++i) {
749
750
const char * key = gguf_get_key (fl->gguf_ctx , i);
750
751
if (strcmp (key, " general.quantization_version" ) == 0 ) {
751
- file.write_val <uint32_t >(" general.quantization_version" , GGUF_TYPE_UINT32, new_ftype );
752
+ file.write_val <uint32_t >(" general.quantization_version" , GGUF_TYPE_UINT32, GGML_QNT_VERSION );
752
753
} else {
753
754
const gguf_type vtype = gguf_get_kv_type (fl->gguf_ctx , i);
754
755
755
- bool bool_val;
756
- float f32_val;
757
- int16_t i16_val;
758
- int32_t i32_val;
759
- int8_t i8_val;
760
- std::string str_val;
761
- uint16_t u16_val;
762
- uint32_t u32_val;
763
- uint8_t u8_val;
764
- gguf_type arr_type;
765
- int n_arr;
766
-
767
756
switch (vtype) {
768
- case GGUF_TYPE_BOOL:
769
- bool_val = gguf_get_val_bool (fl->gguf_ctx , i);
770
- file.write_val <bool >(key, GGUF_TYPE_BOOL, bool_val);
771
- break ;
772
- case GGUF_TYPE_FLOAT32:
773
- f32_val = gguf_get_val_f32 (fl->gguf_ctx , i);
774
- file.write_val <float >(key, GGUF_TYPE_FLOAT32, f32_val);
775
- break ;
776
- case GGUF_TYPE_INT16:
777
- i16_val = gguf_get_val_i16 (fl->gguf_ctx , i);
778
- file.write_val <int16_t >(key, GGUF_TYPE_INT16, i16_val);
779
- break ;
780
- case GGUF_TYPE_INT32:
781
- i32_val = gguf_get_val_i32 (fl->gguf_ctx , i);
782
- file.write_val <int32_t >(key, GGUF_TYPE_INT32, i32_val);
783
- break ;
784
- case GGUF_TYPE_INT8:
785
- i8_val = gguf_get_val_i8 (fl->gguf_ctx , i);
786
- file.write_val <int8_t >(key, GGUF_TYPE_INT8, i8_val);
787
- break ;
788
- case GGUF_TYPE_STRING:
789
- str_val = gguf_get_val_str (fl->gguf_ctx , i);
790
- file.write_str (key, GGUF_TYPE_STRING, str_val);
791
- break ;
792
- case GGUF_TYPE_UINT16:
793
- u16_val = gguf_get_val_u16 (fl->gguf_ctx , i);
794
- file.write_val <uint16_t >(key, GGUF_TYPE_UINT16, u16_val);
795
- break ;
796
- case GGUF_TYPE_UINT32:
797
- u32_val = gguf_get_val_u32 (fl->gguf_ctx , i);
798
- file.write_val <uint32_t >(key, GGUF_TYPE_UINT32, u32_val);
799
- break ;
800
- case GGUF_TYPE_UINT8:
801
- u8_val = gguf_get_val_u8 (fl->gguf_ctx , i);
802
- file.write_val <uint8_t >(key, GGUF_TYPE_UINT8, u8_val);
803
- break ;
757
+ case GGUF_TYPE_BOOL: file.write_val <bool > (key, GGUF_TYPE_BOOL, gguf_get_val_bool (fl->gguf_ctx , i)); break ;
758
+ case GGUF_TYPE_FLOAT32: file.write_val <float > (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (fl->gguf_ctx , i)); break ;
759
+ case GGUF_TYPE_INT16: file.write_val <int16_t > (key, GGUF_TYPE_INT16, gguf_get_val_i16 (fl->gguf_ctx , i)); break ;
760
+ case GGUF_TYPE_INT32: file.write_val <int32_t > (key, GGUF_TYPE_INT32, gguf_get_val_i32 (fl->gguf_ctx , i)); break ;
761
+ case GGUF_TYPE_INT8: file.write_val <int8_t > (key, GGUF_TYPE_INT8, gguf_get_val_i8 (fl->gguf_ctx , i)); break ;
762
+ case GGUF_TYPE_STRING: file.write_str (key, GGUF_TYPE_STRING, gguf_get_val_str (fl->gguf_ctx , i)); break ;
763
+ case GGUF_TYPE_UINT16: file.write_val <uint16_t >(key, GGUF_TYPE_UINT16, gguf_get_val_u16 (fl->gguf_ctx , i)); break ;
764
+ case GGUF_TYPE_UINT32: file.write_val <uint32_t >(key, GGUF_TYPE_UINT32, gguf_get_val_u32 (fl->gguf_ctx , i)); break ;
765
+ case GGUF_TYPE_UINT8: file.write_val <uint8_t > (key, GGUF_TYPE_UINT8, gguf_get_val_u8 (fl->gguf_ctx , i)); break ;
804
766
case GGUF_TYPE_ARRAY:
805
- arr_type = gguf_get_arr_type (fl->gguf_ctx , i);
806
- n_arr = gguf_get_arr_n (fl->gguf_ctx , i);
807
- if (arr_type == GGUF_TYPE_FLOAT32) {
808
- write_hparam_arr_f32 (key, arr_type, i, n_arr);
809
- } else if (arr_type == GGUF_TYPE_STRING) {
810
- write_hparam_arr_str (key, GGUF_TYPE_STRING, i, n_arr);
811
- } else {
812
- throw std::runtime_error (" not implemented" );
813
- }
814
- break ;
767
+ {
768
+ const gguf_type arr_type = gguf_get_arr_type (fl->gguf_ctx , i);
769
+ const int n_arr = gguf_get_arr_n (fl->gguf_ctx , i);
770
+ if (arr_type == GGUF_TYPE_FLOAT32) {
771
+ write_hparam_arr_f32 (key, arr_type, i, n_arr);
772
+ } else if (arr_type == GGUF_TYPE_STRING) {
773
+ write_hparam_arr_str (key, arr_type, i, n_arr);
774
+ } else {
775
+ throw std::runtime_error (" not implemented" );
776
+ }
777
+ } break ;
815
778
default :
816
779
throw std::runtime_error (format (" cannot recognize value type for key %s\n " , key));
817
780
}
@@ -3263,7 +3226,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
3263
3226
}
3264
3227
3265
3228
std::unique_ptr<llama_model_loader> model_loader (new llama_model_loader (fname_inp, /* use_mmap*/ false ));
3266
- gguf_file_saver file_saver (fname_out.c_str (), model_loader->file_loader .get (), params-> ftype );
3229
+ gguf_file_saver file_saver (fname_out.c_str (), model_loader->file_loader .get ());
3267
3230
3268
3231
#ifdef GGML_USE_K_QUANTS
3269
3232
int n_attention_wv = 0 ;
0 commit comments