Skip to content

Commit 57acd74

Browse files
committed
llama : no need to pass full file loader to the file saver
just gguf_ctx
1 parent 84de6a3 commit 57acd74

File tree

1 file changed

+24
-24
lines changed

1 file changed

+24
-24
lines changed

gguf-llama.cpp

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -696,12 +696,12 @@ struct gguf_file_saver {
696696
// we need to calculate the delta in number of bytes written with a counter as a struct member.
697697

698698
gguf_file file;
699-
gguf_file_loader * fl;
699+
gguf_context * ctx; // loaded gguf context (used to re-write the KV section (good enough for now))
700700
size_t info_offset;
701701
size_t tensor_offset = 0;
702702

703-
gguf_file_saver(const char * fname, gguf_file_loader * fl)
704-
: file(fname, "wb"), fl(fl) {
703+
gguf_file_saver(const char * fname, gguf_context * ctx)
704+
: file(fname, "wb"), ctx(ctx) {
705705
fprintf(stderr, "llama.cpp: saving model to %s\n", fname);
706706
write_header();
707707
write_kv();
@@ -710,15 +710,15 @@ struct gguf_file_saver {
710710
void write_header() {
711711
file.write_i32(GGUF_MAGIC);
712712
file.write_i32(GGUF_VERSION);
713-
file.write_i32(gguf_get_n_tensors(fl->gguf_ctx));
714-
file.write_i32(gguf_get_n_kv (fl->gguf_ctx));
713+
file.write_i32(gguf_get_n_tensors(ctx));
714+
file.write_i32(gguf_get_n_kv (ctx));
715715
}
716716

717717
void write_kv_arr_str(const std::string & key, enum gguf_type type, int i, int n_arr) {
718718
std::vector<std::string> data(n_arr);
719719

720720
for (int j = 0; j < n_arr; ++j) {
721-
std::string val = gguf_get_arr_str(fl->gguf_ctx, i, j);
721+
std::string val = gguf_get_arr_str(ctx, i, j);
722722
data[j] = val;
723723
}
724724

@@ -729,7 +729,7 @@ struct gguf_file_saver {
729729
std::vector<float> data(n_arr);
730730

731731
for (int j = 0; j < n_arr; ++j) {
732-
float val = gguf_get_arr_f32(fl->gguf_ctx, i, j);
732+
float val = gguf_get_arr_f32(ctx, i, j);
733733
data[j] = val;
734734
}
735735

@@ -738,28 +738,28 @@ struct gguf_file_saver {
738738

739739
// re-write the key-value section from the loaded file
740740
void write_kv() {
741-
const int32_t n_kv = gguf_get_n_kv(fl->gguf_ctx);
741+
const int32_t n_kv = gguf_get_n_kv(ctx);
742742
for (int i = 0; i < n_kv; ++i) {
743-
const char * key = gguf_get_key(fl->gguf_ctx, i);
743+
const char * key = gguf_get_key(ctx, i);
744744
if (strcmp(key, "general.quantization_version") == 0) {
745745
file.write_val<uint32_t>("general.quantization_version", GGUF_TYPE_UINT32, GGML_QNT_VERSION);
746746
} else {
747-
const gguf_type vtype = gguf_get_kv_type(fl->gguf_ctx, i);
747+
const gguf_type vtype = gguf_get_kv_type(ctx, i);
748748

749749
switch (vtype) {
750-
case GGUF_TYPE_BOOL: file.write_val<bool> (key, GGUF_TYPE_BOOL, gguf_get_val_bool(fl->gguf_ctx, i)); break;
751-
case GGUF_TYPE_FLOAT32: file.write_val<float> (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (fl->gguf_ctx, i)); break;
752-
case GGUF_TYPE_INT16: file.write_val<int16_t> (key, GGUF_TYPE_INT16, gguf_get_val_i16 (fl->gguf_ctx, i)); break;
753-
case GGUF_TYPE_INT32: file.write_val<int32_t> (key, GGUF_TYPE_INT32, gguf_get_val_i32 (fl->gguf_ctx, i)); break;
754-
case GGUF_TYPE_INT8: file.write_val<int8_t> (key, GGUF_TYPE_INT8, gguf_get_val_i8 (fl->gguf_ctx, i)); break;
755-
case GGUF_TYPE_STRING: file.write_str (key, GGUF_TYPE_STRING, gguf_get_val_str (fl->gguf_ctx, i)); break;
756-
case GGUF_TYPE_UINT16: file.write_val<uint16_t>(key, GGUF_TYPE_UINT16, gguf_get_val_u16 (fl->gguf_ctx, i)); break;
757-
case GGUF_TYPE_UINT32: file.write_val<uint32_t>(key, GGUF_TYPE_UINT32, gguf_get_val_u32 (fl->gguf_ctx, i)); break;
758-
case GGUF_TYPE_UINT8: file.write_val<uint8_t> (key, GGUF_TYPE_UINT8, gguf_get_val_u8 (fl->gguf_ctx, i)); break;
750+
case GGUF_TYPE_BOOL: file.write_val<bool> (key, GGUF_TYPE_BOOL, gguf_get_val_bool(ctx, i)); break;
751+
case GGUF_TYPE_FLOAT32: file.write_val<float> (key, GGUF_TYPE_FLOAT32, gguf_get_val_f32 (ctx, i)); break;
752+
case GGUF_TYPE_INT16: file.write_val<int16_t> (key, GGUF_TYPE_INT16, gguf_get_val_i16 (ctx, i)); break;
753+
case GGUF_TYPE_INT32: file.write_val<int32_t> (key, GGUF_TYPE_INT32, gguf_get_val_i32 (ctx, i)); break;
754+
case GGUF_TYPE_INT8: file.write_val<int8_t> (key, GGUF_TYPE_INT8, gguf_get_val_i8 (ctx, i)); break;
755+
case GGUF_TYPE_STRING: file.write_str (key, GGUF_TYPE_STRING, gguf_get_val_str (ctx, i)); break;
756+
case GGUF_TYPE_UINT16: file.write_val<uint16_t>(key, GGUF_TYPE_UINT16, gguf_get_val_u16 (ctx, i)); break;
757+
case GGUF_TYPE_UINT32: file.write_val<uint32_t>(key, GGUF_TYPE_UINT32, gguf_get_val_u32 (ctx, i)); break;
758+
case GGUF_TYPE_UINT8: file.write_val<uint8_t> (key, GGUF_TYPE_UINT8, gguf_get_val_u8 (ctx, i)); break;
759759
case GGUF_TYPE_ARRAY:
760760
{
761-
const gguf_type arr_type = gguf_get_arr_type(fl->gguf_ctx, i);
762-
const int n_arr = gguf_get_arr_n (fl->gguf_ctx, i);
761+
const gguf_type arr_type = gguf_get_arr_type(ctx, i);
762+
const int n_arr = gguf_get_arr_n (ctx, i);
763763
if (arr_type == GGUF_TYPE_FLOAT32) {
764764
write_kv_arr_f32(key, arr_type, i, n_arr);
765765
} else if (arr_type == GGUF_TYPE_STRING) {
@@ -776,9 +776,9 @@ struct gguf_file_saver {
776776

777777
info_offset = file.tell();
778778

779-
GGML_ASSERT(gguf_get_data_offset(fl->gguf_ctx) >= info_offset);
779+
GGML_ASSERT(gguf_get_data_offset(ctx) >= info_offset);
780780

781-
size_t count = gguf_get_data_offset(fl->gguf_ctx) - info_offset;
781+
size_t count = gguf_get_data_offset(ctx) - info_offset;
782782
file.write_zeros(count);
783783
file.seek(info_offset, SEEK_SET);
784784
GGML_ASSERT(info_offset == file.tell());
@@ -3219,7 +3219,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
32193219
}
32203220

32213221
std::unique_ptr<llama_model_loader> model_loader(new llama_model_loader(fname_inp, /*use_mmap*/ false));
3222-
gguf_file_saver file_saver(fname_out.c_str(), model_loader->file_loader.get());
3222+
gguf_file_saver file_saver(fname_out.c_str(), model_loader->file_loader->gguf_ctx);
32233223

32243224
#ifdef GGML_USE_K_QUANTS
32253225
int n_attention_wv = 0;

0 commit comments

Comments
 (0)