Commit bcd87ca

update quantize and lora
1 parent 5241045 commit bcd87ca


llama.cpp

Lines changed: 18 additions & 30 deletions
@@ -2236,8 +2236,12 @@ struct llama_model_loader {
         return gguf_get_tensor_name(ctx_gguf, i);
     }

+    struct ggml_tensor * get_tensor_meta(const char * name) const {
+        return ggml_get_tensor(ctx_meta, name);
+    }
+
     struct ggml_tensor * get_tensor_meta(int i) const {
-        return ggml_get_tensor(ctx_meta, get_tensor_name(i));
+        return get_tensor_meta(get_tensor_name(i));
     }

     struct ggml_tensor * create_tensor_for(struct ggml_context * ctx, struct ggml_tensor * meta, ggml_backend_type backend) {
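
Note: the name-based overload is new; the index-based lookup is now a thin wrapper over it. A minimal usage sketch (hypothetical caller; the tensor name is illustrative, not from this commit), assuming a constructed llama_model_loader ml:

    struct ggml_tensor * by_name = ml.get_tensor_meta("output.weight"); // direct lookup in ctx_meta
    struct ggml_tensor * by_idx  = ml.get_tensor_meta(0);               // forwards via get_tensor_name(0)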
@@ -2302,7 +2306,7 @@ struct llama_model_loader {
         return gguf_get_data_offset(ctx_gguf) + gguf_get_tensor_offset(ctx_gguf, idx);
     }

-    void init_mapping() {
+    void init_mapping(bool prefetch = true) {
         /*
         // prefetch only CPU tensors
         if (use_mmap) {
@@ -2320,17 +2324,19 @@ struct llama_model_loader {
         */
         // prefetch the whole file - all the data is needed anyway
         if (use_mmap) {
-            mapping.reset(new llama_mmap(&file, -1, ggml_is_numa()));
+            mapping.reset(new llama_mmap(&file, prefetch ? -1 : 0, ggml_is_numa()));
         }
     }

-    // for backwards compatibility only
+    // for backwards compatibility, does not support ggml-backend
     void load_data_for(struct ggml_tensor * cur) const {
         const size_t offs = file_offset(ggml_get_name(cur));

-        if (use_mmap) {
+        if (use_mmap && mapping) {
+            GGML_ASSERT(cur->data == nullptr);
             cur->data = (uint8_t *)mapping->addr + offs;
         } else {
+            GGML_ASSERT(cur->data != nullptr);
             file.seek(offs, SEEK_SET);
             file.read_raw(cur->data, ggml_nbytes(cur));
         }
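
Note: the prefetch flag only selects the size passed to llama_mmap: -1 maps and prefetches the whole file, 0 maps it without prefetching. A minimal sketch of the two call patterns this commit relies on (assuming use_mmap is set):

    ml.init_mapping();      // prefetch == true  -> llama_mmap(&file, -1, ...): whole file prefetched
    ml.init_mapping(false); // prefetch == false -> llama_mmap(&file,  0, ...): mapped, no prefetch

The new asserts in load_data_for() make its contract explicit: with a live mapping the tensor's data pointer must still be null (it gets pointed into the mapping), otherwise the caller must already have allocated a buffer for the file read.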
@@ -8569,9 +8575,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
 #endif

     llama_model_loader ml(fname_inp, use_mmap, NULL);
-    if (ml.use_mmap) {
-        ml.mapping.reset(new llama_mmap(&ml.file, /* prefetch */ 0, ggml_is_numa()));
-    }
+    ml.init_mapping(false); // no prefetching?

     llama_model model;
     llm_load_arch(ml, model);
@@ -8650,8 +8654,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             }
             tensor->data = read_data.data();
         }
-        GGML_ASSERT(!"not implemented");
-        //ml.load_data_for(tensor); TODO
+        ml.load_data_for(tensor);

         LLAMA_LOG_INFO("[%4d/%4d] %36s - [%s], type = %6s, ",
             ++idx, ml.n_tensors,
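
Note: re-enabling load_data_for() here relies on tensor->data matching the branch taken inside it. A simplified sketch of the read path around this hunk (variable names taken from the surrounding code, not a verbatim excerpt):

    std::vector<uint8_t> read_data;
    if (!ml.use_mmap) {
        read_data.resize(ggml_nbytes(tensor)); // load_data_for() will read_raw() into this buffer
        tensor->data = read_data.data();       // satisfies GGML_ASSERT(cur->data != nullptr)
    }
    ml.load_data_for(tensor); // with use_mmap && mapping, data is pointed into the mmap instead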
@@ -8871,24 +8874,10 @@ static int llama_apply_lora_from_file_internal(
     // load base model
     std::unique_ptr<llama_model_loader> ml;

-    unique_context base_ctx(nullptr, ggml_free);
-    if (path_base_model) {
+    if (path_base_model) {
         LLAMA_LOG_INFO("%s: loading base model from '%s'\n", __func__, path_base_model);
-        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*kv_overrides*/ NULL));
-
-        size_t ctx_size = ggml_tensor_overhead() * ml->n_tensors;
-
-        ggml_init_params base_params;
-        base_params.mem_size   = ctx_size;
-        base_params.mem_buffer = NULL;
-        base_params.no_alloc   = true;
-
-        base_ctx.reset(ggml_init(base_params));
-
-        // maybe this should be in llama_model_loader
-        if (ml->use_mmap) {
-            ml->mapping.reset(new llama_mmap(&ml->file, /* prefetch */ 0, ggml_is_numa()));
-        }
+        ml.reset(new llama_model_loader(path_base_model, /*use_mmap*/ true, /*kv_overrides*/ nullptr));
+        ml->init_mapping(false); // no prefetching
     }

     // read tensors and apply
@@ -9001,9 +8990,8 @@ static int llama_apply_lora_from_file_internal(
             return 1;
         }

-        base_t = ml->create_tensor(base_ctx.get(), base_name, { dest_t->ne[0], dest_t->ne[1] }, GGML_BACKEND_CPU);
-        GGML_ASSERT(!"not implemented");
-        //ml->load_data_for(base_t); // TODO
+        base_t = ml->get_tensor_meta(base_name.c_str());
+        ml->load_data_for(base_t);
     } else {
         base_t = dest_t;
     }
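
Note: the separate base_ctx is gone because the loader already keeps tensor metadata in ctx_meta; and since the lora loader above was created with /*use_mmap*/ true and init_mapping(false), load_data_for() takes the mmap branch here. A condensed sketch of the new path, assuming the mapping succeeded:

    base_t = ml->get_tensor_meta(base_name.c_str()); // metadata tensor from ctx_meta, data == nullptr
    ml->load_data_for(base_t);                       // mmap branch: base_t->data = mapping->addr + offs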
