
Commit 4806c39

lora : update API names

ggml-ci

1 parent c89e808

12 files changed, +149 −153 lines


common/common.cpp

Lines changed: 13 additions & 12 deletions
@@ -908,12 +908,13 @@ struct common_init_result common_init_from_params(common_params & params) {
             return iparams;
         }
 
-        int err = llama_control_vector_apply(lctx,
-                                             cvec.data.data(),
-                                             cvec.data.size(),
-                                             cvec.n_embd,
-                                             params.control_vector_layer_start,
-                                             params.control_vector_layer_end);
+        int err = llama_apply_adapter_cvec(
+                lctx,
+                cvec.data.data(),
+                cvec.data.size(),
+                cvec.n_embd,
+                params.control_vector_layer_start,
+                params.control_vector_layer_end);
         if (err) {
             llama_free(lctx);
             llama_model_free(model);
@@ -924,8 +925,8 @@ struct common_init_result common_init_from_params(common_params & params) {
 
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
-        llama_lora_adapter_ptr lora;
-        lora.reset(llama_lora_adapter_init(model, la.path.c_str()));
+        llama_adapter_lora_ptr lora;
+        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
         if (lora == nullptr) {
             LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str());
             llama_free(lctx);
@@ -938,7 +939,7 @@ struct common_init_result common_init_from_params(common_params & params) {
     }
 
     if (!params.lora_init_without_apply) {
-        common_lora_adapters_apply(lctx, params.lora_adapters);
+        common_adapter_loras_apply(lctx, params.lora_adapters);
     }
 
     if (params.sampling.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
@@ -1005,11 +1006,11 @@ struct common_init_result common_init_from_params(common_params & params) {
     return iparams;
 }
 
-void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_info> & lora) {
-    llama_lora_adapter_clear(ctx);
+void common_adapter_loras_apply(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
+    llama_clear_adapter_lora(ctx);
     for (auto & la : lora) {
         if (la.scale != 0.0f) {
-            llama_lora_adapter_set(ctx, la.ptr, la.scale);
+            llama_set_adapter_lora(ctx, la.ptr, la.scale);
         }
     }
 }
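
A minimal sketch of the renamed adapter lifecycle, using only functions that appear in this diff; the surrounding model/context creation and error handling are assumed rather than shown:

    #include "llama.h"

    // sketch: load one adapter, attach it to a context, then detach and free it
    static void run_with_lora(llama_model * model, llama_context * ctx, const char * path) {
        // was: llama_lora_adapter_init
        llama_adapter_lora * adapter = llama_adapter_lora_init(model, path);
        if (adapter == NULL) {
            return; // loading the adapter file failed
        }

        // was: llama_lora_adapter_set; attaches without modifying model weights
        llama_set_adapter_lora(ctx, adapter, 1.0f);

        // ... decode as usual ...

        // was: llama_lora_adapter_clear
        llama_clear_adapter_lora(ctx);

        // was: llama_lora_adapter_free; adapters are also freed when the
        // associated model is deleted
        llama_adapter_lora_free(adapter);
    }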

common/common.h

Lines changed: 6 additions & 6 deletions
@@ -24,11 +24,11 @@
 
 #define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
 
-struct common_lora_adapter_info {
+struct common_adapter_lora_info {
     std::string path;
     float scale;
 
-    struct llama_lora_adapter * ptr;
+    struct llama_adapter_lora * ptr;
 };
 
 using llama_tokens = std::vector<llama_token>;
@@ -246,8 +246,8 @@ struct common_params {
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
 
-    bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply)
-    std::vector<common_lora_adapter_info> lora_adapters; // lora adapter path with user defined scale
+    bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
+    std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
 
     std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
 
@@ -481,7 +481,7 @@ struct common_init_result {
     llama_model_ptr model;
     llama_context_ptr context;
 
-    std::vector<llama_lora_adapter_ptr> lora;
+    std::vector<llama_adapter_lora_ptr> lora;
 };
 
 struct common_init_result common_init_from_params(common_params & params);
@@ -503,7 +503,7 @@ struct llama_model * common_load_model_from_hf(
     const struct llama_model_params & params);
 
 // clear LoRA adapters from context, then apply new list of adapters
-void common_lora_adapters_apply(struct llama_context * ctx, std::vector<common_lora_adapter_info> & lora);
+void common_adapter_loras_apply(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
 
 //
 // Batch utils
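
For callers of the common helpers only the type and helper names change; a short usage sketch (the adapter paths and handles here are hypothetical):

    #include "common.h"

    static void reapply_loras(llama_context * ctx,
                              llama_adapter_lora * a, llama_adapter_lora * b) {
        std::vector<common_adapter_lora_info> loras;
        loras.push_back({ "adapters/style.gguf", 0.8f, a }); // path, scale, loaded handle
        loras.push_back({ "adapters/tone.gguf",  0.0f, b }); // scale 0.0f => not set

        // clears all adapters on ctx, then sets each entry with scale != 0.0f
        common_adapter_loras_apply(ctx, loras);
    }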

examples/export-lora/export-lora.cpp

Lines changed: 1 addition & 1 deletion
@@ -130,7 +130,7 @@ struct lora_merge_ctx {
 
     lora_merge_ctx(
             std::string & base_fname,
-            std::vector<common_lora_adapter_info> & lora_files,
+            std::vector<common_adapter_lora_info> & lora_files,
             std::string & outfile,
             int n_threads) : base_model(base_fname, 0), n_threads(n_threads), fout(outfile, std::ios::binary) {
         fout.exceptions(std::ofstream::failbit); // fail fast on write errors

examples/server/server.cpp

Lines changed: 4 additions & 4 deletions
@@ -98,7 +98,7 @@ struct slot_params {
     int64_t t_max_prompt_ms  = -1; // TODO: implement
     int64_t t_max_predict_ms = -1; // if positive, limit the generation phase to this time limit
 
-    std::vector<common_lora_adapter_info> lora;
+    std::vector<common_adapter_lora_info> lora;
 
     std::vector<std::string> antiprompt;
     std::vector<std::string> response_fields;
@@ -198,7 +198,7 @@ struct server_task {
     bool metrics_reset_bucket = false;
 
     // used by SERVER_TASK_TYPE_SET_LORA
-    std::vector<common_lora_adapter_info> set_lora;
+    std::vector<common_adapter_lora_info> set_lora;
 
     server_task(server_task_type type) : type(type) {}
 
@@ -1131,7 +1131,7 @@ struct server_slot {
 
     common_speculative * spec = nullptr;
 
-    std::vector<common_lora_adapter_info> lora;
+    std::vector<common_adapter_lora_info> lora;
 
     // the index relative to completion multi-task request
     size_t index = 0;
@@ -2927,7 +2927,7 @@ struct server_context {
             // make sure we're in the right embedding mode
             llama_set_embeddings(ctx, slot_batched->is_non_causal());
             // apply lora, only need to do it once per batch
-            common_lora_adapters_apply(ctx, slot_batched->lora);
+            common_adapter_loras_apply(ctx, slot_batched->lora);
         }
 
         // process the created batch of tokens

examples/server/utils.hpp

Lines changed: 5 additions & 5 deletions
@@ -799,8 +799,8 @@ static std::vector<llama_token_data> get_token_probabilities(llama_context * ctx
 }
 
 static bool are_lora_equal(
-        const std::vector<common_lora_adapter_info> & l1,
-        const std::vector<common_lora_adapter_info> & l2) {
+        const std::vector<common_adapter_lora_info> & l1,
+        const std::vector<common_adapter_lora_info> & l2) {
     if (l1.size() != l2.size()) {
         return false;
     }
@@ -814,10 +814,10 @@ static bool are_lora_equal(
 }
 
 // parse lora config from JSON request, returned a copy of lora_base with updated scale
-static std::vector<common_lora_adapter_info> parse_lora_request(
-        const std::vector<common_lora_adapter_info> & lora_base,
+static std::vector<common_adapter_lora_info> parse_lora_request(
+        const std::vector<common_adapter_lora_info> & lora_base,
         const json & data) {
-    std::vector<common_lora_adapter_info> lora(lora_base);
+    std::vector<common_adapter_lora_info> lora(lora_base);
     int max_idx = lora.size();
 
     // clear existing value
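
parse_lora_request only renames its types; the JSON it consumes is unchanged by this commit. As an assumed shape, based on the server's per-request lora override, where each entry selects one of the adapters preloaded at startup by index and overrides its scale:

    [
        { "id": 0, "scale": 0.5 },
        { "id": 1, "scale": 0.0 }
    ]

Entries are matched against lora_base by position (max_idx presumably guards out-of-range ids), and the returned copy carries the updated scales.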

include/llama-cpp.h

Lines changed: 3 additions & 3 deletions
@@ -20,11 +20,11 @@ struct llama_sampler_deleter {
     void operator()(llama_sampler * sampler) { llama_sampler_free(sampler); }
 };
 
-struct llama_lora_adapter_deleter {
-    void operator()(llama_lora_adapter * lora_adapter) { llama_lora_adapter_free(lora_adapter); }
+struct llama_adapter_lora_deleter {
+    void operator()(llama_adapter_lora * adapter) { llama_adapter_lora_free(adapter); }
 };
 
 typedef std::unique_ptr<llama_model, llama_model_deleter> llama_model_ptr;
 typedef std::unique_ptr<llama_context, llama_context_deleter> llama_context_ptr;
 typedef std::unique_ptr<llama_sampler, llama_sampler_deleter> llama_sampler_ptr;
-typedef std::unique_ptr<llama_lora_adapter, llama_lora_adapter_deleter> llama_lora_adapter_ptr;
+typedef std::unique_ptr<llama_adapter_lora, llama_adapter_lora_deleter> llama_adapter_lora_ptr;
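
The smart-pointer alias follows the same rename; a minimal sketch of scoped ownership (the model and path are assumed to exist):

    #include "llama-cpp.h"

    static void scoped_adapter(llama_model * model, const char * path) {
        llama_adapter_lora_ptr lora;
        lora.reset(llama_adapter_lora_init(model, path)); // same pattern as common_init_from_params
        if (!lora) {
            return; // init failed, nothing to free
        }
        // ... pass lora.get() to llama_set_adapter_lora ...
    } // llama_adapter_lora_deleter calls llama_adapter_lora_free here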

include/llama.h

Lines changed: 15 additions & 20 deletions
@@ -385,8 +385,7 @@ extern "C" {
     } llama_chat_message;
 
     // lora adapter
-    // TODO: rename to llama_adapter_lora
-    struct llama_lora_adapter;
+    struct llama_adapter_lora;
 
     // Helpers for getting default parameters
     // TODO: update API to start accepting pointers to params structs (https://github.com/ggerganov/llama.cpp/discussions/9172)
@@ -515,44 +514,40 @@ extern "C" {
     //
 
     // Load a LoRA adapter from file
-    // TODO: rename to llama_adapter_lora_init
-    LLAMA_API struct llama_lora_adapter * llama_lora_adapter_init(
+    LLAMA_API struct llama_adapter_lora * llama_adapter_lora_init(
             struct llama_model * model,
             const char * path_lora);
 
+    // Manually free a LoRA adapter
+    // Note: loaded adapters will be free when the associated model is deleted
+    LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);
+
+    // The following functions operate on a llama_context, hence the naming: llama_verb_...
+
     // Add a loaded LoRA adapter to given context
     // This will not modify model's weight
-    // TODO: rename to llama_set_adapter_lora
-    LLAMA_API int32_t llama_lora_adapter_set(
+    LLAMA_API int32_t llama_set_adapter_lora(
             struct llama_context * ctx,
-            struct llama_lora_adapter * adapter,
+            struct llama_adapter_lora * adapter,
             float scale);
 
     // Remove a specific LoRA adapter from given context
     // Return -1 if the adapter is not present in the context
-    // TODO: rename to llama_rm_adapter_lora
-    LLAMA_API int32_t llama_lora_adapter_remove(
+    LLAMA_API int32_t llama_rm_adapter_lora(
             struct llama_context * ctx,
-            struct llama_lora_adapter * adapter);
+            struct llama_adapter_lora * adapter);
 
     // Remove all LoRA adapters from given context
-    // TODO: rename to llama_clear_adapter_lora
-    LLAMA_API void llama_lora_adapter_clear(struct llama_context * ctx);
-
-    // Manually free a LoRA adapter
-    // Note: loaded adapters will be free when the associated model is deleted
-    // TODO: rename to llama_adapter_lora_free
-    LLAMA_API void llama_lora_adapter_free(struct llama_lora_adapter * adapter);
+    LLAMA_API void llama_clear_adapter_lora(struct llama_context * ctx);
 
     // Apply a loaded control vector to a llama_context, or if data is NULL, clear
     // the currently loaded vector.
     // n_embd should be the size of a single layer's control, and data should point
     // to an n_embd x n_layers buffer starting from layer 1.
     // il_start and il_end are the layer range the vector should apply to (both inclusive)
    // See llama_control_vector_load in common to load a control vector.
-    // TODO: rename to llama_adapter_cvec_apply
-    LLAMA_API int32_t llama_control_vector_apply(
-            struct llama_context * lctx,
+    LLAMA_API int32_t llama_apply_adapter_cvec(
+            struct llama_context * ctx,
             const float * data,
             size_t len,
             int32_t n_embd,
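
And a sketch of the renamed control-vector call, following the header comment above: data points to an n_embd x n_layers buffer starting from layer 1, and a NULL data pointer clears the loaded vector. The zero-filled buffer and the 0,0 layer range on clear are placeholders, not values taken from this commit:

    #include <vector>
    #include "llama.h"

    static void toggle_cvec(llama_context * ctx, int32_t n_embd, int32_t n_layers) {
        // placeholder contents; a real control vector would be loaded,
        // e.g. via llama_control_vector_load in common
        std::vector<float> data((size_t) n_embd * n_layers, 0.0f);

        // was: llama_control_vector_apply; the layer range is inclusive on both ends
        int32_t err = llama_apply_adapter_cvec(ctx, data.data(), data.size(),
                                               n_embd, 1, n_layers);
        if (err) {
            return;
        }

        // clearing: NULL data resets the current vector (remaining args unused here)
        llama_apply_adapter_cvec(ctx, NULL, 0, n_embd, 0, 0);
    }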
