@@ -82,14 +82,14 @@ enum llama_example {
     LLAMA_EXAMPLE_COUNT,
 };
 
-enum gpt_sampler_type {
-    GPT_SAMPLER_TYPE_NONE        = 0,
-    GPT_SAMPLER_TYPE_TOP_K       = 1,
-    GPT_SAMPLER_TYPE_TOP_P       = 2,
-    GPT_SAMPLER_TYPE_MIN_P       = 3,
-    GPT_SAMPLER_TYPE_TFS_Z       = 4,
-    GPT_SAMPLER_TYPE_TYPICAL_P   = 5,
-    GPT_SAMPLER_TYPE_TEMPERATURE = 6,
+enum common_sampler_type {
+    COMMON_SAMPLER_TYPE_NONE        = 0,
+    COMMON_SAMPLER_TYPE_TOP_K       = 1,
+    COMMON_SAMPLER_TYPE_TOP_P       = 2,
+    COMMON_SAMPLER_TYPE_MIN_P       = 3,
+    COMMON_SAMPLER_TYPE_TFS_Z       = 4,
+    COMMON_SAMPLER_TYPE_TYPICAL_P   = 5,
+    COMMON_SAMPLER_TYPE_TEMPERATURE = 6,
 };
 
 // dimensionality reduction methods, used by cvector-generator
@@ -99,7 +99,7 @@ enum dimre_method {
 };
 
 // sampler parameters
-struct gpt_sampler_params {
+struct common_sampler_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
 
     int32_t n_prev = 64; // number of previous tokens to remember
@@ -124,13 +124,13 @@ struct gpt_sampler_params {
     bool ignore_eos = false;
     bool no_perf    = false; // disable performance metrics
 
-    std::vector<enum gpt_sampler_type> samplers = {
-        GPT_SAMPLER_TYPE_TOP_K,
-        GPT_SAMPLER_TYPE_TFS_Z,
-        GPT_SAMPLER_TYPE_TYPICAL_P,
-        GPT_SAMPLER_TYPE_TOP_P,
-        GPT_SAMPLER_TYPE_MIN_P,
-        GPT_SAMPLER_TYPE_TEMPERATURE
+    std::vector<enum common_sampler_type> samplers = {
+        COMMON_SAMPLER_TYPE_TOP_K,
+        COMMON_SAMPLER_TYPE_TFS_Z,
+        COMMON_SAMPLER_TYPE_TYPICAL_P,
+        COMMON_SAMPLER_TYPE_TOP_P,
+        COMMON_SAMPLER_TYPE_MIN_P,
+        COMMON_SAMPLER_TYPE_TEMPERATURE
     };
 
     std::string grammar; // optional BNF-like grammar to constrain sampling
@@ -141,7 +141,7 @@ struct gpt_sampler_params {
     std::string print() const;
 };
 
-struct gpt_params {
+struct common_params {
     int32_t n_predict = -1;   // new tokens to predict
     int32_t n_ctx     = 0;    // context size
     int32_t n_batch   = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
@@ -183,7 +183,7 @@ struct gpt_params {
     enum llama_pooling_type pooling_type     = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
     enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
 
-    struct gpt_sampler_params sparams;
+    struct common_sampler_params sparams;
 
     std::string model       = ""; // model path                           // NOLINT
     std::string model_draft = ""; // draft model for speculative decoding // NOLINT
@@ -348,9 +348,9 @@ struct gpt_params {
 
 // call once at the start of a program if it uses libcommon
 // initializes the logging system and prints info about the build
-void gpt_init();
+void common_init();
 
-std::string gpt_params_get_system_info(const gpt_params & params);
+std::string common_params_get_system_info(const common_params & params);
 
 bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]);
 bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]);
@@ -410,10 +410,10 @@ struct common_init_result {
     std::vector<common_lora_adapter_container> lora_adapters;
 };
 
-struct common_init_result llama_init_from_gpt_params(gpt_params & params);
+struct common_init_result common_init_from_common_params(common_params & params);
 
-struct llama_model_params   common_model_params_from_gpt_params  (const gpt_params & params);
-struct llama_context_params common_context_params_from_gpt_params(const gpt_params & params);
+struct llama_model_params   common_model_params_from_common_params  (const common_params & params);
+struct llama_context_params common_context_params_from_common_params(const common_params & params);
 
 struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
 
 struct llama_model * common_load_model_from_url(const char * model_url, const char * path_model, const char * hf_token, const struct llama_model_params & params);
@@ -554,5 +554,5 @@ void yaml_dump_vector_int(FILE * stream, const char * prop_name, const std
 void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
 
 void yaml_dump_non_result_info(
-    FILE * stream, const gpt_params & params, const llama_context * lctx,
+    FILE * stream, const common_params & params, const llama_context * lctx,
     const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
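
Taken together, the hunks above are a mechanical rename: every gpt_ prefix becomes common_, and every GPT_SAMPLER_TYPE_ enumerator becomes COMMON_SAMPLER_TYPE_, with signatures otherwise unchanged. As a rough illustration of what a downstream caller has to change, here is a minimal sketch against the renamed declarations visible in this diff; the "common.h" include path, the model filename, and the main() wrapper are assumptions for illustration, not part of this commit.

// Hypothetical caller updated for the renames in this commit.
// Only declarations visible in the hunks above are used.
#include "common.h"  // assumed include path for these declarations

#include <cstdio>

int main() {
    common_init(); // was gpt_init(): initializes logging, prints build info

    common_params params;            // was gpt_params
    params.model     = "model.gguf"; // assumed model path, for illustration
    params.n_predict = 128;          // new tokens to predict
    params.n_ctx     = 4096;         // context size

    // was gpt_sampler_params / GPT_SAMPLER_TYPE_*: pick a sampler chain
    params.sparams.samplers = {
        COMMON_SAMPLER_TYPE_TOP_K,
        COMMON_SAMPLER_TYPE_MIN_P,
        COMMON_SAMPLER_TYPE_TEMPERATURE,
    };

    printf("%s\n", common_params_get_system_info(params).c_str());

    // was llama_init_from_gpt_params()
    struct common_init_result init = common_init_from_common_params(params);
    (void) init; // members other than lora_adapters are not shown in this diff

    return 0;
}

Since the signatures are unchanged apart from the type names, a textual substitution of the old identifiers for the new ones is usually all a downstream project needs.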