@@ -208,7 +208,7 @@ enum llm_arch {
     LLM_ARCH_UNKNOWN,
 };
 
-static std::map<llm_arch, std::string> LLM_ARCH_NAMES = {
+static std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_LLAMA,  "llama"  },
     { LLM_ARCH_FALCON, "falcon" },
     { LLM_ARCH_GPT2,   "gpt2"   },
@@ -285,7 +285,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_RWKV,
 };
 
-static std::map<llm_kv, std::string> LLM_KV_NAMES = {
+static std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_GENERAL_ARCHITECTURE,         "general.architecture"         },
    { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
    { LLM_KV_GENERAL_ALIGNMENT,            "general.alignment"            },
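
The first two hunks make the same change: the static name tables store string literals instead of std::string values. A minimal sketch of the before/after shape (toy enum and a single entry, not the full llama.cpp tables), showing why this avoids per-entry heap allocations during static initialization:

// Sketch: the value-type change in LLM_ARCH_NAMES / LLM_KV_NAMES (toy enum,
// hypothetical names). With std::string values, every entry copies its
// literal into a heap-allocated string at static initialization; with
// const char * values, the map just stores pointers into read-only data.
#include <map>
#include <string>

enum toy_kv { TOY_KV_ARCH };

static std::map<toy_kv, std::string>  NAMES_OLD = { { TOY_KV_ARCH, "general.architecture" } };
static std::map<toy_kv, const char *> NAMES_NEW = { { TOY_KV_ARCH, "general.architecture" } };
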
@@ -346,7 +346,7 @@ struct LLM_KV {
     llm_arch arch;
 
     std::string operator()(llm_kv kv) const {
-        return ::format(LLM_KV_NAMES[kv].c_str(), LLM_ARCH_NAMES[arch].c_str());
+        return ::format(LLM_KV_NAMES[kv], LLM_ARCH_NAMES[arch]);
     }
 };
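
For context on the LLM_KV hunk: many LLM_KV_NAMES entries are per-architecture printf templates (e.g. "%s.context_length"), and ::format in llama.cpp is a printf-style helper, so a const char * can be fed to it directly; the old .c_str() calls only existed to unwrap the std::string values. A self-contained sketch of that pattern (the format helper below is a simplified stand-in, not the llama.cpp implementation):

// Sketch: how LLM_KV's operator() expands a key template (simplified
// format() stand-in; buffer size and names are illustrative).
#include <cstdarg>
#include <cstdio>
#include <string>

static std::string format(const char * fmt, ...) {
    char buf[256];
    va_list ap;
    va_start(ap, fmt);
    vsnprintf(buf, sizeof(buf), fmt, ap);
    va_end(ap);
    return buf;
}

int main() {
    // per-architecture keys are printf templates; the arch name fills the %s
    const char * kv_name   = "%s.context_length";
    const char * arch_name = "llama";
    std::string key = format(kv_name, arch_name); // "llama.context_length"
    printf("%s\n", key.c_str());
}
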
@@ -747,13 +747,13 @@ struct LLM_TN {
 // gguf helpers
 //
 
-static std::map<int8_t, std::string> LLAMA_ROPE_SCALING_TYPES = {
+static std::map<int32_t, const char *> LLAMA_ROPE_SCALING_TYPES = {
     { LLAMA_ROPE_SCALING_NONE,   "none"   },
     { LLAMA_ROPE_SCALING_LINEAR, "linear" },
     { LLAMA_ROPE_SCALING_YARN,   "yarn"   },
 };
 
-static int8_t llama_rope_scaling_type_from_string(const std::string & name) {
+static int32_t llama_rope_scaling_type_from_string(const std::string & name) {
     for (const auto & kv : LLAMA_ROPE_SCALING_TYPES) {
         if (kv.second == name) {
             return kv.first;
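
One subtlety in this hunk: kv.second is now a const char * rather than a std::string, yet `kv.second == name` still compares contents, because the standard library provides operator==(const char *, const std::string &). A quick sketch confirming the behavior:

// Sketch: const char * vs std::string comparison compares contents,
// via operator==(const char *, const std::string &) from <string>.
#include <cassert>
#include <string>

int main() {
    const char * lhs = "yarn";
    std::string  rhs = "yarn";
    assert(lhs == rhs);                       // content comparison, not pointer comparison
    assert(!(lhs == std::string("linear")));  // different contents compare unequal
    return 0;
}
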
@@ -1415,6 +1415,7 @@ static const size_t GiB = 1024*MiB;
 
 struct llama_hparams {
     bool vocab_only;
+    bool rope_finetuned;
     uint32_t n_vocab;
     uint32_t n_ctx_train; // context size the model was trained on
     uint32_t n_embd;
@@ -1434,8 +1435,7 @@ struct llama_hparams {
     float rope_freq_base_train;
     float rope_freq_scale_train;
     uint32_t n_yarn_orig_ctx;
-    int8_t rope_scaling_type_train : 3;
-    bool rope_finetuned : 1;
+    int32_t rope_scaling_type_train;
 
     float f_clamp_kqv;
     float f_max_alibi_bias;
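
The two hparams hunks replace the packed bit-fields with plain fields (and widen the type to match the new int32_t map key). This is safer than it looks: a 3-bit signed bit-field can only represent -4..3, conversions into it were implementation-defined before C++20, and bit-fields cannot have their address taken or be bound to non-const references, which makes them awkward to pass around. A small sketch of the pitfalls (hypothetical struct, not the llama.cpp one):

// Sketch: why a field like `int8_t rope_scaling_type_train : 3` is fragile.
#include <cstdint>
#include <cstdio>

struct packed {
    int8_t mode : 3; // representable range is only -4..3
};

int main() {
    packed p;
    p.mode = 4;                   // out of range: implementation-defined before C++20
    printf("%d\n", (int) p.mode); // commonly prints -4 on two's-complement targets
    // &p.mode;                   // ill-formed: bit-fields have no address
    return 0;
}
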
@@ -2701,7 +2701,7 @@ struct llama_model_loader {
 // load LLaMA models
 //
 
-static std::string llama_model_arch_name(llm_arch arch) {
+static const char * llama_model_arch_name(llm_arch arch) {
     auto it = LLM_ARCH_NAMES.find(arch);
     if (it == LLM_ARCH_NAMES.end()) {
         return "unknown";
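
Returning const char * from llama_model_arch_name is sound here because both the map values and the "unknown" fallback are string literals, which have static storage duration, so the returned pointer can never dangle. A sketch of the lookup-with-fallback shape (toy table; the real code uses LLM_ARCH_NAMES keyed by llm_arch):

// Sketch: map lookup with a string-literal fallback.
#include <cstdio>
#include <map>

static const std::map<int, const char *> NAMES = { { 0, "llama" }, { 1, "falcon" } };

static const char * arch_name(int arch) {
    auto it = NAMES.find(arch);
    return it == NAMES.end() ? "unknown" : it->second; // literals never dangle
}

int main() {
    printf("%s %s\n", arch_name(1), arch_name(42)); // prints: falcon unknown
}
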
@@ -3310,11 +3310,11 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     const auto & hparams = model.hparams;
     const auto & vocab   = model.vocab;
 
-    const auto rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train);
+    const char * rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train);
 
     // hparams
     LLAMA_LOG_INFO("%s: format           = %s\n",     __func__, llama_file_version_name(ml.fver));
-    LLAMA_LOG_INFO("%s: arch             = %s\n",     __func__, LLM_ARCH_NAMES.at(model.arch).c_str());
+    LLAMA_LOG_INFO("%s: arch             = %s\n",     __func__, LLM_ARCH_NAMES.at(model.arch));
     LLAMA_LOG_INFO("%s: vocab type       = %s\n",     __func__, llama_model_vocab_type_name(vocab.type));
     LLAMA_LOG_INFO("%s: n_vocab          = %u\n",     __func__, hparams.n_vocab);
     LLAMA_LOG_INFO("%s: n_merges         = %u\n",     __func__, (int) vocab.bpe_ranks.size());
@@ -3336,7 +3336,7 @@ static void llm_load_print_meta(llama_model_loader & ml, llama_model & model) {
     LLAMA_LOG_INFO("%s: n_ff             = %u\n",     __func__, hparams.n_ff);
     LLAMA_LOG_INFO("%s: n_expert         = %u\n",     __func__, hparams.n_expert);
     LLAMA_LOG_INFO("%s: n_expert_used    = %u\n",     __func__, hparams.n_expert_used);
-    LLAMA_LOG_INFO("%s: rope scaling     = %s\n",     __func__, rope_scaling_type.c_str());
+    LLAMA_LOG_INFO("%s: rope scaling     = %s\n",     __func__, rope_scaling_type);
     LLAMA_LOG_INFO("%s: freq_base_train  = %.1f\n",   __func__, hparams.rope_freq_base_train);
     LLAMA_LOG_INFO("%s: freq_scale_train = %g\n",     __func__, hparams.rope_freq_scale_train);
     LLAMA_LOG_INFO("%s: n_yarn_orig_ctx  = %u\n",     __func__, hparams.n_yarn_orig_ctx);
@@ -10735,7 +10735,7 @@ int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int3
 
 int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
     return snprintf(buf, buf_size, "%s %s %s",
-            llama_model_arch_name(model->arch).c_str(),
+            llama_model_arch_name(model->arch),
             llama_model_type_name(model->type),
             llama_model_ftype_name(model->ftype).c_str());
 }
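
The last hunk is the payoff of the return-type change: llama_model_arch_name now yields a const char * that can be handed straight to snprintf's %s, while llama_model_ftype_name still returns std::string and keeps its .c_str(). The distinction matters because passing a std::string object itself to a %s varargs slot is undefined behavior; the pointer must always be unwrapped first. A compact sketch (values are illustrative):

// Sketch: %s in a varargs call needs a C string, never a std::string object.
#include <cstdio>
#include <string>

int main() {
    const char * arch  = "llama";          // fine to pass directly
    std::string  ftype = "Q4_K - Medium";  // must be unwrapped with .c_str()

    char buf[64];
    snprintf(buf, sizeof(buf), "%s %s", arch, ftype.c_str());
    // snprintf(buf, sizeof(buf), "%s", ftype); // undefined behavior
    printf("%s\n", buf);
}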