ggml-org
diff --git a/llama.cpp b/llama.cpp
Lines changed: 25 additions & 25 deletions
@@ -8030,7 +8030,7 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c
     }
 }
 
-void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep) {
+void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int32_t k, size_t min_keep) {
     const int64_t t_start_sample_us = ggml_time_us();
 
     k = std::max(k, (int) min_keep);
@@ -8390,7 +8390,7 @@ void llama_sample_classifier_free_guidance(
     }
 }
 
-llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu) {
+llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int32_t m, float * mu) {
     GGML_ASSERT(ctx);
 
     auto N = float(llama_n_vocab(llama_get_model(ctx)));
@@ -9598,7 +9598,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
     return result;
 }
 
-int llama_max_devices(void) {
+int32_t llama_max_devices(void) {
     return LLAMA_MAX_DEVICES;
 }
 
@@ -9909,23 +9909,23 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
     return model->vocab.type;
 }
 
-int llama_n_vocab(const struct llama_model * model) {
+int32_t llama_n_vocab(const struct llama_model * model) {
     return model->vocab.id_to_token.size();
 }
 
-int llama_n_ctx_train(const struct llama_model * model) {
+int32_t llama_n_ctx_train(const struct llama_model * model) {
     return model->hparams.n_ctx_train;
 }
 
-int llama_n_embd(const struct llama_model * model) {
+int32_t llama_n_embd(const struct llama_model * model) {
     return model->hparams.n_embd;
 }
 
 float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
 
-int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
         if (buf_size > 0) {
@@ -9936,11 +9936,11 @@ int llama_model_meta_val_str(const struct llama_model * model, const char * key,
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-int llama_model_meta_count(const struct llama_model * model) {
+int32_t llama_model_meta_count(const struct llama_model * model) {
     return (int)model->gguf_kv.size();
 }
 
-int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
         if (buf_size > 0) {
             buf[0] = '\0';
@@ -9952,7 +9952,7 @@ int llama_model_meta_key_by_index(const struct llama_model * model, int i, char
     return snprintf(buf, buf_size, "%s", it->first.c_str());
 }
 
-int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
         if (buf_size > 0) {
             buf[0] = '\0';
@@ -9964,7 +9964,7 @@ int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, c
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
+int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
     return snprintf(buf, buf_size, "%s %s %s",
             llama_model_arch_name(model->arch).c_str(),
             llama_model_type_name(model->type),
@@ -9991,7 +9991,7 @@ struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const ch
     return ggml_get_tensor(model->ctx, name);
 }
 
-int llama_model_quantize(
+uint32_t llama_model_quantize(
         const char * fname_inp,
         const char * fname_out,
         const llama_model_quantize_params * params) {
@@ -10004,7 +10004,7 @@ int llama_model_quantize(
     }
 }
 
-int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int n_threads) {
+int32_t llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) {
     try {
         return llama_apply_lora_from_file_internal(ctx->model, path_lora, scale, path_base_model, n_threads);
     } catch (const std::exception & err) {
@@ -10013,7 +10013,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
     }
 }
 
-int llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int n_threads) {
+int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) {
     try {
         return llama_apply_lora_from_file_internal(*model, path_lora, scale, path_base_model, n_threads);
     } catch (const std::exception & err) {
@@ -10111,7 +10111,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k
     }
 }
 
-int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
+int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx) {
     int result = 0;
 
     for (uint32_t i = 0; i < ctx->kv_self.size; i++) {
@@ -10121,7 +10121,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
     return result;
 }
 
-int llama_get_kv_cache_used_cells(const struct llama_context * ctx) {
+int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx) {
     return ctx->kv_self.used;
 }
 
@@ -10603,7 +10603,7 @@ int llama_eval(
         struct llama_context * ctx,
                  llama_token * tokens,
                      int32_t   n_tokens,
-                         int   n_past) {
+                     int32_t   n_past) {
     llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);
 
     const int ret = llama_decode_internal(*ctx, llama_batch_get_one(tokens, n_tokens, n_past, 0));
@@ -10618,7 +10618,7 @@ int llama_eval_embd(
             struct llama_context * ctx,
                            float * embd,
                          int32_t   n_tokens,
-                             int   n_past) {
+                         int32_t   n_past) {
     llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);
 
     llama_batch batch = { n_tokens, nullptr, embd, nullptr, nullptr, nullptr, nullptr, n_past, 1, 0, };
@@ -10689,7 +10689,7 @@ void llama_batch_free(struct llama_batch batch) {
     if (batch.logits)   free(batch.logits);
 }
 
-int llama_decode(
+int32_t llama_decode(
         struct llama_context * ctx,
           struct llama_batch   batch) {
     const int ret = llama_decode_internal(*ctx, batch);
@@ -10737,11 +10737,11 @@ llama_token llama_token_nl(const struct llama_model * model) {
     return model->vocab.linefeed_id;
 }
 
-int llama_add_bos_token(const struct llama_model * model) {
+int32_t llama_add_bos_token(const struct llama_model * model) {
     return model->vocab.special_add_bos;
 }
 
-int llama_add_eos_token(const struct llama_model * model) {
+int32_t llama_add_eos_token(const struct llama_model * model) {
     return model->vocab.special_add_eos;
 }
 
@@ -10761,12 +10761,12 @@ llama_token llama_token_eot(const struct llama_model * model) {
     return model->vocab.special_eot_id;
 }
 
-int llama_tokenize(
+int32_t llama_tokenize(
     const struct llama_model * model,
                   const char * text,
-                         int   text_len,
+                     int32_t   text_len,
                  llama_token * tokens,
-                         int   n_max_tokens,
+                     int32_t   n_max_tokens,
                         bool   add_bos,
                         bool   special) {
     auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);
@@ -10794,7 +10794,7 @@ static std::string llama_decode_text(const std::string & text) {
 }
 
 // does not write null-terminator to buf
-int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
+int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length) {
     if (0 <= token && token < llama_n_vocab(model)) {
         switch (llama_vocab_get_type(model->vocab)) {
         case LLAMA_VOCAB_TYPE_SPM: {
Original file line number	Diff line number	Diff line change
`@@ -8030,7 +8030,7 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c`
`8030`	`8030`	`}`
`8031`	`8031`	`}`
`8032`	`8032`
`8033`		`-void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep) {`
	`8033`	`+void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int32_t k, size_t min_keep) {`
`8034`	`8034`	`const int64_t t_start_sample_us = ggml_time_us();`
`8035`	`8035`
`8036`	`8036`	`k = std::max(k, (int) min_keep);`
`@@ -8390,7 +8390,7 @@ void llama_sample_classifier_free_guidance(`
`8390`	`8390`	`}`
`8391`	`8391`	`}`
`8392`	`8392`
`8393`		`-llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu) {`
	`8393`	`+llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int32_t m, float * mu) {`
`8394`	`8394`	`GGML_ASSERT(ctx);`
`8395`	`8395`
`8396`	`8396`	`auto N = float(llama_n_vocab(llama_get_model(ctx)));`
`@@ -9598,7 +9598,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {`
`9598`	`9598`	`return result;`
`9599`	`9599`	`}`
`9600`	`9600`
`9601`		`-int llama_max_devices(void) {`
	`9601`	`+int32_t llama_max_devices(void) {`
`9602`	`9602`	`return LLAMA_MAX_DEVICES;`
`9603`	`9603`	`}`
`9604`	`9604`
`@@ -9909,23 +9909,23 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {`
`9909`	`9909`	`return model->vocab.type;`
`9910`	`9910`	`}`
`9911`	`9911`
`9912`		`-int llama_n_vocab(const struct llama_model * model) {`
	`9912`	`+int32_t llama_n_vocab(const struct llama_model * model) {`
`9913`	`9913`	`return model->vocab.id_to_token.size();`
`9914`	`9914`	`}`
`9915`	`9915`
`9916`		`-int llama_n_ctx_train(const struct llama_model * model) {`
	`9916`	`+int32_t llama_n_ctx_train(const struct llama_model * model) {`
`9917`	`9917`	`return model->hparams.n_ctx_train;`
`9918`	`9918`	`}`
`9919`	`9919`
`9920`		`-int llama_n_embd(const struct llama_model * model) {`
	`9920`	`+int32_t llama_n_embd(const struct llama_model * model) {`
`9921`	`9921`	`return model->hparams.n_embd;`
`9922`	`9922`	`}`
`9923`	`9923`
`9924`	`9924`	`float llama_rope_freq_scale_train(const struct llama_model * model) {`
`9925`	`9925`	`return model->hparams.rope_freq_scale_train;`
`9926`	`9926`	`}`
`9927`	`9927`
`9928`		`-int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {`
	`9928`	`+int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {`
`9929`	`9929`	`const auto & it = model->gguf_kv.find(key);`
`9930`	`9930`	`if (it == model->gguf_kv.end()) {`
`9931`	`9931`	`if (buf_size > 0) {`
`@@ -9936,11 +9936,11 @@ int llama_model_meta_val_str(const struct llama_model * model, const char * key,`
`9936`	`9936`	`return snprintf(buf, buf_size, "%s", it->second.c_str());`
`9937`	`9937`	`}`
`9938`	`9938`
`9939`		`-int llama_model_meta_count(const struct llama_model * model) {`
	`9939`	`+int32_t llama_model_meta_count(const struct llama_model * model) {`
`9940`	`9940`	`return (int)model->gguf_kv.size();`
`9941`	`9941`	`}`
`9942`	`9942`
`9943`		`-int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {`
	`9943`	`+int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {`
`9944`	`9944`	`if (i < 0 || i >= (int)model->gguf_kv.size()) {`
`9945`	`9945`	`if (buf_size > 0) {`
`9946`	`9946`	`buf[0] = '\0';`
`@@ -9952,7 +9952,7 @@ int llama_model_meta_key_by_index(const struct llama_model * model, int i, char`
`9952`	`9952`	`return snprintf(buf, buf_size, "%s", it->first.c_str());`
`9953`	`9953`	`}`
`9954`	`9954`
`9955`		`-int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {`
	`9955`	`+int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {`
`9956`	`9956`	`if (i < 0 || i >= (int)model->gguf_kv.size()) {`
`9957`	`9957`	`if (buf_size > 0) {`
`9958`	`9958`	`buf[0] = '\0';`
`@@ -9964,7 +9964,7 @@ int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, c`
`9964`	`9964`	`return snprintf(buf, buf_size, "%s", it->second.c_str());`
`9965`	`9965`	`}`
`9966`	`9966`
`9967`		`-int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {`
	`9967`	`+int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {`
`9968`	`9968`	`return snprintf(buf, buf_size, "%s %s %s",`
`9969`	`9969`	`llama_model_arch_name(model->arch).c_str(),`
`9970`	`9970`	`llama_model_type_name(model->type),`
`@@ -9991,7 +9991,7 @@ struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const ch`
`9991`	`9991`	`return ggml_get_tensor(model->ctx, name);`
`9992`	`9992`	`}`
`9993`	`9993`
`9994`		`-int llama_model_quantize(`
	`9994`	`+uint32_t llama_model_quantize(`
`9995`	`9995`	`const char * fname_inp,`
`9996`	`9996`	`const char * fname_out,`
`9997`	`9997`	`const llama_model_quantize_params * params) {`
`@@ -10004,7 +10004,7 @@ int llama_model_quantize(`
`10004`	`10004`	`}`
`10005`	`10005`	`}`
`10006`	`10006`
`10007`		`-int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int n_threads) {`
	`10007`	`+int32_t llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) {`
`10008`	`10008`	`try {`
`10009`	`10009`	`return llama_apply_lora_from_file_internal(ctx->model, path_lora, scale, path_base_model, n_threads);`
`10010`	`10010`	`} catch (const std::exception & err) {`
`@@ -10013,7 +10013,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor`
`10013`	`10013`	`}`
`10014`	`10014`	`}`
`10015`	`10015`
`10016`		`-int llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int n_threads) {`
	`10016`	`+int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) {`
`10017`	`10017`	`try {`
`10018`	`10018`	`return llama_apply_lora_from_file_internal(*model, path_lora, scale, path_base_model, n_threads);`
`10019`	`10019`	`} catch (const std::exception & err) {`
`@@ -10111,7 +10111,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k`
`10111`	`10111`	`}`
`10112`	`10112`	`}`
`10113`	`10113`
`10114`		`-int llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
	`10114`	`+int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
`10115`	`10115`	`int result = 0;`
`10116`	`10116`
`10117`	`10117`	`for (uint32_t i = 0; i < ctx->kv_self.size; i++) {`
`@@ -10121,7 +10121,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
`10121`	`10121`	`return result;`
`10122`	`10122`	`}`
`10123`	`10123`
`10124`		`-int llama_get_kv_cache_used_cells(const struct llama_context * ctx) {`
	`10124`	`+int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx) {`
`10125`	`10125`	`return ctx->kv_self.used;`
`10126`	`10126`	`}`
`10127`	`10127`
`@@ -10603,7 +10603,7 @@ int llama_eval(`
`10603`	`10603`	`struct llama_context * ctx,`
`10604`	`10604`	`llama_token * tokens,`
`10605`	`10605`	`int32_t n_tokens,`
`10606`		`- int n_past) {`
	`10606`	`+ int32_t n_past) {`
`10607`	`10607`	`llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);`
`10608`	`10608`
`10609`	`10609`	`const int ret = llama_decode_internal(*ctx, llama_batch_get_one(tokens, n_tokens, n_past, 0));`
`@@ -10618,7 +10618,7 @@ int llama_eval_embd(`
`10618`	`10618`	`struct llama_context * ctx,`
`10619`	`10619`	`float * embd,`
`10620`	`10620`	`int32_t n_tokens,`
`10621`		`- int n_past) {`
	`10621`	`+ int32_t n_past) {`
`10622`	`10622`	`llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);`
`10623`	`10623`
`10624`	`10624`	`llama_batch batch = { n_tokens, nullptr, embd, nullptr, nullptr, nullptr, nullptr, n_past, 1, 0, };`
`@@ -10689,7 +10689,7 @@ void llama_batch_free(struct llama_batch batch) {`
`10689`	`10689`	`if (batch.logits) free(batch.logits);`
`10690`	`10690`	`}`
`10691`	`10691`
`10692`		`-int llama_decode(`
	`10692`	`+int32_t llama_decode(`
`10693`	`10693`	`struct llama_context * ctx,`
`10694`	`10694`	`struct llama_batch batch) {`
`10695`	`10695`	`const int ret = llama_decode_internal(*ctx, batch);`
`@@ -10737,11 +10737,11 @@ llama_token llama_token_nl(const struct llama_model * model) {`
`10737`	`10737`	`return model->vocab.linefeed_id;`
`10738`	`10738`	`}`
`10739`	`10739`
`10740`		`-int llama_add_bos_token(const struct llama_model * model) {`
	`10740`	`+int32_t llama_add_bos_token(const struct llama_model * model) {`
`10741`	`10741`	`return model->vocab.special_add_bos;`
`10742`	`10742`	`}`
`10743`	`10743`
`10744`		`-int llama_add_eos_token(const struct llama_model * model) {`
	`10744`	`+int32_t llama_add_eos_token(const struct llama_model * model) {`
`10745`	`10745`	`return model->vocab.special_add_eos;`
`10746`	`10746`	`}`
`10747`	`10747`
`@@ -10761,12 +10761,12 @@ llama_token llama_token_eot(const struct llama_model * model) {`
`10761`	`10761`	`return model->vocab.special_eot_id;`
`10762`	`10762`	`}`
`10763`	`10763`
`10764`		`-int llama_tokenize(`
	`10764`	`+int32_t llama_tokenize(`
`10765`	`10765`	`const struct llama_model * model,`
`10766`	`10766`	`const char * text,`
`10767`		`- int text_len,`
	`10767`	`+ int32_t text_len,`
`10768`	`10768`	`llama_token * tokens,`
`10769`		`- int n_max_tokens,`
	`10769`	`+ int32_t n_max_tokens,`
`10770`	`10770`	`bool add_bos,`
`10771`	`10771`	`bool special) {`
`10772`	`10772`	`auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);`
`@@ -10794,7 +10794,7 @@ static std::string llama_decode_text(const std::string & text) {`
`10794`	`10794`	`}`
`10795`	`10795`
`10796`	`10796`	`// does not write null-terminator to buf`
`10797`		`-int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {`
	`10797`	`+int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int32_t length) {`
`10798`	`10798`	`if (0 <= token && token < llama_n_vocab(model)) {`
`10799`	`10799`	`switch (llama_vocab_get_type(model->vocab)) {`
`10800`	`10800`	`case LLAMA_VOCAB_TYPE_SPM: {`