ggml-org
diff --git a/llama.cpp b/llama.cpp
Lines changed: 25 additions & 25 deletions
@@ -7512,7 +7512,7 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c
     }
 }
 
-void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep) {
+void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int32_t k, size_t min_keep) {
     const int64_t t_start_sample_us = ggml_time_us();
 
     k = std::max(k, (int) min_keep);
@@ -7872,7 +7872,7 @@ void llama_sample_classifier_free_guidance(
     }
 }
 
-llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu) {
+llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int32_t m, float * mu) {
     GGML_ASSERT(ctx);
 
     auto N = float(llama_n_vocab(llama_get_model(ctx)));
@@ -9080,7 +9080,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
     return result;
 }
 
-int llama_max_devices(void) {
+int32_t llama_max_devices(void) {
     return LLAMA_MAX_DEVICES;
 }
 
@@ -9383,23 +9383,23 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
     return model->vocab.type;
 }
 
-int llama_n_vocab(const struct llama_model * model) {
+int32_t llama_n_vocab(const struct llama_model * model) {
     return model->vocab.id_to_token.size();
 }
 
-int llama_n_ctx_train(const struct llama_model * model) {
+int32_t llama_n_ctx_train(const struct llama_model * model) {
     return model->hparams.n_ctx_train;
 }
 
-int llama_n_embd(const struct llama_model * model) {
+int32_t llama_n_embd(const struct llama_model * model) {
     return model->hparams.n_embd;
 }
 
 float llama_rope_freq_scale_train(const struct llama_model * model) {
     return model->hparams.rope_freq_scale_train;
 }
 
-int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
     const auto & it = model->gguf_kv.find(key);
     if (it == model->gguf_kv.end()) {
         if (buf_size > 0) {
@@ -9410,11 +9410,11 @@ int llama_model_meta_val_str(const struct llama_model * model, const char * key,
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-int llama_model_meta_count(const struct llama_model * model) {
+int32_t llama_model_meta_count(const struct llama_model * model) {
     return (int)model->gguf_kv.size();
 }
 
-int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
         if (buf_size > 0) {
             buf[0] = '\0';
@@ -9426,7 +9426,7 @@ int llama_model_meta_key_by_index(const struct llama_model * model, int i, char
     return snprintf(buf, buf_size, "%s", it->first.c_str());
 }
 
-int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
     if (i < 0 || i >= (int)model->gguf_kv.size()) {
         if (buf_size > 0) {
             buf[0] = '\0';
@@ -9438,7 +9438,7 @@ int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, c
     return snprintf(buf, buf_size, "%s", it->second.c_str());
 }
 
-int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
+int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
     return snprintf(buf, buf_size, "%s %s %s",
             llama_model_arch_name(model->arch).c_str(),
             llama_model_type_name(model->type),
@@ -9465,7 +9465,7 @@ struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const ch
     return ggml_get_tensor(model->ctx, name);
 }
 
-int llama_model_quantize(
+uint32_t llama_model_quantize(
         const char * fname_inp,
         const char * fname_out,
         const llama_model_quantize_params * params) {
@@ -9478,7 +9478,7 @@ int llama_model_quantize(
     }
 }
 
-int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int n_threads) {
+int32_t llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int n_threads) {
     try {
         return llama_apply_lora_from_file_internal(ctx->model, path_lora, scale, path_base_model, n_threads);
     } catch (const std::exception & err) {
@@ -9487,7 +9487,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
     }
 }
 
-int llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int n_threads) {
+int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) {
     try {
         return llama_apply_lora_from_file_internal(*model, path_lora, scale, path_base_model, n_threads);
     } catch (const std::exception & err) {
@@ -9585,7 +9585,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k
     }
 }
 
-int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
+int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx) {
     int result = 0;
 
     for (uint32_t i = 0; i < ctx->kv_self.size; i++) {
@@ -9595,7 +9595,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {
     return result;
 }
 
-int llama_get_kv_cache_used_cells(const struct llama_context * ctx) {
+int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx) {
     return ctx->kv_self.used;
 }
 
@@ -10075,7 +10075,7 @@ int llama_eval(
         struct llama_context * ctx,
                  llama_token * tokens,
                      int32_t   n_tokens,
-                         int   n_past) {
+                     int32_t   n_past) {
     llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);
 
     const int ret = llama_decode_internal(*ctx, llama_batch_get_one(tokens, n_tokens, n_past, 0));
@@ -10090,7 +10090,7 @@ int llama_eval_embd(
             struct llama_context * ctx,
                            float * embd,
                          int32_t   n_tokens,
-                             int   n_past) {
+                         int32_t   n_past) {
     llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);
 
     llama_batch batch = { n_tokens, nullptr, embd, nullptr, nullptr, nullptr, nullptr, n_past, 1, 0, };
@@ -10161,7 +10161,7 @@ void llama_batch_free(struct llama_batch batch) {
     if (batch.logits)   free(batch.logits);
 }
 
-int llama_decode(
+int32_t llama_decode(
         struct llama_context * ctx,
           struct llama_batch   batch) {
     const int ret = llama_decode_internal(*ctx, batch);
@@ -10209,11 +10209,11 @@ llama_token llama_token_nl(const struct llama_model * model) {
     return model->vocab.linefeed_id;
 }
 
-int llama_add_bos_token(const struct llama_model * model) {
+int32_t llama_add_bos_token(const struct llama_model * model) {
     return model->vocab.special_add_bos;
 }
 
-int llama_add_eos_token(const struct llama_model * model) {
+int32_t llama_add_eos_token(const struct llama_model * model) {
     return model->vocab.special_add_eos;
 }
 
@@ -10233,12 +10233,12 @@ llama_token llama_token_eot(const struct llama_model * model) {
     return model->vocab.special_eot_id;
 }
 
-int llama_tokenize(
+int32_t llama_tokenize(
     const struct llama_model * model,
                   const char * text,
-                         int   text_len,
+                     int32_t   text_len,
                  llama_token * tokens,
-                         int   n_max_tokens,
+                     int32_t   n_max_tokens,
                         bool   add_bos,
                         bool   special) {
     auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);
@@ -10266,7 +10266,7 @@ static std::string llama_decode_text(const std::string & text) {
 }
 
 // does not write null-terminator to buf
-int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
+int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {
     if (0 <= token && token < llama_n_vocab(model)) {
         switch (llama_vocab_get_type(model->vocab)) {
         case LLAMA_VOCAB_TYPE_SPM: {
Original file line number	Diff line number	Diff line change
`@@ -7512,7 +7512,7 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c`
`7512`	`7512`	`}`
`7513`	`7513`	`}`
`7514`	`7514`
`7515`		`-void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int k, size_t min_keep) {`
	`7515`	`+void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int32_t k, size_t min_keep) {`
`7516`	`7516`	`const int64_t t_start_sample_us = ggml_time_us();`
`7517`	`7517`
`7518`	`7518`	`k = std::max(k, (int) min_keep);`
`@@ -7872,7 +7872,7 @@ void llama_sample_classifier_free_guidance(`
`7872`	`7872`	`}`
`7873`	`7873`	`}`
`7874`	`7874`
`7875`		`-llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int m, float * mu) {`
	`7875`	`+llama_token llama_sample_token_mirostat(struct llama_context * ctx, llama_token_data_array * candidates, float tau, float eta, int32_t m, float * mu) {`
`7876`	`7876`	`GGML_ASSERT(ctx);`
`7877`	`7877`
`7878`	`7878`	`auto N = float(llama_n_vocab(llama_get_model(ctx)));`
`@@ -9080,7 +9080,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {`
`9080`	`9080`	`return result;`
`9081`	`9081`	`}`
`9082`	`9082`
`9083`		`-int llama_max_devices(void) {`
	`9083`	`+int32_t llama_max_devices(void) {`
`9084`	`9084`	`return LLAMA_MAX_DEVICES;`
`9085`	`9085`	`}`
`9086`	`9086`
`@@ -9383,23 +9383,23 @@ enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {`
`9383`	`9383`	`return model->vocab.type;`
`9384`	`9384`	`}`
`9385`	`9385`
`9386`		`-int llama_n_vocab(const struct llama_model * model) {`
	`9386`	`+int32_t llama_n_vocab(const struct llama_model * model) {`
`9387`	`9387`	`return model->vocab.id_to_token.size();`
`9388`	`9388`	`}`
`9389`	`9389`
`9390`		`-int llama_n_ctx_train(const struct llama_model * model) {`
	`9390`	`+int32_t llama_n_ctx_train(const struct llama_model * model) {`
`9391`	`9391`	`return model->hparams.n_ctx_train;`
`9392`	`9392`	`}`
`9393`	`9393`
`9394`		`-int llama_n_embd(const struct llama_model * model) {`
	`9394`	`+int32_t llama_n_embd(const struct llama_model * model) {`
`9395`	`9395`	`return model->hparams.n_embd;`
`9396`	`9396`	`}`
`9397`	`9397`
`9398`	`9398`	`float llama_rope_freq_scale_train(const struct llama_model * model) {`
`9399`	`9399`	`return model->hparams.rope_freq_scale_train;`
`9400`	`9400`	`}`
`9401`	`9401`
`9402`		`-int llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {`
	`9402`	`+int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {`
`9403`	`9403`	`const auto & it = model->gguf_kv.find(key);`
`9404`	`9404`	`if (it == model->gguf_kv.end()) {`
`9405`	`9405`	`if (buf_size > 0) {`
`@@ -9410,11 +9410,11 @@ int llama_model_meta_val_str(const struct llama_model * model, const char * key,`
`9410`	`9410`	`return snprintf(buf, buf_size, "%s", it->second.c_str());`
`9411`	`9411`	`}`
`9412`	`9412`
`9413`		`-int llama_model_meta_count(const struct llama_model * model) {`
	`9413`	`+int32_t llama_model_meta_count(const struct llama_model * model) {`
`9414`	`9414`	`return (int)model->gguf_kv.size();`
`9415`	`9415`	`}`
`9416`	`9416`
`9417`		`-int llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {`
	`9417`	`+int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {`
`9418`	`9418`	`if (i < 0 \|\| i >= (int)model->gguf_kv.size()) {`
`9419`	`9419`	`if (buf_size > 0) {`
`9420`	`9420`	`buf[0] = '\0';`
`@@ -9426,7 +9426,7 @@ int llama_model_meta_key_by_index(const struct llama_model * model, int i, char`
`9426`	`9426`	`return snprintf(buf, buf_size, "%s", it->first.c_str());`
`9427`	`9427`	`}`
`9428`	`9428`
`9429`		`-int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {`
	`9429`	`+int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {`
`9430`	`9430`	`if (i < 0 \|\| i >= (int)model->gguf_kv.size()) {`
`9431`	`9431`	`if (buf_size > 0) {`
`9432`	`9432`	`buf[0] = '\0';`
`@@ -9438,7 +9438,7 @@ int llama_model_meta_val_str_by_index(const struct llama_model * model, int i, c`
`9438`	`9438`	`return snprintf(buf, buf_size, "%s", it->second.c_str());`
`9439`	`9439`	`}`
`9440`	`9440`
`9441`		`-int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {`
	`9441`	`+int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {`
`9442`	`9442`	`return snprintf(buf, buf_size, "%s %s %s",`
`9443`	`9443`	`llama_model_arch_name(model->arch).c_str(),`
`9444`	`9444`	`llama_model_type_name(model->type),`
`@@ -9465,7 +9465,7 @@ struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const ch`
`9465`	`9465`	`return ggml_get_tensor(model->ctx, name);`
`9466`	`9466`	`}`
`9467`	`9467`
`9468`		`-int llama_model_quantize(`
	`9468`	`+uint32_t llama_model_quantize(`
`9469`	`9469`	`const char * fname_inp,`
`9470`	`9470`	`const char * fname_out,`
`9471`	`9471`	`const llama_model_quantize_params * params) {`
`@@ -9478,7 +9478,7 @@ int llama_model_quantize(`
`9478`	`9478`	`}`
`9479`	`9479`	`}`
`9480`	`9480`
`9481`		`-int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int n_threads) {`
	`9481`	`+int32_t llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, float scale, const char * path_base_model, int n_threads) {`
`9482`	`9482`	`try {`
`9483`	`9483`	`return llama_apply_lora_from_file_internal(ctx->model, path_lora, scale, path_base_model, n_threads);`
`9484`	`9484`	`} catch (const std::exception & err) {`
`@@ -9487,7 +9487,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor`
`9487`	`9487`	`}`
`9488`	`9488`	`}`
`9489`	`9489`
`9490`		`-int llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int n_threads) {`
	`9490`	`+int32_t llama_model_apply_lora_from_file(const struct llama_model * model, const char * path_lora, float scale, const char * path_base_model, int32_t n_threads) {`
`9491`	`9491`	`try {`
`9492`	`9492`	`return llama_apply_lora_from_file_internal(*model, path_lora, scale, path_base_model, n_threads);`
`9493`	`9493`	`} catch (const std::exception & err) {`
`@@ -9585,7 +9585,7 @@ void llama_kv_cache_view_update(const struct llama_context * ctx, struct llama_k`
`9585`	`9585`	`}`
`9586`	`9586`	`}`
`9587`	`9587`
`9588`		`-int llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
	`9588`	`+int32_t llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
`9589`	`9589`	`int result = 0;`
`9590`	`9590`
`9591`	`9591`	`for (uint32_t i = 0; i < ctx->kv_self.size; i++) {`
`@@ -9595,7 +9595,7 @@ int llama_get_kv_cache_token_count(const struct llama_context * ctx) {`
`9595`	`9595`	`return result;`
`9596`	`9596`	`}`
`9597`	`9597`
`9598`		`-int llama_get_kv_cache_used_cells(const struct llama_context * ctx) {`
	`9598`	`+int32_t llama_get_kv_cache_used_cells(const struct llama_context * ctx) {`
`9599`	`9599`	`return ctx->kv_self.used;`
`9600`	`9600`	`}`
`9601`	`9601`
`@@ -10075,7 +10075,7 @@ int llama_eval(`
`10075`	`10075`	`struct llama_context * ctx,`
`10076`	`10076`	`llama_token * tokens,`
`10077`	`10077`	`int32_t n_tokens,`
`10078`		`- int n_past) {`
	`10078`	`+ int32_t n_past) {`
`10079`	`10079`	`llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);`
`10080`	`10080`
`10081`	`10081`	`const int ret = llama_decode_internal(*ctx, llama_batch_get_one(tokens, n_tokens, n_past, 0));`
`@@ -10090,7 +10090,7 @@ int llama_eval_embd(`
`10090`	`10090`	`struct llama_context * ctx,`
`10091`	`10091`	`float * embd,`
`10092`	`10092`	`int32_t n_tokens,`
`10093`		`- int n_past) {`
	`10093`	`+ int32_t n_past) {`
`10094`	`10094`	`llama_kv_cache_seq_rm(ctx->kv_self, -1, n_past, -1);`
`10095`	`10095`
`10096`	`10096`	`llama_batch batch = { n_tokens, nullptr, embd, nullptr, nullptr, nullptr, nullptr, n_past, 1, 0, };`
`@@ -10161,7 +10161,7 @@ void llama_batch_free(struct llama_batch batch) {`
`10161`	`10161`	`if (batch.logits) free(batch.logits);`
`10162`	`10162`	`}`
`10163`	`10163`
`10164`		`-int llama_decode(`
	`10164`	`+int32_t llama_decode(`
`10165`	`10165`	`struct llama_context * ctx,`
`10166`	`10166`	`struct llama_batch batch) {`
`10167`	`10167`	`const int ret = llama_decode_internal(*ctx, batch);`
`@@ -10209,11 +10209,11 @@ llama_token llama_token_nl(const struct llama_model * model) {`
`10209`	`10209`	`return model->vocab.linefeed_id;`
`10210`	`10210`	`}`
`10211`	`10211`
`10212`		`-int llama_add_bos_token(const struct llama_model * model) {`
	`10212`	`+int32_t llama_add_bos_token(const struct llama_model * model) {`
`10213`	`10213`	`return model->vocab.special_add_bos;`
`10214`	`10214`	`}`
`10215`	`10215`
`10216`		`-int llama_add_eos_token(const struct llama_model * model) {`
	`10216`	`+int32_t llama_add_eos_token(const struct llama_model * model) {`
`10217`	`10217`	`return model->vocab.special_add_eos;`
`10218`	`10218`	`}`
`10219`	`10219`
`@@ -10233,12 +10233,12 @@ llama_token llama_token_eot(const struct llama_model * model) {`
`10233`	`10233`	`return model->vocab.special_eot_id;`
`10234`	`10234`	`}`
`10235`	`10235`
`10236`		`-int llama_tokenize(`
	`10236`	`+int32_t llama_tokenize(`
`10237`	`10237`	`const struct llama_model * model,`
`10238`	`10238`	`const char * text,`
`10239`		`- int text_len,`
	`10239`	`+ int32_t text_len,`
`10240`	`10240`	`llama_token * tokens,`
`10241`		`- int n_max_tokens,`
	`10241`	`+ int32_t n_max_tokens,`
`10242`	`10242`	`bool add_bos,`
`10243`	`10243`	`bool special) {`
`10244`	`10244`	`auto res = llama_tokenize_internal(model->vocab, std::string(text, text_len), add_bos, special);`
`@@ -10266,7 +10266,7 @@ static std::string llama_decode_text(const std::string & text) {`
`10266`	`10266`	`}`
`10267`	`10267`
`10268`	`10268`	`// does not write null-terminator to buf`
`10269`		`-int llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {`
	`10269`	`+int32_t llama_token_to_piece(const struct llama_model * model, llama_token token, char * buf, int length) {`
`10270`	`10270`	`if (0 <= token && token < llama_n_vocab(model)) {`
`10271`	`10271`	`switch (llama_vocab_get_type(model->vocab)) {`
`10272`	`10272`	`case LLAMA_VOCAB_TYPE_SPM: {`