ggml-org
diff --git a/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp b/examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
Lines changed: 5 additions & 5 deletions
diff --git a/src/llama-adapter.h b/src/llama-adapter.h
Lines changed: 1 addition & 0 deletions
diff --git a/src/llama-arch.h b/src/llama-arch.h
Lines changed: 1 addition & 0 deletions
diff --git a/src/llama-batch.h b/src/llama-batch.h
Lines changed: 2 additions & 0 deletions
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
Lines changed: 2 additions & 2 deletions
diff --git a/src/llama-impl.h b/src/llama-impl.h
Lines changed: 1 addition & 0 deletions
diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h
Lines changed: 1 addition & 0 deletions
@@ -434,12 +434,12 @@ static void print_matrix(struct ggml_tensor * probs) {
     }
 }
 
-struct llama_file {
+struct my_llama_file {
     // use FILE * so we don't have to re-open the file to mmap
     FILE * fp;
     size_t size;
 
-    llama_file(const char * fname, const char * mode) {
+    my_llama_file(const char * fname, const char * mode) {
         fp = std::fopen(fname, mode);
         if (fp == NULL) {
             size = 0;
@@ -500,15 +500,15 @@ struct llama_file {
         return std::string(chars.data(), len);
     }
 
-    ~llama_file() {
+    ~my_llama_file() {
         if (fp) {
             std::fclose(fp);
         }
     }
 };
 
 static bool is_ggml_file(const char * filename) {
-    llama_file file(filename, "rb");
+    my_llama_file file(filename, "rb");
     if (file.size < 4) {
         return false;
     }
@@ -576,7 +576,7 @@ static void load_vocab(const char * filename, const Config * config, struct my_l
     } else {
         // assume llama2.c vocabulary
         LOG_INF("%s: Assuming llama2.c vocabulary since %s is not a gguf file\n", __func__, filename);
-        llama_file file(filename, "rb");
+        my_llama_file file(filename, "rb");
         if (!file.fp) {
             die_fmt("%s: %s", strerror(errno), filename);
         }
 
@@ -7,6 +7,7 @@
 
 #include <vector>
 #include <map>
+#include <algorithm>
 
 //
 // llama_adapter_vec
 
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <map>
+#include <string>
 
 //
 // gguf constants (sync with gguf.py)
 
@@ -3,6 +3,8 @@
 #include "llama.h"
 
 #include <vector>
+#include <cstring>
+#include <algorithm>
 
 // very similar to llama_batch,
 // but has more metadata about sequences
 
@@ -799,7 +799,7 @@ static bool llama_state_load_file_internal(struct llama_context * ctx, const cha
 
     // restore the context state
     {
-        const size_t n_state_size_cur = file.size - file.tell();
+        const size_t n_state_size_cur = file.size() - file.tell();
 
         llama_data_read_file data_ctx(&file);
         const size_t n_read = llama_state_set_data_internal(ctx, data_ctx);
@@ -936,7 +936,7 @@ static size_t llama_state_seq_load_file_internal(struct llama_context * ctx, con
 
     // restore the context state
     {
-        const size_t state_size = file.size - file.tell();
+        const size_t state_size = file.size() - file.tell();
         llama_data_read_file data_ctx(&file);
         const size_t nread = llama_state_seq_set_data_internal(ctx, data_ctx, dest_seq_id);
         if (!nread) {
 
@@ -24,6 +24,7 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
 void llama_log_internal        (ggml_log_level level, const char * format, ...);
 void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 
+// TODO: rename to llama_format ?
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
 std::string format(const char * fmt, ...);
 
 
@@ -8,6 +8,7 @@
 
 #include <set>
 #include <vector>
+#include <limits>
 
 struct llama_kv_cell {
     llama_pos pos   = -1;
Original file line number	Diff line number	Diff line change
`@@ -434,12 +434,12 @@ static void print_matrix(struct ggml_tensor * probs) {`
`434`	`434`	`}`
`435`	`435`	`}`
`436`	`436`
`437`		`-struct llama_file {`
	`437`	`+struct my_llama_file {`
`438`	`438`	`// use FILE * so we don't have to re-open the file to mmap`
`439`	`439`	`FILE * fp;`
`440`	`440`	`size_t size;`
`441`	`441`
`442`		`- llama_file(const char * fname, const char * mode) {`
	`442`	`+ my_llama_file(const char * fname, const char * mode) {`
`443`	`443`	`fp = std::fopen(fname, mode);`
`444`	`444`	`if (fp == NULL) {`
`445`	`445`	`size = 0;`
`@@ -500,15 +500,15 @@ struct llama_file {`
`500`	`500`	`return std::string(chars.data(), len);`
`501`	`501`	`}`
`502`	`502`
`503`		`- ~llama_file() {`
	`503`	`+ ~my_llama_file() {`
`504`	`504`	`if (fp) {`
`505`	`505`	`std::fclose(fp);`
`506`	`506`	`}`
`507`	`507`	`}`
`508`	`508`	`};`
`509`	`509`
`510`	`510`	`static bool is_ggml_file(const char * filename) {`
`511`		`- llama_file file(filename, "rb");`
	`511`	`+ my_llama_file file(filename, "rb");`
`512`	`512`	`if (file.size < 4) {`
`513`	`513`	`return false;`
`514`	`514`	`}`
`@@ -576,7 +576,7 @@ static void load_vocab(const char * filename, const Config * config, struct my_l`
`576`	`576`	`} else {`
`577`	`577`	`// assume llama2.c vocabulary`
`578`	`578`	`LOG_INF("%s: Assuming llama2.c vocabulary since %s is not a gguf file\n", __func__, filename);`
`579`		`- llama_file file(filename, "rb");`
	`579`	`+ my_llama_file file(filename, "rb");`
`580`	`580`	`if (!file.fp) {`
`581`	`581`	`die_fmt("%s: %s", strerror(errno), filename);`
`582`	`582`	`}`
Original file line number	Diff line number	Diff line change
`@@ -799,7 +799,7 @@ static bool llama_state_load_file_internal(struct llama_context * ctx, const cha`
`799`	`799`
`800`	`800`	`// restore the context state`
`801`	`801`	`{`
`802`		`- const size_t n_state_size_cur = file.size - file.tell();`
	`802`	`+ const size_t n_state_size_cur = file.size() - file.tell();`
`803`	`803`
`804`	`804`	`llama_data_read_file data_ctx(&file);`
`805`	`805`	`const size_t n_read = llama_state_set_data_internal(ctx, data_ctx);`
`@@ -936,7 +936,7 @@ static size_t llama_state_seq_load_file_internal(struct llama_context * ctx, con`
`936`	`936`
`937`	`937`	`// restore the context state`
`938`	`938`	`{`
`939`		`- const size_t state_size = file.size - file.tell();`
	`939`	`+ const size_t state_size = file.size() - file.tell();`
`940`	`940`	`llama_data_read_file data_ctx(&file);`
`941`	`941`	`const size_t nread = llama_state_seq_set_data_internal(ctx, data_ctx, dest_seq_id);`
`942`	`942`	`if (!nread) {`