ggml-org
diff --git a/‎common/arg.cpp
Lines changed: 0 additions & 3 deletions b/‎common/arg.cpp
Lines changed: 0 additions & 3 deletions
diff --git a/‎common/common.cpp
Lines changed: 117 additions & 52 deletions b/‎common/common.cpp
Lines changed: 117 additions & 52 deletions
diff --git a/‎common/common.h
Lines changed: 3 additions & 0 deletions b/‎common/common.h
Lines changed: 3 additions & 0 deletions
diff --git a/‎common/log.cpp
Lines changed: 56 additions & 25 deletions b/‎common/log.cpp
Lines changed: 56 additions & 25 deletions
diff --git a/‎common/log.h
Lines changed: 13 additions & 15 deletions b/‎common/log.h
Lines changed: 13 additions & 15 deletions
diff --git a/‎common/sampling.cpp
Lines changed: 1 addition & 1 deletion b/‎common/sampling.cpp
Lines changed: 1 addition & 1 deletion
@@ -1950,8 +1950,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             else { std::invalid_argument("invalid value"); }
         }
     ).set_examples({LLAMA_EXAMPLE_BENCH}));
-#ifndef LOG_DISABLE_LOGS
-    // TODO: make this looks less weird
     add_opt(llama_arg(
         {"--log-disable"},
         "Log disable",
@@ -1966,7 +1964,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             gpt_log_set_file(gpt_log_main(), value.c_str());
         }
     ));
-#endif // LOG_DISABLE_LOGS
 
     return ctx_arg;
 }
 
@@ -374,6 +374,9 @@ static std::vector<T> string_split(const std::string & str, char delim) {
 bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
 void string_process_escapes(std::string & input);
 
+std::string string_from_tokens(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
+std::string string_from_batch (const struct llama_context * ctx, const struct llama_batch & batch);
+
 //
 // Filesystem utils
 //
 
@@ -1,9 +1,11 @@
 #include "log.h"
 
-#include <thread>
-#include <mutex>
-#include <cstdio>
 #include <condition_variable>
+#include <cstdio>
+#include <mutex>
+#include <thread>
+
+int gpt_log_verbosity_env = getenv("LLAMA_LOG") ? atoi(getenv("LLAMA_LOG")) : LOG_DEFAULT_LLAMA;
 
 #define LOG_COLORS // TMP
 
@@ -36,46 +38,56 @@ static int64_t t_us() {
 struct gpt_log_entry {
     enum ggml_log_level level;
 
-    int verbosity;
     int64_t timestamp;
 
     std::vector<char> msg;
 
     // signals the worker thread to stop
     bool is_end;
 
-    void print(FILE * file) {
+    void print(FILE * file = nullptr) const {
+        FILE * fcur = file;
+        if (!fcur) {
+            // stderr displays DBG messages only when the verbosity is high
+            // these messages can still be logged to a file
+            if (level == GGML_LOG_LEVEL_DEBUG && gpt_log_verbosity_env < LOG_DEFAULT_DEBUG) {
+                return;
+            }
+
+            fcur = stdout;
+
+            if (level != GGML_LOG_LEVEL_NONE) {
+                fcur = stderr;
+            }
+        }
+
         if (level != GGML_LOG_LEVEL_NONE) {
             if (timestamp) {
                 // [M.s.ms.us]
-                fprintf(file, "[%04d.%02d.%03d.%03d] ",
+                fprintf(fcur, "" LOG_COL_BLUE "%05d.%02d.%03d.%03d" LOG_COL_DEFAULT " ",
                         (int) (timestamp / 1000000 / 60),
                         (int) (timestamp / 1000000 % 60),
                         (int) (timestamp / 1000 % 1000),
                         (int) (timestamp % 1000));
             }
 
             switch (level) {
-                case GGML_LOG_LEVEL_INFO:
-                    fprintf(file, LOG_COL_GREEN "INF " LOG_COL_DEFAULT);
-                    break;
-                case GGML_LOG_LEVEL_WARN:
-                    fprintf(file, LOG_COL_MAGENTA "WRN " LOG_COL_DEFAULT);
-                    break;
-                case GGML_LOG_LEVEL_ERROR:
-                    fprintf(file, LOG_COL_RED "ERR " LOG_COL_DEFAULT);
-                    break;
-                case GGML_LOG_LEVEL_DEBUG:
-                    fprintf(file, LOG_COL_YELLOW "DBG " LOG_COL_DEFAULT);
-                    break;
+                case GGML_LOG_LEVEL_INFO:  fprintf(fcur, LOG_COL_GREEN   "I " LOG_COL_DEFAULT); break;
+                case GGML_LOG_LEVEL_WARN:  fprintf(fcur, LOG_COL_MAGENTA "W "                ); break;
+                case GGML_LOG_LEVEL_ERROR: fprintf(fcur, LOG_COL_RED     "E "                ); break;
+                case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, LOG_COL_YELLOW  "D "                ); break;
                 default:
                     break;
             }
         }
 
-        fprintf(file, "%s", msg.data());
+        fprintf(fcur, "%s", msg.data());
 
-        fflush(file);
+        if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR || level == GGML_LOG_LEVEL_DEBUG) {
+            fprintf(fcur, LOG_COL_DEFAULT);
+        }
+
+        fflush(fcur);
     }
 };
 
@@ -120,7 +132,7 @@ struct gpt_log {
     gpt_log_entry cur;
 
 public:
-    void add(enum ggml_log_level level, int verbosity, const char * fmt, va_list args) {
+    void add(enum ggml_log_level level, const char * fmt, va_list args) {
         std::lock_guard<std::mutex> lock(mtx);
 
         if (!running) {
@@ -130,15 +142,34 @@ struct gpt_log {
         auto & entry = entries[tail];
 
         {
+#if 1
             const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
             if (n >= entry.msg.size()) {
                 entry.msg.resize(n + 1);
                 vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
             }
+#else
+            // hack for bolding arguments
+
+            std::stringstream ss;
+            for (int i = 0; fmt[i] != 0; i++) {
+                if (fmt[i] == '%') {
+                    ss << LOG_COL_BOLD;
+                    while (fmt[i] != ' ' && fmt[i] != ')' && fmt[i] != ']' && fmt[i] != 0) ss << fmt[i++];
+                    ss << LOG_COL_DEFAULT;
+                    if (fmt[i] == 0) break;
+                }
+                ss << fmt[i];
+            }
+            const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
+            if (n >= entry.msg.size()) {
+                entry.msg.resize(n + 1);
+                vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
+            }
+#endif
         }
 
         entry.level = level;
-        entry.verbosity = verbosity;
         entry.timestamp = 0;
         if (timestamps) {
             entry.timestamp = t_us() - t_start;
@@ -192,7 +223,7 @@ struct gpt_log {
                     break;
                 }
 
-                cur.print(stdout);
+                cur.print(); // stdout and stderr
 
                 if (file) {
                     cur.print(file);
@@ -267,10 +298,10 @@ void gpt_log_free(struct gpt_log * log) {
     delete log;
 }
 
-void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, int verbosity, const char * fmt, ...) {
+void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...) {
     va_list args;
     va_start(args, fmt);
-    log->add(level, verbosity, fmt, args);
+    log->add(level, fmt, args);
     va_end(args);
 }
 
 
@@ -2,8 +2,6 @@
 
 #include "ggml.h"
 
-#include <cstdarg>
-
 #ifndef __GNUC__
 #    define LOG_ATTRIBUTE_FORMAT(...)
 #elif defined(__MINGW32__)
@@ -12,9 +10,11 @@
 #    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
 #endif
 
-#ifndef LOG_VERBOSITY
-#define LOG_VERBOSITY 10
-#endif
+#define LOG_DEFAULT_DEBUG 10
+#define LOG_DEFAULT_LLAMA 5
+
+// initialized in log.cpp from environment variable LLAMA_LOG
+extern int gpt_log_verbosity_env;
 
 struct gpt_log;
 
@@ -24,30 +24,28 @@ void             gpt_log_pause (struct gpt_log * log);
 void             gpt_log_resume(struct gpt_log * log);
 void             gpt_log_free  (struct gpt_log * log);
 
-LOG_ATTRIBUTE_FORMAT(4, 5)
-void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, int verbosity, const char * fmt, ...);
+LOG_ATTRIBUTE_FORMAT(3, 4)
+void gpt_log_add(struct gpt_log * log, enum ggml_log_level level, const char * fmt, ...);
 
 void gpt_log_set_file      (struct gpt_log * log, const char * file); // not thread-safe
 void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps);
 
 #define LOG_TMPL(level, verbosity, ...) \
     do { \
-        if ((verbosity) <= LOG_VERBOSITY) { \
-            gpt_log_add(gpt_log_main(), (level), (verbosity), __VA_ARGS__); \
+        if ((verbosity) <= gpt_log_verbosity_env) { \
+            gpt_log_add(gpt_log_main(), (level), __VA_ARGS__); \
         } \
     } while (0)
 
 #define LOG(...)             LOG_TMPL(GGML_LOG_LEVEL_NONE, 0,         __VA_ARGS__)
 #define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)
 
-#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0, __VA_ARGS__)
-#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0, __VA_ARGS__)
-#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__)
-#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, 0, __VA_ARGS__)
+#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0,                 __VA_ARGS__)
+#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0,                 __VA_ARGS__)
+#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0,                 __VA_ARGS__)
+#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
 
 #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  verbosity, __VA_ARGS__)
 #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  verbosity, __VA_ARGS__)
 #define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__)
 #define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__)
-
-#define LOG_TOKENS_TOSTR_PRETTY(...) std::string("dummy")
@@ -325,7 +325,7 @@ llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {
 }
 
 std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {
-    std::string result = "\tlogits ";
+    std::string result = "logits ";
 
     for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
         const auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);
Original file line number	Diff line number	Diff line change
`@@ -1950,8 +1950,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,`
`1950`	`1950`	`else { std::invalid_argument("invalid value"); }`
`1951`	`1951`	`}`
`1952`	`1952`	`).set_examples({LLAMA_EXAMPLE_BENCH}));`
`1953`		`-#ifndef LOG_DISABLE_LOGS`
`1954`		`- // TODO: make this looks less weird`
`1955`	`1953`	`add_opt(llama_arg(`
`1956`	`1954`	`{"--log-disable"},`
`1957`	`1955`	`"Log disable",`
`@@ -1966,7 +1964,6 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,`
`1966`	`1964`	`gpt_log_set_file(gpt_log_main(), value.c_str());`
`1967`	`1965`	`}`
`1968`	`1966`	`));`
`1969`		`-#endif // LOG_DISABLE_LOGS`
`1970`	`1967`
`1971`	`1968`	`return ctx_arg;`
`1972`	`1969`	`}`
Original file line number	Diff line number	Diff line change
`@@ -325,7 +325,7 @@ llama_token gpt_sampler_last(const struct gpt_sampler * gsmpl) {`
`325`	`325`	`}`
`326`	`326`
`327`	`327`	`std::string gpt_sampler_print(const struct gpt_sampler * gsmpl) {`
`328`		`- std::string result = "\tlogits ";`
	`328`	`+ std::string result = "logits ";`
`329`	`329`
`330`	`330`	`for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {`
`331`	`331`	`const auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);`