Commit 88999fb: results per binary
Parent: d2d5254

5 files changed (+32, -31 lines)

examples/main/main.cpp

Lines changed: 14 additions & 2 deletions
@@ -836,8 +836,20 @@ int main(int argc, char ** argv) {
         FILE * logfile = fopen((params.logdir + timestamp + ".yml").c_str(), "w");
         fprintf(logfile, "binary: main\n");
         dump_non_result_info_yaml(logfile, params, timestamp, input_tokens);
-        llama_dump_result_info_yaml(
-            logfile, ctx, output_ss.str().c_str(), output_tokens.data(), output_tokens.size(), NULL, 0);
+
+        fprintf(logfile, "\n");
+        fprintf(logfile, "######################\n");
+        fprintf(logfile, "# Generation Results #\n");
+        fprintf(logfile, "######################\n");
+        fprintf(logfile, "\n");
+
+        // fprintf(logfile, "ftype: %u\n", ctx->model.hparams.ftype);
+        // fprintf(logfile, "ftype_str: %s\n", llama_ftype_name(ctx->model.hparams.ftype));
+        // fprintf(logfile, "model_type: %s\n", llama_model_type_name(ctx->model.type));
+        dump_string_yaml_multiline(logfile, "output", output_ss.str().c_str(), false);
+        dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
+
+        llama_dump_timing_info_yaml(logfile, ctx);
         fclose(logfile);
     } else {
         fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
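For reference, the logfile produced by the main binary after this change might look roughly like the following. This is a hypothetical excerpt with made-up values; the keys written by dump_non_result_info_yaml are elided, and the exact multiline style emitted by dump_string_yaml_multiline is an assumption.

    binary: main
    # ... keys written by dump_non_result_info_yaml ...

    ######################
    # Generation Results #
    ######################

    output: |
      Once upon a time
    output_tokens: [9038, 2501, 263, 931]

    ###########
    # Timings #
    ###########

    mst_eval: 25.31 # ms / token during generation
    # ... remaining timing keys ...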

examples/perplexity/perplexity.cpp

Lines changed: 13 additions & 1 deletion
@@ -369,7 +369,19 @@ int main(int argc, char ** argv) {
         FILE * logfile = fopen((params.logdir + timestamp + ".yml").c_str(), "w");
         fprintf(logfile, "binary: perplexity\n");
         dump_non_result_info_yaml(logfile, params, timestamp, tokens);
-        llama_dump_result_info_yaml(logfile, ctx, NULL, NULL, 0, probs.data(), probs.size());
+
+        fprintf(logfile, "\n");
+        fprintf(logfile, "######################\n");
+        fprintf(logfile, "# Perplexity Results #\n");
+        fprintf(logfile, "######################\n");
+        fprintf(logfile, "\n");
+
+        // fprintf(logfile, "ftype: %u\n", ctx->model.hparams.ftype);
+        // fprintf(logfile, "ftype_str: %s\n", llama_ftype_name(ctx->model.hparams.ftype));
+        // fprintf(logfile, "model_type: %s\n", llama_model_type_name(ctx->model.type));
+        dump_vector_float_yaml(logfile, "probs", probs);
+
+        llama_dump_timing_info_yaml(logfile, ctx);
         fclose(logfile);
     } else {
         fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
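The perplexity logfile follows the same shape but dumps token probabilities instead of generated text. A hypothetical excerpt (values made up), using the scientific notation introduced by the llama-util.h change below:

    binary: perplexity

    ######################
    # Perplexity Results #
    ######################

    probs: [1.871840e-03, 9.211632e-01, 4.102077e-05]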

llama-util.h

Lines changed: 2 additions & 2 deletions
@@ -591,9 +591,9 @@ static bool create_directory_with_parents(const std::string & path) {
 static void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data) {
     fprintf(stream, "%s: [", prop_name);
     for (size_t i = 0; i < data.size() - 1; ++i) {
-        fprintf(stream, "%f, ", data[i]);
+        fprintf(stream, "%e, ", data[i]);
     }
-    fprintf(stream, "%f]\n", data.back());
+    fprintf(stream, "%e]\n", data.back());
 }

 static void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data) {
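The switch from %f to %e matters for probabilities: %f prints six digits after the decimal point, so any value below about 5e-7 rounds to 0.000000, while %e preserves the magnitude. A minimal standalone demo of the difference (the value here is illustrative):

    #include <cstdio>

    int main() {
        const float p = 3.2e-8f;  // a plausibly tiny token probability
        printf("%f\n", p);        // prints 0.000000     -- magnitude lost
        printf("%e\n", p);        // prints 3.200000e-08 -- magnitude kept
        return 0;
    }

Note that the helper reads data.back() and loops to data.size() - 1, so it assumes a non-empty vector; with an empty one the unsigned size() - 1 wraps around and data.back() is undefined behavior.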

llama.cpp

Lines changed: 2 additions & 23 deletions
@@ -4399,19 +4399,14 @@ const char * llama_print_system_info(void) {
     return s.c_str();
 }

-void llama_dump_result_info_yaml(
-    FILE * stream, const llama_context * ctx, const char * output_str, const int * output_tokens,
-    const int n_output_tokens, const float * probs, const int n_probs) {
+void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {

     fprintf(stream, "\n");
     fprintf(stream, "###########\n");
-    fprintf(stream, "# Results #\n");
+    fprintf(stream, "# Timings #\n");
     fprintf(stream, "###########\n");
     fprintf(stream, "\n");

-    fprintf(stream, "ftype: %u\n", ctx->model.hparams.ftype);
-    fprintf(stream, "ftype_str: %s\n", llama_ftype_name(ctx->model.hparams.ftype));
-    fprintf(stream, "model_type: %s\n", llama_model_type_name(ctx->model.type));
     fprintf(stream, "mst_eval: %.2f # ms / token during generation\n",
             1.0e-3 * ctx->t_eval_us / ctx->n_eval);
     fprintf(stream, "mst_p_eval: %.2f # ms / token during prompt processing\n",
@@ -4422,22 +4417,6 @@ void llama_dump_result_info_yaml(
     fprintf(stream, "n_vocab: %d # output size of the final layer, 32001 for some models\n", ctx->model.hparams.n_vocab);
     fprintf(stream, "n_p_eval: %d # number of tokens processed in batches at the beginning\n", ctx->n_p_eval);
     fprintf(stream, "n_sample: %d # number of sampled tokens\n", ctx->n_sample);
-    dump_string_yaml_multiline(stream, "output", output_str, false);
-
-    if (output_tokens == NULL) {
-        fprintf(stream, "output_tokens:\n");
-    } else {
-        const std::vector<int> output_token_vector(output_tokens, output_tokens + n_output_tokens);
-        dump_vector_int_yaml(stream, "output_tokens", output_token_vector);
-    }
-
-    if (probs == NULL) {
-        fprintf(stream, "probs:\n");
-    } else {
-        const std::vector<float> prob_vector(probs, probs + n_probs);
-        dump_vector_float_yaml(stream, "probs", prob_vector);
-    }
-
     fprintf(stream, "t_eval_us: %ld # total microseconds spent generating tokens\n", ctx->t_eval_us);
     fprintf(stream, "t_load_us: %ld # total microseconds spent loading the model\n", ctx->t_load_us);
     fprintf(stream, "t_p_eval_us: %ld # total microseconds spent prompt processing\n", ctx->t_p_eval_us);

llama.h

Lines changed: 1 addition & 3 deletions
@@ -471,9 +471,7 @@ extern "C" {
     // Print system information
     LLAMA_API const char * llama_print_system_info(void);

-    LLAMA_API void llama_dump_result_info_yaml(
-        FILE * stream, const llama_context * ctx, const char * output_str, const int * output_tokens,
-        int n_output_tokens, const float * probs, int n_probs);
+    LLAMA_API void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx);

 #ifdef __cplusplus
 }
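With the narrower API, a caller only needs a valid context and an open stream; result dumping is now each binary's own job. A minimal sketch of calling the new function (the helper name is illustrative, not part of this commit):

    #include <cstdio>
    #include "llama.h"

    // Writes only the timing section of the YAML log.
    static void write_timing_log(const llama_context * ctx, const char * path) {
        FILE * logfile = fopen(path, "w");
        if (logfile == NULL) {
            return; // mirrors the warning path shown in the diffs above
        }
        llama_dump_timing_info_yaml(logfile, ctx);
        fclose(logfile);
    }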
