Skip to content

Logging generations with SQLite #2557

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ OPT = -O3
endif
CFLAGS = -I. $(OPT) -std=c11 -fPIC
CXXFLAGS = -I. -I./examples $(OPT) -std=c++11 -fPIC
LDFLAGS =
LDFLAGS = -lsqlite3

ifdef LLAMA_DEBUG
CFLAGS += -O0 -g
Expand Down
6 changes: 6 additions & 0 deletions benchmark.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env sh
#
# Runs ./main once for every -ngl value from 0 to 35 (inclusive) with a fixed
# seed, model, context size and thread count, so that the runs differ only in
# the -ngl argument.
#
# The loop uses a plain counter rather than `{0..35}`: brace expansion is a
# bash/zsh extension, and a POSIX sh (e.g. dash) would pass the literal string
# "{0..35}" as a single -ngl value.

ngl=0
while [ "$ngl" -le 35 ]
do
    ./main --model models/nvme/llama-7b-ggml-q4_0.bin --seed 1337 --ignore-eos --n-predict 128 --ctx-size 2048 --threads 8 -ngl "$ngl" -mmq
    ngl=$((ngl + 1))
done
37 changes: 37 additions & 0 deletions examples/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@
#include <ctime>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
#include <sqlite3.h>

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
#include <signal.h>
Expand Down Expand Up @@ -163,6 +165,30 @@ int main(int argc, char ** argv) {
return 0;
}

sqlite3 * db = NULL;
int return_code;
const int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE;
return_code = sqlite3_open_v2("llama.sqlite", &db, flags, NULL);
fprintf(stderr, "\nsqlite open: %d %s\n\n", return_code, sqlite3_errmsg(db));

const std::string sql_create_table ="CREATE TABLE IF NOT EXISTS llama_runs("
"id INTEGER PRIMARY KEY AUTOINCREMENT,"
"build_number INTEGER NOT NULL,"
"build_commit TEXT NOT NULL,"

"n_gpu_layers BIGINT NOT NULL,"

"t_sample_us BIGINT NOT NULL,"
"t_eval_us BIGINT NOT NULL,"
"t_p_eval_us BIGINT NOT NULL,"
"n_sample BIGINT NOT NULL,"
"n_eval BIGINT NOT NULL,"
"n_p_eval BIGINT NOT NULL);";

char * errmsg;
return_code = sqlite3_exec(db, sql_create_table.c_str(), NULL, NULL, &errmsg);
fprintf(stderr, "\nsqlite create table: %d %s\n\n", return_code, errmsg);

std::string path_session = params.path_prompt_cache;
std::vector<llama_token> session_tokens;

Expand Down Expand Up @@ -808,6 +834,17 @@ int main(int argc, char ** argv) {
}

llama_print_timings(ctx);

std::ostringstream sql_insert_values;
sql_insert_values << "INSERT INTO llama_runs(build_number, build_commit, n_gpu_layers, "
"t_sample_us, t_eval_us, t_p_eval_us, n_sample, n_eval, n_p_eval) VALUES (";
sql_insert_values << BUILD_NUMBER << ",";
sql_insert_values << "'" << BUILD_COMMIT << "',";
sql_insert_values << params.n_gpu_layers << ",";
llama_sqlite_append_timings(ctx, sql_insert_values);
return_code = sqlite3_exec(db, sql_insert_values.str().c_str(), NULL, NULL, &errmsg);
fprintf(stderr, "\nsqlite insert data: %d %s\n\n", return_code, errmsg);

if (ctx_guidance) { llama_free(ctx_guidance); }
llama_free(ctx);
llama_free_model(model);
Expand Down
9 changes: 9 additions & 0 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4243,6 +4243,15 @@ void llama_print_timings(struct llama_context * ctx) {
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
}

// Streams six fields of ctx into sql_insert_values, in this order:
// t_sample_us, t_eval_us, t_p_eval_us, n_sample, n_eval, n_p_eval.
// Each of the first five is followed by "," and the last one by ");", so the
// output closes a parenthesised value list that the caller has already opened.
void llama_sqlite_append_timings(struct llama_context * ctx, std::ostringstream & sql_insert_values) {
    std::ostringstream & out = sql_insert_values;

    out << ctx->t_sample_us << ","
        << ctx->t_eval_us   << ","
        << ctx->t_p_eval_us << ","
        << ctx->n_sample    << ","
        << ctx->n_eval      << ","
        << ctx->n_p_eval    << ");";
}

void llama_reset_timings(struct llama_context * ctx) {
ctx->t_start_us = ggml_time_us();
ctx->t_sample_us = ctx->n_sample = 0;
Expand Down
2 changes: 2 additions & 0 deletions llama.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#else
#define LLAMA_MAX_DEVICES 1
#endif // GGML_USE_CUBLAS
#include <sstream>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
Expand Down Expand Up @@ -446,6 +447,7 @@ extern "C" {
// Performance information
LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
LLAMA_API void llama_print_timings(struct llama_context * ctx);
LLAMA_API void llama_sqlite_append_timings(struct llama_context * ctx, std::ostringstream & sql_insert_values);
LLAMA_API void llama_reset_timings(struct llama_context * ctx);

// Print system information
Expand Down
20 changes: 20 additions & 0 deletions plot_ts_per_ngl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env python3
"""Plot generated tokens per second against the -ngl value of each logged run.

Reads the ``llama_runs`` table from ``llama.sqlite`` in the current directory,
computes ``1000000.0 * n_eval / t_eval_us`` for every row, plots it over
``n_gpu_layers``, writes the figure to ``benchmark.png`` and then shows it.
"""

import sqlite3
import sys
from contextlib import closing

import numpy as np
import matplotlib.pyplot as plt

# Rows with t_eval_us = 0 are excluded: SQLite evaluates a division by zero to
# NULL, which would arrive here as None and break the numeric array below.
QUERY = (
    "SELECT n_gpu_layers, 1000000.0*n_eval/t_eval_us FROM llama_runs "
    "WHERE t_eval_us > 0 ORDER BY n_gpu_layers;"
)

# closing() guarantees the connection is released even if the query raises;
# a sqlite3 connection used as a bare context manager only manages transactions.
with closing(sqlite3.connect("llama.sqlite")) as con:
    rows = con.execute(QUERY).fetchall()

# An empty result would become a 1-D array and make ts[:, 0] raise IndexError.
if not rows:
    sys.exit("llama_runs has no rows with t_eval_us > 0; nothing to plot")

ts = np.array(rows, dtype=float)

plt.plot(ts[:, 0], ts[:, 1])
plt.xlim(0, 35)
plt.ylim(0, 130)
plt.title("7b q4_0, 3700X, 3200 MHz dual-channel RAM, RTX 3090")
plt.xlabel("-ngl")
plt.ylabel("Generated t/s")
plt.savefig("benchmark.png", dpi=240)
plt.show()