Skip to content

Commit 7c05049

Browse files
committed
tuning: check GPU offloading before loading model
1 parent bb590f1 commit 7c05049

File tree

4 files changed

+25
-19
lines changed

4 files changed

+25
-19
lines changed

examples/common.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
435435
process_escapes(params.prompt);
436436
}
437437

438+
#ifdef GGML_USE_TUNE
439+
if (params.n_gpu_layers > 0) {
440+
if (params.tune || !params.tune_file.empty()) {
441+
fprintf(stderr, "[tune] error: tunning and GPU offloading cannot be used at the same time, abort.\n");
442+
exit(1);
443+
}
444+
}
445+
#endif
438446
return true;
439447
}
440448

examples/perplexity/perplexity.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ int main(int argc, char ** argv) {
159159
}
160160

161161
#ifdef GGML_USE_TUNE
162-
if (params.tune || !params.tune_file.empty()){
162+
if (params.tune || !params.tune_file.empty()) {
163163
bool ok = llama_mulmat_tune(ctx, params.n_threads, params.tune, params.tune_file.c_str());
164164
if (!ok || (params.tune && !params.tune_file.empty())) {
165165
llama_free(ctx);

ggml-tune.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ const struct ggml_task_profile *ggml_mulmat_tune_select_task_profile(
104104
prof->stages[i].backend);
105105
}
106106
printf(
107-
"\n[mulmat tune] M: %3d, N: %5d, K: %5d, backends of the "
107+
"\n[tune] M: %3d, N: %5d, K: %5d, backends of the "
108108
"fastest profile: %s %s %s\n",
109109
M, N, K, names[0], names[1], names[2]);
110110
#endif
@@ -358,7 +358,7 @@ bool ggml_mulmat_tune_validate(const struct ggml_mulmat_tune *tune,
358358
bool ok = ggml_mulmat_tune_validate_internal(tune, model, ftype, n_threads,
359359
errbuf, sizeof(errbuf));
360360
if (!ok) {
361-
fprintf(stderr, "[mulmat tune] error: %s. run bench again.\n", errbuf);
361+
fprintf(stderr, "[tune] error: %s. run bench again.\n", errbuf);
362362
}
363363

364364
return ok;
@@ -371,7 +371,7 @@ bool ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
371371
}
372372

373373
if (tune->version != GGML_MULMAT_TUNE_VERSION) {
374-
fprintf(stderr, "[mulmat tune] version mismatch, run bench again\n");
374+
fprintf(stderr, "[tune] version mismatch, run bench again\n");
375375
return false;
376376
}
377377

@@ -396,7 +396,7 @@ bool ggml_mulmat_tune_read_data(struct ggml_mulmat_tune *tune, FILE *fp) {
396396
(shape->n_profiles * shape->m_num);
397397
shape->items = malloc(item_size);
398398
if (shape->items == NULL) {
399-
fprintf(stderr, "[mulmat tune] failed to allocate memory\n");
399+
fprintf(stderr, "[tune] failed to allocate memory\n");
400400
return false;
401401
}
402402
memset(shape->items, 0, item_size);
@@ -708,7 +708,7 @@ static size_t ggml_mulmat_allocate_wdata(int N, int K, char **wdata) {
708708

709709
if (!buf) {
710710
fprintf(stderr,
711-
"[mulmat tune] error: failed to allocate %zu MiB memory",
711+
"[tune] error: failed to allocate %zu MiB memory",
712712
sz / 1024 / 1024);
713713
return 0;
714714
}
@@ -745,7 +745,7 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
745745
int n_backends = ggml_mulmat_tune_get_builtin_task_backends(backends);
746746
if (n_backends < 2) {
747747
fprintf(stderr,
748-
"[mulmat tune] error: this program was not built with BLAS.\n");
748+
"[tune] error: this program was not built with BLAS.\n");
749749
return false;
750750
}
751751

@@ -770,7 +770,7 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
770770
}
771771

772772
fprintf(stdout,
773-
"[mulmat tune] model: %s, ggml ftype: %d, "
773+
"[tune] model: %s, ggml ftype: %d, "
774774
"n_pass: %d, n_threads: %d, n_shapes: %d, backends: %s\n",
775775
params->model.name, params->model.ftype, params->n_pass,
776776
params->n_threads, tune->n_shapes, buf);
@@ -871,7 +871,7 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
871871

872872
ggml_threading_stop(thrd_ctx);
873873

874-
fprintf(stdout, "[mulmat tune] done, elapsed time: %d seconds.\n",
874+
fprintf(stdout, "[tune] done, elapsed time: %d seconds.\n",
875875
(int)(ggml_time_ms() - t0) / 1000);
876876

877877
// output
@@ -880,7 +880,7 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
880880
FILE *fp = fopen(params->fname, "w");
881881
if (!fp) {
882882
fprintf(stderr,
883-
"[mulmat tune] warn: failed to open file `%s`, print to "
883+
"[tune] warn: failed to open file `%s`, print to "
884884
"console instead\n\n",
885885
params->fname);
886886
params->output_console = 1;
@@ -889,12 +889,12 @@ bool ggml_mulmat_tune_bench(struct ggml_mulmat_tune *tune,
889889
fclose(fp);
890890

891891
if (ok) {
892-
fprintf(stdout, "[mulmat tune] data was written to `%s`\n",
892+
fprintf(stdout, "[tune] data was written to `%s`\n",
893893
params->fname);
894894
} else {
895895
fprintf(
896896
stderr,
897-
"[mulmat tune] warn: failed to write file `%s`, print to "
897+
"[tune] warn: failed to write file `%s`, print to "
898898
"console instead\n\n",
899899
params->fname);
900900
params->output_console = 1;

llama.cpp

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2745,11 +2745,9 @@ struct llama_context * llama_init_from_file(
27452745

27462746
#ifdef GGML_USE_TUNE
27472747
bool llama_mulmat_tune(struct llama_context *ctx, int n_threads, bool tune, const char *fname) {
2748+
GGML_ASSERT (ctx->model.n_gpu_layers == 0);
2749+
27482750
printf("\n");
2749-
if (ctx->model.n_gpu_layers != 0) {
2750-
fprintf(stderr, "[mulmat tune] error: is disabled by GPU offloading\n");
2751-
return false;
2752-
}
27532751

27542752
const char *model_name = llama_model_type_name(ctx->model.type);
27552753

@@ -2855,20 +2853,20 @@ bool llama_mulmat_tune(struct llama_context *ctx, int n_threads, bool tune, cons
28552853
if (!empty_fname) {
28562854
FILE *fp = fopen(fname, "r");
28572855
if (!fp) {
2858-
fprintf(stderr, "[mulmat tune] failed to open file %s.\n",
2856+
fprintf(stderr, "[tune] failed to open file %s.\n",
28592857
fname);
28602858
} else {
28612859
bool ok = ggml_mulmat_tune_read_data(ctx->tune, fp);
28622860
fclose(fp);
28632861

28642862
if (!ok) {
28652863
fprintf(stderr,
2866-
"[mulmat tune] failed to read data from %s\n",
2864+
"[tune] failed to read data from %s\n",
28672865
fname);
28682866
return false;
28692867
}
28702868

2871-
fprintf(stderr, "[mulmat tune] loaded data from %s\n", fname);
2869+
fprintf(stderr, "[tune] loaded data from %s\n", fname);
28722870

28732871
ok = ggml_mulmat_tune_validate(ctx->tune, model_name, ggml_ftype, params.n_threads);
28742872
if (!ok) {

0 commit comments

Comments
 (0)