examples : make n_ctx warning work again

cebtenzzre · cebtenzzre · commit 01948ddf3c60 · 2023-09-07T00:18:47.000-04:00
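The n_ctx warning was broken by commit e36ecdc ("build : on Mac OS enable Metal by default (ggml-org#2901)"). The examples now compare the requested context size against the model's training context size, exposed through the new llama_n_ctx_train() accessor.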
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
@@ -17,11 +17,6 @@ int main(int argc, char ** argv) {
 
     params.embedding = true;
 
-    if (params.n_ctx > 2048) {
-        fprintf(stderr, "%s: warning: model might not support context sizes greater than 2048 tokens (%d specified);"
-                "expect poor results\n", __func__, params.n_ctx);
-    }
-
     fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
 
     if (params.seed == LLAMA_DEFAULT_SEED) {
@@ -47,6 +42,12 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    const int n_ctx_train = llama_n_ctx_train(ctx);
+    if (params.n_ctx > n_ctx_train) {
+        fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
+                __func__, n_ctx_train, params.n_ctx);
+    }
+
     // print system information
     {
         fprintf(stderr, "\n");
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
@@ -186,8 +186,10 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    if (params.n_ctx > llama_n_ctx(ctx)) {
-        LOG_TEE("%s: warning: base model only supports context sizes no greater than %d tokens (%d specified)\n", __func__, llama_n_ctx(ctx), params.n_ctx);
+    const int n_ctx_train = llama_n_ctx_train(ctx);
+    if (params.n_ctx > n_ctx_train) {
+        LOG_TEE("%s: warning: model was trained on only %d context tokens (%d specified)\n",
+                __func__, n_ctx_train, params.n_ctx);
     } else if (params.n_ctx < 8) {
         LOG_TEE("%s: warning: minimum context size is 8, using minimum size.\n", __func__);
         params.n_ctx = 8;
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
@@ -693,9 +693,10 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    if (params.n_ctx > llama_n_ctx(ctx)) {
-        fprintf(stderr, "%s: warning: model might not support context sizes greater than %d tokens (%d specified);"
-                "expect poor results\n", __func__, llama_n_ctx(ctx), params.n_ctx);
+    const int n_ctx_train = llama_n_ctx_train(ctx);
+    if (params.n_ctx > n_ctx_train) {
+        fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
+                __func__, n_ctx_train, params.n_ctx);
     }
 
     // print system information
diff --git a/llama.cpp b/llama.cpp
@@ -5649,15 +5649,19 @@ void llama_free(struct llama_context * ctx) {
 }
 
 int llama_n_vocab(const struct llama_context * ctx) {
-    return ctx->model.vocab.id_to_token.size();
+    return llama_model_n_vocab(&ctx->model);
 }
 
 int llama_n_ctx(const struct llama_context * ctx) {
-    return ctx->model.hparams.n_ctx;
+    return llama_model_n_ctx(&ctx->model);
+}
+
+int llama_n_ctx_train(const struct llama_context * ctx) {
+    return llama_model_n_ctx_train(&ctx->model);
 }
 
 int llama_n_embd(const struct llama_context * ctx) {
-    return ctx->model.hparams.n_embd;
+    return llama_model_n_embd(&ctx->model);
 }
 
 enum llama_vocab_type llama_vocab_type(const struct llama_context * ctx) {
@@ -5672,6 +5676,10 @@ int llama_model_n_ctx(const struct llama_model * model) {
     return model->hparams.n_ctx;
 }
 
+int llama_model_n_ctx_train(const struct llama_model * model) {
+    return model->hparams.n_ctx_train;
+}
+
 int llama_model_n_embd(const struct llama_model * model) {
     return model->hparams.n_embd;
 }
diff --git a/llama.h b/llama.h
@@ -245,15 +245,17 @@ extern "C" {
     LLAMA_API bool llama_mmap_supported (void);
     LLAMA_API bool llama_mlock_supported(void);
 
-    LLAMA_API int llama_n_vocab(const struct llama_context * ctx);
-    LLAMA_API int llama_n_ctx  (const struct llama_context * ctx);
-    LLAMA_API int llama_n_embd (const struct llama_context * ctx);
+    LLAMA_API int llama_n_vocab    (const struct llama_context * ctx);
+    LLAMA_API int llama_n_ctx      (const struct llama_context * ctx);
+    LLAMA_API int llama_n_ctx_train(const struct llama_context * ctx);
+    LLAMA_API int llama_n_embd     (const struct llama_context * ctx);
 
     LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_context * ctx);
 
-    LLAMA_API int llama_model_n_vocab(const struct llama_model * model);
-    LLAMA_API int llama_model_n_ctx  (const struct llama_model * model);
-    LLAMA_API int llama_model_n_embd (const struct llama_model * model);
+    LLAMA_API int llama_model_n_vocab    (const struct llama_model * model);
+    LLAMA_API int llama_model_n_ctx      (const struct llama_model * model);
+    LLAMA_API int llama_model_n_ctx_train(const struct llama_model * model);
+    LLAMA_API int llama_model_n_embd     (const struct llama_model * model);
 
     // Get a string describing the model type
     LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);

Original file line number	Diff line number	Diff line change
`@@ -693,9 +693,10 @@ int main(int argc, char ** argv) {`
`693`	`693`	`return 1;`
`694`	`694`	`}`
`695`	`695`
`696`		`- if (params.n_ctx > llama_n_ctx(ctx)) {`
`697`		`- fprintf(stderr, "%s: warning: model might not support context sizes greater than %d tokens (%d specified);"`
`698`		`- "expect poor results\n", __func__, llama_n_ctx(ctx), params.n_ctx);`
	`696`	`+ const int n_ctx_train = llama_n_ctx_train(ctx);`
	`697`	`+ if (params.n_ctx > n_ctx_train) {`
	`698`	`+ fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",`
	`699`	`+ __func__, n_ctx_train, params.n_ctx);`
`699`	`700`	`}`
`700`	`701`
`701`	`702`	`// print system information`
Original file line number	Diff line number	Diff line change
`@@ -5649,15 +5649,19 @@ void llama_free(struct llama_context * ctx) {`
`5649`	`5649`	`}`
`5650`	`5650`
`5651`	`5651`	`int llama_n_vocab(const struct llama_context * ctx) {`
`5652`		`- return ctx->model.vocab.id_to_token.size();`
	`5652`	`+ return llama_model_n_vocab(&ctx->model);`
`5653`	`5653`	`}`
`5654`	`5654`
`5655`	`5655`	`int llama_n_ctx(const struct llama_context * ctx) {`
`5656`		`- return ctx->model.hparams.n_ctx;`
	`5656`	`+ return llama_model_n_ctx(&ctx->model);`
	`5657`	`+}`
	`5658`	`+`
	`5659`	`+int llama_n_ctx_train(const struct llama_context * ctx) {`
	`5660`	`+ return llama_model_n_ctx_train(&ctx->model);`
`5657`	`5661`	`}`
`5658`	`5662`
`5659`	`5663`	`int llama_n_embd(const struct llama_context * ctx) {`
`5660`		`- return ctx->model.hparams.n_embd;`
	`5664`	`+ return llama_model_n_embd(&ctx->model);`
`5661`	`5665`	`}`
`5662`	`5666`
`5663`	`5667`	`enum llama_vocab_type llama_vocab_type(const struct llama_context * ctx) {`
`@@ -5672,6 +5676,10 @@ int llama_model_n_ctx(const struct llama_model * model) {`
`5672`	`5676`	`return model->hparams.n_ctx;`
`5673`	`5677`	`}`
`5674`	`5678`
	`5679`	`+int llama_model_n_ctx_train(const struct llama_model * model) {`
	`5680`	`+ return model->hparams.n_ctx_train;`
	`5681`	`+}`
	`5682`	`+`
`5675`	`5683`	`int llama_model_n_embd(const struct llama_model * model) {`
`5676`	`5684`	`return model->hparams.n_embd;`
`5677`	`5685`	`}`