Don't crash when pooling_type is specified but the context is in generation mode (possibly useful for the server)

iamlemec · iamlemec · commit a7f93257e983 · 2024-06-27T18:14:47.000-05:00
diff --git a/llama.cpp b/llama.cpp
@@ -12343,7 +12343,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         }
     }
 
-    if (cparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
+    if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {
         const int64_t n_tokens = batch.n_tokens;
 
         GGML_ASSERT(lctx.inp_mean);
@@ -12375,7 +12375,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         }
     }
 
-    if (cparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {
+    if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {
         const int64_t n_tokens = batch.n_tokens;
 
         GGML_ASSERT(lctx.inp_cls);
@@ -12396,7 +12396,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
         }
     }
 
-    if (cparams.pooling_type == LLAMA_POOLING_TYPE_LAST) {
+    if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_LAST) {
         const int64_t n_tokens = batch.n_tokens;
 
         GGML_ASSERT(lctx.inp_cls);

Original file line number	Diff line number	Diff line change
`@@ -12343,7 +12343,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {`
`12343`	`12343`	`}`
`12344`	`12344`	`}`
`12345`	`12345`
`12346`		`- if (cparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {`
	`12346`	`+ if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) {`
`12347`	`12347`	`const int64_t n_tokens = batch.n_tokens;`
`12348`	`12348`
`12349`	`12349`	`GGML_ASSERT(lctx.inp_mean);`
`@@ -12375,7 +12375,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {`
`12375`	`12375`	`}`
`12376`	`12376`	`}`
`12377`	`12377`
`12378`		`- if (cparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {`
	`12378`	`+ if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_CLS) {`
`12379`	`12379`	`const int64_t n_tokens = batch.n_tokens;`
`12380`	`12380`
`12381`	`12381`	`GGML_ASSERT(lctx.inp_cls);`
`@@ -12396,7 +12396,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {`
`12396`	`12396`	`}`
`12397`	`12397`	`}`
`12398`	`12398`
`12399`		`- if (cparams.pooling_type == LLAMA_POOLING_TYPE_LAST) {`
	`12399`	`+ if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_LAST) {`
`12400`	`12400`	`const int64_t n_tokens = batch.n_tokens;`
`12401`	`12401`
`12402`	`12402`	`GGML_ASSERT(lctx.inp_cls);`