Skip to content

Commit 8cec440

Browse files
committed
skip -1 tok in loop to avoid send '' on end
1 parent 343be7f commit 8cec440

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

examples/server/server.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -549,7 +549,10 @@ struct llama_server_context
549549
llama_grammar_accept_token(ctx, grammar, result.tok);
550550
}
551551

552-
for (size_t i = 0; i < std::min(candidates_p.size, (size_t)n_probs); ++i)
552+
size_t prob_size = std::min(candidates_p.size, (size_t)n_probs);
553+
// Pick the first prob
554+
555+
for (size_t i = 0; i < prob_size; ++i)
553556
{
554557
result.probs.push_back({candidates_p.data[i].id, candidates_p.data[i].p});
555558
}
@@ -1322,10 +1325,10 @@ int main(int argc, char **argv)
13221325

13231326
while (llama.has_next_token) {
13241327
const completion_token_output token_with_probs = llama.doCompletion();
1325-
const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(llama.ctx, token_with_probs.tok);
1326-
if (llama.multibyte_pending > 0) {
1328+
if (token_with_probs.tok == -1 || llama.multibyte_pending > 0) {
13271329
continue;
13281330
}
1331+
const std::string token_text = llama_token_to_str(llama.ctx, token_with_probs.tok);
13291332

13301333
size_t pos = std::min(sent_count, llama.generated_text.size());
13311334

0 commit comments

Comments (0)