1 file changed: +5 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -504,9 +504,11 @@ struct llama_server_context
504
504
});
505
505
}
506
506
507
+ bool tg = true ;
507
508
while (n_past < embd.size ())
508
509
{
509
510
int n_eval = (int )embd.size () - n_past;
511
+ tg = n_eval == 1 ;
510
512
if (n_eval > params.n_batch )
511
513
{
512
514
n_eval = params.n_batch ;
@@ -633,7 +635,9 @@ struct llama_server_context
633
635
634
636
last_n_tokens.erase (last_n_tokens.begin ());
635
637
last_n_tokens.push_back (result.tok );
636
- num_tokens_predicted++;
638
+ if (tg) {
639
+ num_tokens_predicted++;
640
+ }
637
641
}
638
642
639
643
// add it to the context
@@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
1124
1128
{
1125
1129
const auto timings = llama_get_timings (llama.ctx );
1126
1130
1127
- assert (timings.n_eval == ptrdiff_t (llama.num_tokens_predicted ));
1128
-
1129
1131
return json{
1130
1132
{" prompt_n" , timings.n_p_eval },
1131
1133
{" prompt_ms" , timings.t_p_eval_ms },
You can’t perform that action at this time.
0 commit comments