@@ -462,7 +462,13 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
462
462
for (size_t ending_idx = 1 ; ending_idx < 4 ; ending_idx++) {
463
463
464
464
// Tokenize the query
465
- query_embd = ::llama_tokenize (ctx, hs_data[task_idx].ending [ending_idx], false );
465
+ // SPM tokenizer: Do not tokenize the starting space in the ending since it is always added by the tokenizer
466
+ if (is_spm) {
467
+ query_embd = ::llama_tokenize (ctx, hs_data[task_idx].ending [ending_idx].substr (1 ,hs_data[task_idx].ending [ending_idx].size ()-1 ), false );
468
+ } else {
469
+ query_embd = ::llama_tokenize (ctx, hs_data[task_idx].ending [ending_idx], false );
470
+ }
471
+
466
472
query_size = query_embd.size ();
467
473
468
474
// Stop if query won't fit the ctx window
@@ -505,6 +511,14 @@ void hellaswag_score(llama_context * ctx, const gpt_params & params) {
505
511
// task_idx,ending_idx,whole_size,context_size, hs_data[task_idx].ending_logprob_count[ending_idx], hs_data[task_idx].ending_logprob[ending_idx] );
506
512
}
507
513
514
+ // TODO: Temporary check for NaNs until Falcon MMQ is solved
515
+ for (size_t j = 0 ; j < 4 ; j++) {
516
+ if (std::isnan (hs_data[task_idx].ending_logprob [j])) {
517
+ printf (" NAN in task, %zu ending %zu\n " ,task_idx, j);
518
+ return ;
519
+ }
520
+ }
521
+
508
522
// Find the ending with maximum logprob
509
523
size_t ending_logprob_max_idx = 0 ;
510
524
double ending_logprob_max_val = hs_data[task_idx].ending_logprob [0 ];
0 commit comments