@@ -835,7 +835,7 @@ struct llm_tokenizer_ugm_session {
         }
 
         // initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
-        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
+        std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
         // at the beginning tokenization score is zero
         tokenization_results[0] = { vocab.token_unk(), 0, 0 };
 
@@ -867,7 +867,7 @@ struct llm_tokenizer_ugm_session {
                     const double challenger_score = current_best.score_sum + token_score;
                     struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                     if (challenger_score > current_champ.score_sum) {
-                        struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
+                        struct best_tokenization challenger = { token_id, input_offset, challenger_score };
                         current_champ = challenger;
                     }
                 }
@@ -881,7 +881,7 @@ struct llm_tokenizer_ugm_session {
                 prefix_offset = input_offset + n_utf8_code_units;
                 struct best_tokenization & current_champ = tokenization_results[prefix_offset];
                 if (challenger_score > current_champ.score_sum) {
-                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
+                    struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
                     current_champ = challenger;
                 }
             }
@@ -1007,7 +1007,7 @@ struct llm_tokenizer_ugm_session {
     struct best_tokenization {
         llama_token token_id;
         size_t input_offset;
-        float score_sum;
+        double score_sum;
     };
 
     struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {
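Taken together, the hunks move the UGM tokenizer's running best-tokenization score from `float` to `double` end to end: the sentinel initializer becomes `-DBL_MAX`, the two `(float)` casts at the store sites are dropped, and the `score_sum` field itself widens. Since `challenger_score` is already computed as a `double` and the scores are accumulated log-probabilities compared with `>` between near-equal candidates, rounding each partial sum back to float at every store can flip the comparison. Below is a minimal standalone sketch of that effect; the per-token scores and the repeat count are invented for illustration and are not taken from the patch:

```cpp
// Sketch: accumulating log-probability scores in float vs. double.
// Scores and loop count are hypothetical, for illustration only.
#include <cstdio>

int main() {
    // hypothetical per-token log-probability scores
    const double scores[] = { -9.3146721, -7.0911253, -11.4402176, -8.2235384 };

    float  sum_f = 0.0f; // rounded to ~7 significant digits at every step
    double sum_d = 0.0;  // keeps ~15-16 significant digits

    for (int i = 0; i < 1000; ++i) {
        for (const double s : scores) {
            sum_f += (float) s; // mirrors the old (float) cast at each store
            sum_d += s;         // mirrors the patched all-double accumulation
        }
    }

    // the drift visible here is the kind of error that can flip a
    // `challenger_score > current_champ.score_sum` comparison
    printf("float : %.10f\n", sum_f);
    printf("double: %.10f\n", sum_d);
    return 0;
}
```

Keeping the accumulator in double costs nothing at the store sites, since the diff shows `challenger_score` was already a `double` before being truncated into `score_sum`.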