Skip to content

Commit c3a2624

Browse files
authored
vocab : fix ugm tokenizer precision (#13743)
1 parent ffd0eae commit c3a2624

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/llama-vocab.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -835,7 +835,7 @@ struct llm_tokenizer_ugm_session {
835835
}
836836

837837
// initialize score_sum to -FLT_MAX so it will be always lower than sums of token scores
838-
std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -FLT_MAX});
838+
std::vector<struct best_tokenization> tokenization_results(input_len + 1, {vocab.token_unk(), 0, -DBL_MAX});
839839
// at the beginning tokenization score is zero
840840
tokenization_results[0] = { vocab.token_unk(), 0, 0 };
841841

@@ -867,7 +867,7 @@ struct llm_tokenizer_ugm_session {
867867
const double challenger_score = current_best.score_sum + token_score;
868868
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
869869
if (challenger_score > current_champ.score_sum) {
870-
struct best_tokenization challenger = { token_id, input_offset, (float) challenger_score };
870+
struct best_tokenization challenger = { token_id, input_offset, challenger_score };
871871
current_champ = challenger;
872872
}
873873
}
@@ -881,7 +881,7 @@ struct llm_tokenizer_ugm_session {
881881
prefix_offset = input_offset + n_utf8_code_units;
882882
struct best_tokenization & current_champ = tokenization_results[prefix_offset];
883883
if (challenger_score > current_champ.score_sum) {
884-
struct best_tokenization challenger = { vocab.token_unk(), input_offset, (float) challenger_score };
884+
struct best_tokenization challenger = { vocab.token_unk(), input_offset, challenger_score };
885885
current_champ = challenger;
886886
}
887887
}
@@ -1007,7 +1007,7 @@ struct llm_tokenizer_ugm_session {
10071007
struct best_tokenization {
10081008
llama_token token_id;
10091009
size_t input_offset;
1010-
float score_sum;
1010+
double score_sum;
10111011
};
10121012

10131013
struct normalization_result normalize_prefix(const std::string & input, size_t input_offset) {

0 commit comments

Comments (0)