@@ -975,14 +975,15 @@ static void llama_nop(struct ggml_tensor * tensor) { // don't offload by default
     (void) tensor;
 }
 
-static std::string llama_token_to_str(const struct llama_context * ctx, llama_token token) {
+static std::string llama_token_to_piece(const struct llama_context * ctx, llama_token token) {
     std::vector<char> result(8, 0);
     const int n_tokens = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
     if (n_tokens < 0) {
         result.resize(-n_tokens);
         int check = llama_token_to_piece(llama_get_model(ctx), token, result.data(), result.size());
         GGML_ASSERT(check == -n_tokens);
-    } else {
+    }
+    else {
         result.resize(n_tokens);
     }
 
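For context, the renamed helper keeps the library's grow-and-retry convention: the first call is made with a small fixed buffer, and a negative return value is the negated number of bytes actually required. A minimal self-contained sketch of that idiom, with a hypothetical writer() standing in for llama_token_to_piece:

#include <cassert>
#include <string>
#include <vector>

// Hypothetical stand-in with the same contract as llama_token_to_piece:
// returns the number of bytes written on success, or the negated
// required size when the buffer is too small.
static int writer(char * buf, int length) {
    const std::string text = "example piece";
    const int needed = (int) text.size();
    if (needed > length) {
        return -needed;
    }
    text.copy(buf, needed);
    return needed;
}

static std::string read_piece() {
    std::vector<char> result(8, 0);                     // optimistic small buffer
    const int n = writer(result.data(), (int) result.size());
    if (n < 0) {
        result.resize(-n);                              // grow to the exact size reported
        const int check = writer(result.data(), (int) result.size());
        assert(check == -n);                            // second pass must agree
    }
    else {
        result.resize(n);                               // shrink to what was written
    }
    return std::string(result.data(), result.size());
}
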
@@ -1202,10 +1203,10 @@ struct llama_vocab {
     id special_eot_id = 32010;
 
     int find_bpe_rank(std::string token_left, std::string token_right) const {
-        replace_all(token_left,  " ",  "\u0120");
-        replace_all(token_left,  "\n", "\u010A");
-        replace_all(token_right, " ",  "\u0120");
-        replace_all(token_right, "\n", "\u010A");
+        GGML_ASSERT(token_left.find(" ") == std::string::npos);
+        GGML_ASSERT(token_left.find("\n") == std::string::npos);
+        GGML_ASSERT(token_right.find(" ") == std::string::npos);
+        GGML_ASSERT(token_right.find("\n") == std::string::npos);
 
         auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
         if (it == bpe_ranks.end()) {
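The assertions shift responsibility to the caller: tokens looked up in bpe_ranks must already be in GPT-2's byte-level alphabet, where a space is encoded as "Ġ" (U+0120) and a newline as "Ċ" (U+010A). A sketch of the normalization a caller would now perform up front, assuming a replace_all helper like the one the old code relied on:

#include <string>

// Replace every occurrence of `from` in `s` with `to`.
static void replace_all(std::string & s, const std::string & from, const std::string & to) {
    for (size_t pos = 0; (pos = s.find(from, pos)) != std::string::npos; pos += to.size()) {
        s.replace(pos, from.size(), to);
    }
}

// Map raw bytes into the byte-level BPE alphabet before a rank lookup.
static std::string normalize_for_bpe(std::string token) {
    replace_all(token, " ",  "\u0120"); // space   -> 'Ġ'
    replace_all(token, "\n", "\u010A"); // newline -> 'Ċ'
    return token;
}

With something like this in place, a call such as find_bpe_rank(normalize_for_bpe(left), normalize_for_bpe(right)) satisfies the new assertions.
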
@@ -7499,7 +7500,7 @@ void llama_sample_grammar(struct llama_context * ctx, llama_token_data_array * c
 
     for (size_t i = 0; i < candidates->size; ++i) {
         const llama_token id = candidates->data[i].id;
-        const std::string piece = llama_token_to_str(ctx, id);
+        const std::string piece = llama_token_to_piece(ctx, id);
         if (id == eos) {
             if (!allow_eos) {
                 candidates->data[i].logit = -INFINITY;
@@ -7711,7 +7712,7 @@ void llama_grammar_accept_token(struct llama_context * ctx, struct llama_grammar
         GGML_ASSERT(false);
     }
 
-    const std::string piece = llama_token_to_str(ctx, token);
+    const std::string piece = llama_token_to_piece(ctx, token);
 
     // Note terminating 0 in decoded string
     const auto decoded = decode_utf8(piece.c_str(), grammar->partial_utf8);
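At this call site the piece is handed to decode_utf8, which turns the raw bytes into a 0-terminated sequence of code points and carries any incomplete trailing sequence across calls in grammar->partial_utf8. A simplified sketch of the decode step, assuming complete well-formed input (the partial-state handling of the real function is omitted):

#include <cstdint>
#include <string>
#include <vector>

static std::vector<uint32_t> decode_utf8_simple(const std::string & piece) {
    // sequence length by high nibble of the first byte (continuation
    // bytes 0x80-0xBF are treated as length 1 just to keep making progress)
    static const int      len_lookup[16] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
    static const uint32_t first_mask[5]  = { 0, 0x7F, 0x1F, 0x0F, 0x07 };

    std::vector<uint32_t> decoded;
    size_t i = 0;
    while (i < piece.size()) {
        const uint8_t first = (uint8_t) piece[i];
        const int     len   = len_lookup[first >> 4];
        uint32_t      cp    = first & first_mask[len];
        for (int k = 1; k < len && i + k < piece.size(); ++k) {
            cp = (cp << 6) | ((uint8_t) piece[i + k] & 0x3F);
        }
        decoded.push_back(cp);
        i += len;
    }
    decoded.push_back(0); // note terminating 0 in decoded string
    return decoded;
}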