@@ -281,7 +281,6 @@ struct llama_vocab {
281
281
282
282
llama_trie special_token_trie;
283
283
std::unordered_map<token, id> special_token_to_id;
284
- std::vector<id> special_tokens;
285
284
size_t max_special_token_length;
286
285
};
287
286
@@ -580,14 +579,13 @@ struct llama_file_loader {
580
579
581
580
for (uint32_t i = 0 ; i < hparams.n_vocab_sp ; i++) {
582
581
uint32_t token_id = file.read_u32 ();
583
- const auto & token = vocab.id_to_token [token_id].tok ;
582
+ const auto & word = vocab.id_to_token [token_id].tok ;
584
583
585
- vocab.special_token_trie .add (token);
586
- vocab.special_tokens .push_back (token_id);
587
- vocab.special_token_to_id [token] = token_id;
584
+ vocab.special_token_trie .add (word);
585
+ vocab.special_token_to_id [word] = token_id;
588
586
589
- if (vocab.max_special_token_length < token .size ()) {
590
- vocab.max_special_token_length = token .size ();
587
+ if (vocab.max_special_token_length < word .size ()) {
588
+ vocab.max_special_token_length = word .size ();
591
589
}
592
590
}
593
591
}
@@ -674,9 +672,8 @@ struct llama_file_saver {
674
672
file.write_raw (token_score.tok .data (), token_score.tok .size ());
675
673
file.write_raw (&token_score.score , sizeof (token_score.score ));
676
674
}
677
- uint32_t n_vocab_sp = any_file_loader->hparams .n_vocab_sp ;
678
- for (uint32_t i = 0 ; i < n_vocab; i++) {
679
- file.write_u32 (any_file_loader->vocab .special_tokens [i]);
675
+ for (const auto & pair : any_file_loader->vocab .special_token_to_id ) {
676
+ file.write_u32 (pair.second );
680
677
}
681
678
}
682
679
void write_tensor (llama_load_tensor & tensor, enum ggml_type new_type, const void * new_data, size_t new_size) {
@@ -2111,24 +2108,23 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
2111
2108
return output;
2112
2109
}
2113
2110
2114
- auto offsets = vocab.special_token_trie .split (text);
2111
+ std::vector< int > offsets = vocab.special_token_trie .split (text);
2115
2112
int start = 0 ;
2116
2113
for (int end : offsets) {
2117
2114
if (start >= end) {
2118
2115
continue ;
2119
2116
}
2120
2117
2121
- size_t part_length = end - start;
2122
- // printf("\"%.*s\"\n", (int) part_length, text.c_str() + start);
2123
-
2124
- if (vocab.max_special_token_length < part_length) {
2125
- tokenizer.tokenize (text.c_str () + start, part_length, output);
2118
+ const char *part = text.c_str () + start;
2119
+ size_t part_len = end - start;
2120
+ if (vocab.max_special_token_length < part_len) {
2121
+ tokenizer.tokenize (part, part_len, output);
2126
2122
} else {
2127
- auto token_it = vocab.special_token_to_id .find (std::string (text. c_str () + start, part_length ));
2123
+ auto token_it = vocab.special_token_to_id .find (std::string (part, part_len ));
2128
2124
if (token_it != vocab.special_token_to_id .end ()) {
2129
2125
output.push_back (token_it->second );
2130
2126
} else {
2131
- tokenizer.tokenize (text. c_str () + start, part_length , output);
2127
+ tokenizer.tokenize (part, part_len , output);
2132
2128
}
2133
2129
}
2134
2130
start = end;
@@ -4270,10 +4266,6 @@ llama_token llama_token_nl() {
4270
4266
return 13 ;
4271
4267
}
4272
4268
4273
- bool llama_is_special_token (const struct llama_context *ctx, llama_token token) {
4274
- return std::find (ctx->vocab .special_tokens .begin (), ctx->vocab .special_tokens .end (), token) != ctx->vocab .special_tokens .end ();
4275
- }
4276
-
4277
4269
struct llama_timings llama_get_timings (struct llama_context * ctx) {
4278
4270
struct llama_timings result = {
4279
4271
/* .t_start_ms =*/ 1e-3 * ctx->t_start_us ,
0 commit comments