Skip to content

Commit 0c14627

Browse files
committed
Code cleanup
1 parent 61a98bc commit 0c14627

File tree

2 files changed: +14 additions, −24 deletions

llama.cpp

Lines changed: 14 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,6 @@ struct llama_vocab {
281281

282282
llama_trie special_token_trie;
283283
std::unordered_map<token, id> special_token_to_id;
284-
std::vector<id> special_tokens;
285284
size_t max_special_token_length;
286285
};
287286

@@ -580,14 +579,13 @@ struct llama_file_loader {
580579

581580
for (uint32_t i = 0; i < hparams.n_vocab_sp; i++) {
582581
uint32_t token_id = file.read_u32();
583-
const auto & token = vocab.id_to_token[token_id].tok;
582+
const auto & word = vocab.id_to_token[token_id].tok;
584583

585-
vocab.special_token_trie.add(token);
586-
vocab.special_tokens.push_back(token_id);
587-
vocab.special_token_to_id[token] = token_id;
584+
vocab.special_token_trie.add(word);
585+
vocab.special_token_to_id[word] = token_id;
588586

589-
if (vocab.max_special_token_length < token.size()) {
590-
vocab.max_special_token_length = token.size();
587+
if (vocab.max_special_token_length < word.size()) {
588+
vocab.max_special_token_length = word.size();
591589
}
592590
}
593591
}
@@ -674,9 +672,8 @@ struct llama_file_saver {
674672
file.write_raw(token_score.tok.data(), token_score.tok.size());
675673
file.write_raw(&token_score.score, sizeof(token_score.score));
676674
}
677-
uint32_t n_vocab_sp = any_file_loader->hparams.n_vocab_sp;
678-
for (uint32_t i = 0; i < n_vocab; i++) {
679-
file.write_u32(any_file_loader->vocab.special_tokens[i]);
675+
for (const auto & pair : any_file_loader->vocab.special_token_to_id) {
676+
file.write_u32(pair.second);
680677
}
681678
}
682679
void write_tensor(llama_load_tensor & tensor, enum ggml_type new_type, const void * new_data, size_t new_size) {
@@ -2111,24 +2108,23 @@ static std::vector<llama_vocab::id> llama_tokenize(const llama_vocab & vocab, co
21112108
return output;
21122109
}
21132110

2114-
auto offsets = vocab.special_token_trie.split(text);
2111+
std::vector<int> offsets = vocab.special_token_trie.split(text);
21152112
int start = 0;
21162113
for (int end : offsets) {
21172114
if (start >= end) {
21182115
continue;
21192116
}
21202117

2121-
size_t part_length = end - start;
2122-
//printf("\"%.*s\"\n", (int) part_length, text.c_str() + start);
2123-
2124-
if (vocab.max_special_token_length < part_length) {
2125-
tokenizer.tokenize(text.c_str() + start, part_length, output);
2118+
const char *part = text.c_str() + start;
2119+
size_t part_len = end - start;
2120+
if (vocab.max_special_token_length < part_len) {
2121+
tokenizer.tokenize(part, part_len, output);
21262122
} else {
2127-
auto token_it = vocab.special_token_to_id.find(std::string(text.c_str() + start, part_length));
2123+
auto token_it = vocab.special_token_to_id.find(std::string(part, part_len));
21282124
if (token_it != vocab.special_token_to_id.end()) {
21292125
output.push_back(token_it->second);
21302126
} else {
2131-
tokenizer.tokenize(text.c_str() + start, part_length, output);
2127+
tokenizer.tokenize(part, part_len, output);
21322128
}
21332129
}
21342130
start = end;
@@ -4270,10 +4266,6 @@ llama_token llama_token_nl() {
42704266
return 13;
42714267
}
42724268

4273-
bool llama_is_special_token(const struct llama_context *ctx, llama_token token) {
4274-
return std::find(ctx->vocab.special_tokens.begin(), ctx->vocab.special_tokens.end(), token) != ctx->vocab.special_tokens.end();
4275-
}
4276-
42774269
struct llama_timings llama_get_timings(struct llama_context * ctx) {
42784270
struct llama_timings result = {
42794271
/*.t_start_ms =*/ 1e-3 * ctx->t_start_us,

llama.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -373,8 +373,6 @@ extern "C" {
373373
LLAMA_API llama_token llama_token_eos(); // end-of-sentence
374374
LLAMA_API llama_token llama_token_nl(); // next-line
375375

376-
LLAMA_API bool llama_is_special_token(const struct llama_context * ctx, llama_token token);
377-
378376
// Grammar
379377
//
380378
LLAMA_API struct llama_grammar * llama_grammar_init(

Comments (0)