
Commit a645e01

Remove trailing whitespaces
1 parent ea2c33f commit a645e01

3 files changed: 15 additions, 15 deletions. Each change trims a trailing-whitespace or whitespace-only line down to an empty line.


convert.py (3 additions, 3 deletions)

@@ -183,7 +183,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path], fn
 "eos_token": self.sentencepiece_tokenizer.eos_id(),
 "pad_token": self.sentencepiece_tokenizer.pad_id()
 }
-
+
 tokenizer_config: Dict[str, Any]
 if fname_tokenizer_config is not None:
 tokenizer_config = json.load(open(fname_tokenizer_config))

@@ -194,7 +194,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path], fn
 if key not in TOKEN_NAME_TO_ID or TOKEN_NAME_TO_ID[key] == -1:
 continue
 self.special_tokens_map[TOKEN_NAME_TO_ID[key]] = value["content"] if isinstance(value, dict) else value
-
+
 special_tokens: Dict[str, Any]
 if fname_special_tokens is not None:
 special_tokens = json.load(open(fname_special_tokens))

@@ -208,7 +208,7 @@ def __init__(self, fname_tokenizer: Path, fname_added_tokens: Optional[Path], fn
 if token_id == -1 or token_id in self.special_tokens_map:
 continue
 self.special_tokens_map[token_id] = value["content"] if isinstance(value, dict) else value
-
+
 self.vocab_special_size: int = len(self.added_tokens_list) + len(self.special_tokens_map)

 def sentencepiece_tokens(self) -> Iterable[Tuple[bytes, float]]:

llama-util.h (11 additions, 11 deletions)

@@ -491,7 +491,7 @@ typedef llama_buffer llama_ctx_buffer;

 struct llama_trie_node {
 llama_trie_node(): is_terminator(false) {}
-
+
 std::unordered_map<char, llama_trie_node*> children;
 bool is_terminator;
 };

@@ -506,7 +506,7 @@ struct llama_trie {
 if (word.empty()) {
 return;
 }
-
+
 llama_trie_node *ref = root_;
 for (char c : word) {
 if (ref->children.find(c) == ref->children.end()) {

@@ -576,7 +576,7 @@ struct llama_trie {
 end = lookahead_index;
 skip = lookahead_index;
 }
-
+
 auto looktrie_pointer_it = looktrie_pointer->children.find(next_char);
 while (looktrie_pointer_it != looktrie_pointer->children.end()) {
 looktrie_pointer = looktrie_pointer_it->second;

@@ -586,7 +586,7 @@ struct llama_trie {
 end = lookahead_index;
 skip = lookahead_index;
 }
-
+
 if (lookahead_index == text.size()) {
 // End of string
 break;

@@ -595,13 +595,13 @@ struct llama_trie {
 looktrie_pointer_it = looktrie_pointer->children.find(next_char);
 }
 }
-
+
 offsets.push_back(start);
 offsets.push_back(end);
 reset = true;
 break;
-}
-
+}
+
 auto trie_pointer_it = trie_pointer->children.find(current_char);
 if (trie_pointer_it != trie_pointer->children.end()) {
 // The current character being looked at has a match within the trie

@@ -615,20 +615,20 @@ struct llama_trie {
 state = states.erase(state);
 }
 }
-
+
 if (reset) {
 // Clear the full start (we found a real match)
 states.clear();
 }
-
+
 // If this character is a starting character within the trie
 // start keeping track of this partial match.
 auto children_it = root_->children.find(current_char);
 if (current >= skip && children_it != root_->children.end()) {
 states[current] = children_it->second;
 }
 }
-
+
 // We have a cut at the end with states.
 for (const auto & state : states) {
 int start = state.first;

@@ -642,7 +642,7 @@ struct llama_trie {
 break;
 }
 }
-
+
 offsets.push_back(text.size());
 return offsets;
 }
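
For context, the hunks above come from a character trie that llama-util.h uses to find occurrences of special tokens inside a piece of text. The following is only a minimal sketch of that idea under the same node layout (a children map keyed by char plus an is_terminator flag); the names trie, trie_node, add, and match_at are invented for the example and are not the llama-util.h API.

// Minimal illustrative trie, not the llama-util.h implementation.
#include <string>
#include <unordered_map>

struct trie_node {
    trie_node() : is_terminator(false) {}
    std::unordered_map<char, trie_node*> children;  // next byte -> child node
    bool is_terminator;                             // true if a whole word ends here
};

struct trie {
    trie_node *root_ = new trie_node();             // nodes are leaked for brevity

    // Insert a word byte by byte, creating nodes as needed.
    void add(const std::string & word) {
        if (word.empty()) {
            return;
        }
        trie_node *ref = root_;
        for (char c : word) {
            if (ref->children.find(c) == ref->children.end()) {
                ref->children[c] = new trie_node();
            }
            ref = ref->children[c];
        }
        ref->is_terminator = true;
    }

    // Return true if some added word starts exactly at text[pos].
    bool match_at(const std::string & text, size_t pos) const {
        const trie_node *ref = root_;
        for (size_t i = pos; i < text.size(); ++i) {
            auto it = ref->children.find(text[i]);
            if (it == ref->children.end()) {
                return false;
            }
            ref = it->second;
            if (ref->is_terminator) {
                return true;
            }
        }
        return false;
    }
};

Keying the children on raw bytes keeps each step an O(1) hash lookup, which the offset-scanning loop shown in the diff leans on as it walks candidate matches character by character.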

llama.cpp (1 addition, 1 deletion)

@@ -1844,7 +1844,7 @@ struct llama_tokenizer {

 void tokenize(const char * text, size_t len, std::vector<llama_vocab::id> & output) {
 symbols_.clear();
-
+
 // split string into utf8 chars
 int index = 0;
 size_t offs = 0;
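
The llama.cpp hunk sits right before the loop that splits the input into UTF-8 characters ("split string into utf8 chars"). As a rough sketch of that step only, assuming nothing beyond the comment in the hunk (utf8_len and split_utf8 below are illustrative names, not the llama.cpp functions), the character length can be read off the high bits of each lead byte:

// Illustrative only: split a byte buffer into UTF-8 character substrings.
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

// Number of bytes in the UTF-8 character that starts with this lead byte.
static size_t utf8_len(char src) {
    static const size_t lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
    const unsigned char highbits = static_cast<unsigned char>(src) >> 4;
    return lookup[highbits];
}

static std::vector<std::string> split_utf8(const char * text, size_t len) {
    std::vector<std::string> chars;
    size_t offs = 0;
    while (offs < len) {
        // Never read past the end of the buffer, even on truncated input.
        size_t n = std::min(utf8_len(text[offs]), len - offs);
        chars.emplace_back(text + offs, n);
        offs += n;
    }
    return chars;
}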
