Skip to content

Commit 56affa6

Browse files
committed
Replace custom code with stdlib function
1 parent 8dcf6aa commit 56affa6

File tree

2 files changed

+4
-10
lines changed

2 files changed

+4
-10
lines changed

tokenizers/src/normalizers/byte_level.rs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -32,13 +32,10 @@ impl Normalizer for ByteLevel {
3232
if !normalized.is_empty() {
3333
let s = normalized.get();
3434
let mut transformations: Vec<(char, isize)> = Vec::with_capacity(s.len());
35-
let mut i = 0;
36-
for cur_char in s.chars() {
35+
for (i, cur_char) in s.char_indices() {
3736
let size = cur_char.len_utf8();
38-
let bytes = &s.as_bytes()[i..i + size];
39-
i += size;
4037
transformations.extend(
41-
bytes
38+
s.as_bytes()[i..i + size]
4239
.iter()
4340
.enumerate()
4441
.map(|(i, b)| (BYTES_CHAR[b], isize::from(i > 0))),

tokenizers/src/pre_tokenizers/byte_level.rs

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -132,13 +132,10 @@ impl PreTokenizer for ByteLevel {
132132
pretokenized.normalize(|normalized| {
133133
let s = normalized.get();
134134
let mut transformations: Vec<(char, isize)> = Vec::with_capacity(s.len());
135-
let mut i = 0;
136-
for cur_char in s.chars() {
135+
for (i, cur_char) in s.char_indices() {
137136
let size = cur_char.len_utf8();
138-
let bytes = &s.as_bytes()[i..i + size];
139-
i += size;
140137
transformations.extend(
141-
bytes
138+
s.as_bytes()[i..i + size]
142139
.iter()
143140
.enumerate()
144141
.map(|(i, b)| (BYTES_CHAR[b], isize::from(i > 0))),

0 commit comments

Comments
 (0)