
Commit e3262dd

update mlua-rs, fix warnings, remove dead code
1 parent 1b9baba commit e3262dd

4 files changed: +160 -30 lines changed


Cargo.lock

Lines changed: 151 additions & 13 deletions
Some generated files are not rendered by default; the Cargo.lock diff is not shown.

Cargo.toml

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@ name = "tiktoken_core"
 crate-type = ["cdylib"]

 [dependencies]
-mlua = { version = "0.9.7", features = ["serialize", "module"] }
+mlua = { version = "0.10.5", features = ["serialize", "module"] }
 # tiktoken dependencies
 fancy-regex = "0.11.0"
 regex = "1.8.3"
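
Note: the only manifest change is the mlua bump from 0.9.7 to 0.10.5, keeping the "serialize" and "module" features. Together with crate-type = ["cdylib"], the "module" feature is what lets the crate be built as a Lua-loadable shared library. For context only, a minimal entry-point sketch under that setup might look like the following; the hello export and its body are purely illustrative and not part of this commit:

use mlua::prelude::*;

// Hypothetical skeleton of an mlua "module"-feature entry point. The function
// name determines the exported luaopen_* symbol, so it should match the
// library name that `require("tiktoken_core")` resolves to.
#[mlua::lua_module]
fn tiktoken_core(lua: &Lua) -> LuaResult<LuaTable> {
    let exports = lua.create_table()?;
    exports.set(
        "hello",
        lua.create_function(|_, name: String| Ok(format!("hello, {name}")))?,
    )?;
    Ok(exports)
}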

src/lib.rs

Lines changed: 6 additions & 14 deletions
@@ -1,4 +1,4 @@
-use base64;
+use base64::{prelude::BASE64_STANDARD, Engine as _};
 use fancy_regex::Regex;
 use mlua::prelude::*;
 use rustc_hash::FxHashMap as HashMap;
@@ -8,10 +8,6 @@ use std::io::{BufRead, BufReader};
 use std::sync::{Arc, Mutex};
 use std::thread;

-#[cfg(feature = "multithreading")]
-const MAX_NUM_THREADS: usize = 128;
-
-#[cfg(not(feature = "multithreading"))]
 const MAX_NUM_THREADS: usize = 1;

 fn _byte_pair_merge<T>(
@@ -224,7 +220,9 @@ fn new(
     for line in reader.lines() {
         let line = line.unwrap();
         let mut parts = line.split_whitespace();
-        let token = base64::decode(parts.next().unwrap().as_bytes()).unwrap();
+        let token = BASE64_STANDARD
+            .decode(parts.next().unwrap().as_bytes())
+            .unwrap();
         let rank = parts.next().unwrap().parse().unwrap();
         encoder.insert(token, rank);
     }
@@ -262,7 +260,8 @@ fn new(
 }

 fn encode(state: &State, text: mlua::String) -> LuaResult<(Vec<usize>, usize, usize)> {
-    let encoded_str = String::from_utf8_lossy(text.as_bytes());
+    let text_bytes = text.as_bytes();
+    let encoded_str = String::from_utf8_lossy(&text_bytes);
     let allowed_special = HashSet::new();
     let max_tokens = None;
     Ok(state
@@ -592,13 +591,6 @@ impl CoreBPENative {
         Err(piece.to_owned())
     }

-    fn encode_single_piece(&self, piece: &[u8]) -> Vec<usize> {
-        if let Some(token) = self.encoder.get(piece) {
-            return vec![*token];
-        }
-        byte_pair_encode(piece, &self.encoder)
-    }
-
     // ====================
     // Decoding
     // ====================
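
Note: the src/lib.rs edits track two upstream API changes. Token loading now goes through the Engine-based base64 API (0.21 and later), where the free base64::decode helper was removed, and encode() binds text.as_bytes() to a local first, presumably because mlua 0.10 returns a borrow guard rather than a bare &[u8]. A standalone sketch of the Engine-style usage assumed above, with a sample input that is not taken from this repository:

use base64::{prelude::BASE64_STANDARD, Engine as _};

fn main() {
    // Decoding goes through an explicit engine instead of base64::decode.
    // "aGVsbG8=" is just a sample input (the standard encoding of "hello").
    let bytes = BASE64_STANDARD.decode("aGVsbG8=").expect("valid base64");
    assert_eq!(bytes, b"hello");

    // Encoding uses the same engine.
    assert_eq!(BASE64_STANDARD.encode(b"hello"), "aGVsbG8=");
}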

tiktoken_core-0.2.3-1.rockspec renamed to tiktoken_core-0.2.4-1.rockspec

Lines changed: 2 additions & 2 deletions
@@ -1,9 +1,9 @@
 package = "tiktoken_core"
-version = "0.2.3-1"
+version = "0.2.4-1"

 source = {
    url = "git+https://github.com/gptlang/lua-tiktoken",
-   tag = "v0.2.3",
+   tag = "v0.2.4",
 }

 description = {

0 commit comments
