1 file changed
+1
-1
lines changedSubmodule tokenizers updated 25 files
- .clang-format+244
- CMakeLists.txt+2-2
- examples/tokenize_tool/CMakeLists.txt+3
- include/pytorch/tokenizers/bpe_tokenizer_base.h+83-9
- include/pytorch/tokenizers/string_integer_map.h+625
- include/pytorch/tokenizers/tiktoken.h+2-1
- pytorch_tokenizers/targets.bzl+2-8
- pytorch_tokenizers/tools/__init__.py
- pytorch_tokenizers/tools/llama2c/convert.py+1-1
- pytorch_tokenizers/tools/llama2c/targets.bzl+3
- setup.py+1-2
- src/bpe_tokenizer_base.cpp+27-31
- src/hf_tokenizer.cpp+41-46
- src/pre_tokenizer.cpp+26-22
- src/tiktoken.cpp+35-50
- src/token_decoder.cpp+3-3
- targets.bzl+1-1
- test/fb/TARGETS+30
- test/targets.bzl+23
- test/test_pre_tokenizer.cpp+2-1
- test/test_string_integer_map.cpp+324
- third-party/TARGETS+5-57
- third-party/targets.bzl+61
- tools/llama2c/convert.py-52
- tools/llama2c/targets.bzl-32
0 commit comments