Migrate users of llm tokenizer to use pytorch-labs/tokenizers #9114

Merged
merged 1 commit on Mar 17, 2025
8 changes: 8 additions & 0 deletions .ci/scripts/utils.sh
@@ -20,6 +20,14 @@ clean_executorch_install_folders() {
./install_executorch.sh --clean
}

update_tokenizers_git_submodule() {
echo "Updating tokenizers git submodule..."
git submodule update --init
pushd extension/llm/tokenizers
git submodule update --init
popd
}

install_executorch() {
which pip
# Install executorch, this assumes that Executorch is checked out in the
8 changes: 4 additions & 4 deletions examples/mediatek/CMakeLists.txt
@@ -137,18 +137,18 @@ if(${ANDROID})
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})

# Build tokenizers
set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizer)
set(LLAMA2_TOKENIZER_DIR ${EXTENSIONS_LLM_DIR}/tokenizers)
add_library(tokenizer STATIC)
target_include_directories(
tokenizer PUBLIC ${_common_include_directories} ${THIRD_PARTY_ABSL_DIR}
${THIRD_PARTY_RE2_DIR}
${THIRD_PARTY_RE2_DIR} ${LLAMA2_TOKENIZER_DIR}/include
)
target_link_libraries(tokenizer PRIVATE re2::re2)
target_sources(
tokenizer
PRIVATE
${LLAMA2_TOKENIZER_DIR}/tiktoken.cpp
${LLAMA2_TOKENIZER_DIR}/bpe_tokenizer.cpp
${LLAMA2_TOKENIZER_DIR}/src/tiktoken.cpp
${LLAMA2_TOKENIZER_DIR}/src/llama2c_tokenizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../models/llama/tokenizer/llama_tiktoken.cpp
)

@@ -68,8 +68,8 @@
#include "llama_runner/llm_helper/include/llm_types.h"

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
#include <executorch/extension/llm/tokenizer/tiktoken.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>
#include <pytorch/tokenizers/tiktoken.h>

// Llama model options
DEFINE_uint64(
@@ -140,10 +140,10 @@ using example::utils::read_file;
using example::utils::split;
using example::utils::Timer;
using example::utils::to_string;
using executorch::extension::llm::BPETokenizer;
using executorch::extension::llm::Tokenizer;
using executorch::runtime::Error;
using executorch::runtime::Result;
using tokenizers::Llama2cTokenizer;
using tokenizers::Tokenizer;

LlamaModelOptions get_model_options() {
LlamaModelOptions options = {
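Note (reviewer orientation, not part of the diff): the mechanical renames applied throughout this PR map the old ExecuTorch tokenizer headers and types onto pytorch-labs/tokenizers. A minimal C++ sketch of the new spellings, using only names that appear in the hunks of this PR:

#include <pytorch/tokenizers/llama2c_tokenizer.h> // replaces executorch/extension/llm/tokenizer/bpe_tokenizer.h
#include <pytorch/tokenizers/tiktoken.h>          // replaces executorch/extension/llm/tokenizer/tiktoken.h
#include <pytorch/tokenizers/tokenizer.h>         // replaces executorch/extension/llm/tokenizer/tokenizer.h

// Old ExecuTorch type                       ->  pytorch-labs/tokenizers type
// executorch::extension::llm::BPETokenizer  ->  tokenizers::Llama2cTokenizer
// executorch::extension::llm::Tiktoken      ->  tokenizers::Tiktoken
// executorch::extension::llm::Tokenizer     ->  tokenizers::Tokenizer
using ::tokenizers::Llama2cTokenizer;
using ::tokenizers::Tiktoken;
using ::tokenizers::Tokenizer;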
6 changes: 3 additions & 3 deletions examples/mediatek/executor_runner/mtk_llama_runner.h
@@ -14,8 +14,8 @@
#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <executorch/extension/llm/runner/irunner.h>
#include <executorch/extension/llm/runner/stats.h>
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
#include <executorch/extension/llm/tokenizer/tiktoken.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>
#include <pytorch/tokenizers/tiktoken.h>
#include <cstdint>
#include <functional>
#include <memory>
@@ -28,9 +28,9 @@ using Stats = ::executorch::llm::Stats;
using example::LlamaModelOptions;
using example::LlamaModelPaths;
using example::LlamaRuntime;
using executorch::extension::llm::Tokenizer;
using executorch::runtime::Error;
using executorch::runtime::Result;
using tokenizers::Tokenizer;

class MTKLlamaRunner : public executorch::extension::llm::IRunner {
public:
6 changes: 5 additions & 1 deletion examples/models/llama/CMakeLists.txt
@@ -209,7 +209,11 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release")
endif()
endif()

target_include_directories(llama_main PUBLIC ${_common_include_directories})
target_include_directories(
llama_main
PUBLIC ${_common_include_directories}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)
target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
target_compile_options(llama_main PUBLIC ${_common_compile_options})

9 changes: 6 additions & 3 deletions examples/models/llama/runner/CMakeLists.txt
@@ -43,7 +43,7 @@ target_include_directories(

list(
APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/tiktoken.cpp
)
list(APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
@@ -83,7 +83,10 @@ target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})

target_include_directories(
llama_runner
INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
INTERFACE ${_common_include_directories}
)
target_include_directories(
llama_runner
PUBLIC ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)
target_compile_options(llama_runner PUBLIC ${_preprocessor_flag})
21 changes: 13 additions & 8 deletions examples/models/llama/runner/runner.cpp
@@ -16,7 +16,7 @@
#include <executorch/extension/llm/runner/util.h>

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>

namespace example {

@@ -78,17 +78,21 @@ Error Runner::load() {
// load tokenizer. Assuming tiktoken is the default tokenizer
tokenizer_ = nullptr;
tokenizer_ = get_tiktoken_for_llama();
Error err = tokenizer_->load(tokenizer_path_);
::tokenizers::Error err = tokenizer_->load(tokenizer_path_);
// Rely on tiktoken to throw error if the artifact is incompatible. Then we
// fallback to BPE tokenizer.
if (err == Error::InvalidArgument) {
if (err != ::tokenizers::Error::Ok) {
ET_LOG(
Info,
"Failed to load %s as a Tiktoken artifact, trying BPE tokenizer",
tokenizer_path_.c_str());
tokenizer_.reset();
tokenizer_ = std::make_unique<llm::BPETokenizer>();
tokenizer_->load(tokenizer_path_);
tokenizer_ = std::make_unique<::tokenizers::Llama2cTokenizer>();
err = tokenizer_->load(tokenizer_path_);
ET_CHECK_TK_OK_OR_RETURN_ERROR(
err,
"Failed to load %s as a llama2.c tokenizer artifact",
tokenizer_path_.c_str());
}

ET_LOG(Info, "Reading metadata from model");
@@ -201,12 +205,12 @@ Error Runner::generate(
? seq_len
: metadata_.at(kMaxSeqLen);

Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
::tokenizers::Result<std::vector<uint64_t>> encode_res = tokenizer_->encode(
prompt,
/* bos */ 0,
/* eos */ 0);

ET_CHECK_OK_OR_RETURN_ERROR(
ET_CHECK_TK_OK_OR_RETURN_ERROR(
encode_res.error(), "Failed to encode prompt %s", prompt.c_str());

// encode the (string) prompt into tokens sequence
@@ -242,7 +246,8 @@
uint64_t cur_token = prefill_res.get();

// print the first token from prefill. No prev_token so use cur_token for it.
wrapped_callback(ET_UNWRAP(tokenizer_->decode(cur_token, cur_token)));
wrapped_callback(
ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
RUNNER_ET_LOG(
warmup,
"RSS after prompt prefill: %f MiB (0 if unsupported)",
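Note (reviewer sketch, not part of the diff): the load/encode error handling above now goes through ::tokenizers::Error and ::tokenizers::Result rather than executorch::runtime::Error/Result, with ET_CHECK_TK_OK_OR_RETURN_ERROR and ET_UNWRAP_TOKENIZER bridging back into the runner's own error type. A self-contained sketch of the same pattern; the tokenizer path is hypothetical, and the ok()/get() accessors on Result are assumed to mirror ExecuTorch's Result (the hunk itself only uses error() and the macros):

#include <memory>
#include <string>
#include <vector>

#include <pytorch/tokenizers/llama2c_tokenizer.h>
#include <pytorch/tokenizers/tokenizer.h>

int main() {
  std::unique_ptr<::tokenizers::Tokenizer> tokenizer =
      std::make_unique<::tokenizers::Llama2cTokenizer>();

  // load() now reports ::tokenizers::Error instead of executorch::runtime::Error.
  ::tokenizers::Error err = tokenizer->load("/path/to/tokenizer.bin"); // hypothetical path
  if (err != ::tokenizers::Error::Ok) {
    return 1; // Runner::load() falls back to another tokenizer or returns an error here.
  }

  // encode() now returns ::tokenizers::Result<std::vector<uint64_t>>.
  ::tokenizers::Result<std::vector<uint64_t>> encoded =
      tokenizer->encode("hello world", /* bos */ 0, /* eos */ 0);
  if (!encoded.ok()) { // assumed accessor, mirroring executorch::runtime::Result
    return 1;
  }
  std::vector<uint64_t> tokens = encoded.get(); // assumed accessor
  return tokens.empty() ? 1 : 0;
}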
4 changes: 2 additions & 2 deletions examples/models/llama/runner/runner.h
@@ -23,8 +23,8 @@
#include <executorch/extension/llm/runner/text_decoder_runner.h>
#include <executorch/extension/llm/runner/text_prefiller.h>
#include <executorch/extension/llm/runner/text_token_generator.h>
#include <executorch/extension/llm/tokenizer/tokenizer.h>
#include <executorch/extension/module/module.h>
#include <pytorch/tokenizers/tokenizer.h>

namespace example {

@@ -58,7 +58,7 @@ class ET_EXPERIMENTAL Runner : public executorch::extension::llm::IRunner {
// model
std::unique_ptr<::executorch::extension::Module> module_;
std::string tokenizer_path_;
std::unique_ptr<::executorch::extension::llm::Tokenizer> tokenizer_;
std::unique_ptr<::tokenizers::Tokenizer> tokenizer_;
std::unordered_map<std::string, int64_t> metadata_;
std::unique_ptr<::executorch::extension::llm::TextDecoderRunner>
text_decoder_runner_;
2 changes: 1 addition & 1 deletion examples/models/llama/runner/targets.bzl
@@ -48,7 +48,7 @@ def define_common_targets():
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
"//executorch/examples/models/llama/tokenizer:tiktoken",
"//executorch/extension/llm/tokenizer:bpe_tokenizer",
"//pytorch/tokenizers:llama2c_tokenizer",
] + (_get_operator_lib(aten)) + ([
# Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
# Therefore enable it explicitly for now to avoid failing tests
37 changes: 18 additions & 19 deletions examples/models/llama/tokenizer/llama_tiktoken.cpp
@@ -10,7 +10,7 @@

namespace example {

using ::executorch::extension::llm::Tiktoken;
using ::tokenizers::Tiktoken;

namespace {
static constexpr int32_t kSpecialTokensSize = 256;
@@ -42,8 +42,23 @@ _get_default_special_tokens() {
return special_tokens;
}

static inline std::unique_ptr<std::vector<std::string>>
_get_multimodal_special_tokens() {
std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
switch (version) {
case Version::Multimodal:
return get_multimodal_special_tokens();
default:
return _get_default_special_tokens();
}
}

} // namespace

std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
return std::make_unique<Tiktoken>(
_get_special_tokens(version), kBOSTokenIndex, kEOSTokenIndex);
}

std::unique_ptr<std::vector<std::string>> get_multimodal_special_tokens() {
auto special_tokens =
std::make_unique<std::vector<std::string>>(std::vector<std::string>{
"<|begin_of_text|>",
@@ -72,20 +87,4 @@ _get_multimodal_special_tokens() {
return special_tokens;
}

std::unique_ptr<std::vector<std::string>> _get_special_tokens(Version version) {
switch (version) {
case Version::Multimodal:
return _get_multimodal_special_tokens();
default:
return _get_default_special_tokens();
}
}

} // namespace

std::unique_ptr<Tiktoken> get_tiktoken_for_llama(Version version) {
return std::make_unique<Tiktoken>(
_get_special_tokens(version), kBOSTokenIndex, kEOSTokenIndex);
}

} // namespace example
6 changes: 4 additions & 2 deletions examples/models/llama/tokenizer/llama_tiktoken.h
@@ -8,7 +8,7 @@

#pragma once

#include <executorch/extension/llm/tokenizer/tiktoken.h>
#include <pytorch/tokenizers/tiktoken.h>

namespace example {

@@ -17,7 +17,9 @@ enum class Version {
Multimodal,
};

std::unique_ptr<::executorch::extension::llm::Tiktoken> get_tiktoken_for_llama(
std::unique_ptr<::tokenizers::Tiktoken> get_tiktoken_for_llama(
Version version = Version::Default);

std::unique_ptr<std::vector<std::string>> get_multimodal_special_tokens();

} // namespace example
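Note (usage sketch, not part of the diff): get_tiktoken_for_llama() now returns the pytorch-labs ::tokenizers::Tiktoken, and the multimodal special-token list is exposed so callers (such as the updated test below) can build a tokenizer themselves. A minimal example; the model path is hypothetical:

#include <memory>

#include <executorch/examples/models/llama/tokenizer/llama_tiktoken.h>

std::unique_ptr<::tokenizers::Tiktoken> make_multimodal_tiktoken() {
  // Factory from this header; BOS/EOS token indices are supplied by the implementation.
  auto tiktoken = example::get_tiktoken_for_llama(example::Version::Multimodal);
  ::tokenizers::Error err = tiktoken->load("/path/to/tokenizer.model"); // hypothetical path
  if (err != ::tokenizers::Error::Ok) {
    return nullptr;
  }
  return tiktoken;
}

// get_multimodal_special_tokens() is now public; the updated test uses it to construct
// an executorch::extension::llm::Tiktoken directly with BOS index 0 and EOS index 1.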
3 changes: 2 additions & 1 deletion examples/models/llama/tokenizer/targets.bzl
@@ -15,7 +15,8 @@ def define_common_targets():
"llama_tiktoken.h",
],
exported_deps = [
"//executorch/extension/llm/tokenizer:tiktoken",
"//pytorch/tokenizers:tiktoken",
"//executorch/extension/llm/tokenizer:tiktoken", # TODO: remove
],
visibility = [
"@EXECUTORCH_CLIENTS",
6 changes: 3 additions & 3 deletions examples/models/llama/tokenizer/test/test_tiktoken.cpp
@@ -10,7 +10,7 @@

#include <vector>

#include <executorch/runtime/platform/runtime.h>
#include <executorch/extension/llm/tokenizer/tiktoken.h>

#include <gtest/gtest.h>

@@ -36,8 +36,8 @@ static std::string get_resource_path(const std::string& name) {
class MultimodalTiktokenV5ExtensionTest : public Test {
public:
void SetUp() override {
executorch::runtime::runtime_init();
tokenizer_ = get_tiktoken_for_llama(Version::Multimodal);
tokenizer_ = std::make_unique<executorch::extension::llm::Tiktoken>(
example::get_multimodal_special_tokens(), 0, 1);
modelPath_ = get_resource_path("test_tiktoken_tokenizer.model");
}

5 changes: 3 additions & 2 deletions examples/models/llava/runner/CMakeLists.txt
@@ -29,7 +29,7 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..)
set(_llava_runner__srcs
"${CMAKE_CURRENT_SOURCE_DIR}/llava_runner.cpp"
"${EXECUTORCH_ROOT}/extension/llm/sampler/sampler.cpp"
"${EXECUTORCH_ROOT}/extension/llm/tokenizer/bpe_tokenizer.cpp"
"${EXECUTORCH_ROOT}/extension/llm/tokenizers/src/llama2c_tokenizer.cpp"
)

# extension llm runner lib
@@ -47,5 +47,6 @@ set(llava_runner_deps executorch extension_data_loader extension_llm_runner
target_link_libraries(llava_runner PUBLIC ${llava_runner_deps})

target_include_directories(
llava_runner INTERFACE ${_common_include_directories} ${EXECUTORCH_ROOT}
llava_runner INTERFACE ${_common_include_directories}
${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)
6 changes: 3 additions & 3 deletions examples/models/llava/runner/llava_runner.cpp
@@ -13,7 +13,7 @@
#include <executorch/examples/models/llava/runner/llava_image_prefiller.h>
#include <executorch/examples/models/llava/runner/llava_runner.h>
#include <executorch/examples/models/llava/runner/llava_text_decoder_runner.h>
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
#include <pytorch/tokenizers/llama2c_tokenizer.h>

#include <ctime>
#include <memory>
@@ -43,7 +43,7 @@ Error LlavaRunner::load() {
stats_.model_load_start_ms = llm::time_in_ms();

// Load the tokenizer
tokenizer_ = std::make_unique<llm::BPETokenizer>();
tokenizer_ = std::make_unique<tokenizers::Llama2cTokenizer>();
tokenizer_->load(tokenizer_path_);

// Load the text decoder runner
@@ -90,7 +90,7 @@ Result<uint64_t> LlavaRunner::prefill_prompt(
int8_t bos,
int8_t eos) {
std::vector<uint64_t> prompt_tokens =
ET_UNWRAP(tokenizer_->encode(prompt, bos, eos));
ET_UNWRAP_TOKENIZER(tokenizer_->encode(prompt, bos, eos));

return text_prefiller_->prefill(prompt_tokens, start_pos);
}
2 changes: 1 addition & 1 deletion examples/models/llava/runner/targets.bzl
@@ -14,7 +14,6 @@ def define_common_targets():
exported_deps = [
"//executorch/backends/xnnpack:xnnpack_backend",
"//executorch/extension/llm/runner:runner_lib",
"//executorch/extension/llm/tokenizer:bpe_tokenizer",
"//executorch/extension/evalue_util:print_evalue",
"//executorch/extension/module:module",
"//executorch/extension/tensor:tensor",
@@ -23,5 +22,6 @@ def define_common_targets():
"//executorch/runtime/core/exec_aten/util:tensor_util",
"//executorch/configurations:optimized_native_cpu_ops",
"//executorch/extension/llm/custom_ops:custom_ops",
"//pytorch/tokenizers:llama2c_tokenizer",
],
)
3 changes: 2 additions & 1 deletion examples/models/phi-3-mini/CMakeLists.txt
@@ -41,11 +41,12 @@ add_executable(
phi_3_mini_runner
main.cpp runner.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/sampler/sampler.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizer/bpe_tokenizer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/src/llama2c_tokenizer.cpp
)
target_include_directories(
phi_3_mini_runner
PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/../../../third-party/gflags/src
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/tokenizers/include
)
target_link_libraries(
phi_3_mini_runner PRIVATE executorch extension_module_static extension_tensor