Fix test_llama_runner by hiding tiktoken #3055

Closed · wants to merge 3 commits

Changes from all commits
examples/models/llama2/CMakeLists.txt (4 changes: 2 additions & 2 deletions)

@@ -21,7 +21,7 @@ project(llama_runner)
 # Duplicating options as root CMakeLists.txt
 option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)
 
-option(EXECUTORCH_BUILD_RE2 "Build RE2" OFF)
+option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)
 
 include(CMakeDependentOption)
 #
@@ -88,7 +88,7 @@ endif()
 
 # llama_runner library
 add_subdirectory(runner)
-if(EXECUTORCH_BUILD_RE2)
+if(EXECUTORCH_USE_TIKTOKEN)
   # find RE2 for tokenizer
   set(ABSL_ENABLE_INSTALL ON)
   set(_pic_flag
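As a usage sketch (not part of this PR): with the renamed option, tokenizer selection now happens at configure time. A minimal sketch, assuming an initial-cache script whose file name is hypothetical, passed as cmake -C tiktoken_on.cmake:

    # tiktoken_on.cmake (hypothetical file): preload the cache so this example
    # builds with the Tiktoken tokenizer; leave the option OFF for the BPE default.
    set(EXECUTORCH_USE_TIKTOKEN ON CACHE BOOL "Use Tiktoken as a tokenizer")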
examples/models/llama2/main.cpp (10 changes: 1 addition & 9 deletions)

@@ -39,11 +39,6 @@ DEFINE_int32(
     -1,
     "Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
 
-DEFINE_bool(
-    use_tiktoken,
-    false,
-    "Use Tiktoken tokenizer instead of the default BPE tokenizer.");
-
 int32_t main(int32_t argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);
 
@@ -62,8 +57,6 @@ int32_t main(int32_t argc, char** argv) {
 
   int32_t cpu_threads = FLAGS_cpu_threads;
 
-  bool use_tiktoken = FLAGS_use_tiktoken;
-
 #if defined(ET_USE_THREADPOOL)
   uint32_t num_performant_cores = cpu_threads == -1
       ? torch::executorch::cpuinfo::get_num_performant_cores()
@@ -76,8 +69,7 @@ int32_t main(int32_t argc, char** argv) {
   }
 #endif
   // create llama runner
-  ::torch::executor::Runner runner(
-      model_path, tokenizer_path, temperature, use_tiktoken);
+  ::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
 
   // generate
   runner.generate(prompt, seq_len);
examples/models/llama2/runner/CMakeLists.txt (23 changes: 15 additions & 8 deletions)

@@ -39,19 +39,26 @@ list(TRANSFORM _llama_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
 target_include_directories(extension_module
                            INTERFACE ${_common_include_directories})
 
-if(CMAKE_TOOLCHAIN_IOS OR ANDROID OR APPLE)
-  # Building a share library on iOS requires code signing
-  # On Android we see duplicated registration when using shared lib
+if(EXECUTORCH_USE_TIKTOKEN)
+  list(APPEND _llama_runner__srcs
+       ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/tiktoken.cpp)
+  set(_preprocessor_flag -DET_USE_TIKTOKEN)
+endif()
+
+if(CMAKE_TOOLCHAIN_IOS
+   OR ANDROID
+   OR APPLE)
+  # Building a shared library on iOS requires code signing. On Android we see
+  # duplicated registration when using a shared lib.
   add_library(llama_runner STATIC ${_llama_runner__srcs})
 else()
   add_library(llama_runner SHARED ${_llama_runner__srcs})
 endif()
 
 set(llama_runner_deps executorch extension_module extension_data_loader)
 
-target_link_libraries(
-  llama_runner PUBLIC ${llama_runner_deps})
+target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
 
-target_include_directories(llama_runner
-                           INTERFACE ${_common_include_directories}
-                           ${EXECUTORCH_ROOT})
+target_include_directories(llama_runner INTERFACE ${_common_include_directories}
+                                                  ${EXECUTORCH_ROOT})
+target_compile_options(llama_runner PUBLIC ${_preprocessor_flag})
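Because the preprocessor flag is attached with PUBLIC visibility, every target that links llama_runner inherits -DET_USE_TIKTOKEN, so the library and its consumers always agree on which tokenizer was compiled in. A minimal sketch, where the consumer target name is hypothetical:

    # Hypothetical consumer target, shown only to illustrate flag propagation.
    add_executable(my_llama_main main.cpp)
    target_link_libraries(my_llama_main PRIVATE llama_runner)
    # my_llama_main now also compiles with -DET_USE_TIKTOKEN whenever
    # EXECUTORCH_USE_TIKTOKEN was ON when llama_runner was configured.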
examples/models/llama2/runner/runner.cpp (18 changes: 9 additions & 9 deletions)

@@ -11,7 +11,9 @@
 
 #include <executorch/examples/models/llama2/runner/runner.h>
 #include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
+#if defined(ET_USE_TIKTOKEN)
 #include <executorch/examples/models/llama2/tokenizer/tiktoken.h>
+#endif
 #include <executorch/extension/evalue_util/print_evalue.h>
 #include <executorch/extension/runner_util/managed_tensor.h>
 
@@ -38,10 +40,8 @@ std::string statsToJsonString(const Runner::Stats& stats);
 Runner::Runner(
     const std::string& model_path,
     const std::string& tokenizer_path,
-    const float temperature,
-    bool use_tiktoken)
-    : use_tiktoken_(use_tiktoken),
-      module_(std::make_unique<Module>(
+    const float temperature)
+    : module_(std::make_unique<Module>(
          model_path,
          Module::MlockConfig::UseMlockIgnoreErrors)),
      tokenizer_path_(tokenizer_path),
@@ -80,11 +80,11 @@ Error Runner::load() {
   append_eos_ = getMetadataHelper("append_eos_to_prompt", false);
 
   // Load tokenizer
-  if (use_tiktoken_) {
-    tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
-  } else {
-    tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
-  }
+#if defined(ET_USE_TIKTOKEN)
+  tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
+#else
+  tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
+#endif
   tokenizer_->load(tokenizer_path_);
   if (tokenizer_->bos_tok() != bos_id_) {
     ET_LOG(
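One consequence of moving the selection behind #if: the file passed as tokenizer_path must match the tokenizer type the binary was compiled with, since there is no longer a runtime switch to fall back to the other implementation.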
examples/models/llama2/runner/runner.h (4 changes: 1 addition & 3 deletions)

@@ -29,8 +29,7 @@ class Runner {
   explicit Runner(
       const std::string& model_path,
       const std::string& tokenizer_path,
-      const float temperature = 0.8f,
-      bool use_tiktoken = false);
+      const float temperature = 0.8f);
 
   struct Stats {
     // Scaling factor for timestamps - in this case, we use ms.
@@ -86,7 +85,6 @@ class Runner {
   int32_t n_bos_;
   int32_t n_eos_;
   int32_t max_seq_len_;
-  bool use_tiktoken_;
   bool use_kv_cache_;
   bool use_sdpa_with_kv_cache_;
   bool append_eos_;
examples/models/llama2/runner/targets.bzl (7 changes: 5 additions & 2 deletions)

@@ -30,14 +30,17 @@ def define_common_targets():
         exported_deps = [
             "//executorch/backends/xnnpack:xnnpack_backend",
             "//executorch/examples/models/llama2/sampler:sampler" + aten_suffix,
-            "//executorch/examples/models/llama2/tokenizer:tokenizer",
             "//executorch/extension/evalue_util:print_evalue" + aten_suffix,
             "//executorch/extension/runner_util:managed_tensor" + aten_suffix,
             "//executorch/extension/module:module" + aten_suffix,
             "//executorch/kernels/quantized:generated_lib" + aten_suffix,
             "//executorch/runtime/core/exec_aten:lib" + aten_suffix,
             "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
-        ] + (_get_operator_lib(aten)) + ([
+        ] + ([
+            "//executorch/examples/models/llama2/tokenizer:tiktoken",
+        ] if native.read_config("llama", "use_tiktoken", "0") == "1" else [
+            "//executorch/examples/models/llama2/tokenizer:bpe_tokenizer",
+        ]) + (_get_operator_lib(aten)) + ([
             # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
             # Therefore enable it explicitly for now to avoid failing tests
             "//executorch/backends/vulkan:vulkan_backend_lib",
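On the Buck side, the switch is the llama.use_tiktoken config read above: building with -c llama.use_tiktoken=1 links the new :tiktoken library, while the default of "0" links :bpe_tokenizer.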
examples/models/llama2/tokenizer/targets.bzl (20 changes: 18 additions & 2 deletions)

@@ -2,14 +2,30 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
 
 def define_common_targets():
     runtime.cxx_library(
-        name = "tokenizer",
+        name = "bpe_tokenizer",
         srcs = [
             "bpe_tokenizer.cpp",
-            "tiktoken.cpp",
         ],
         exported_headers = [
             "tokenizer.h",
             "bpe_tokenizer.h",
+        ],
+        exported_deps = [
+            "//executorch/runtime/core/exec_aten:lib",
+            "//executorch/runtime/core/exec_aten/util:scalar_type_util",
+        ],
+        visibility = [
+            "@EXECUTORCH_CLIENTS",
+        ],
+    )
+
+    runtime.cxx_library(
+        name = "tiktoken",
+        srcs = [
+            "tiktoken.cpp",
+        ],
+        exported_headers = [
+            "tokenizer.h",
             "tiktoken.h",
             "base64.h",
         ],
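Splitting the old :tokenizer target in two means dependents now compile only the implementation they actually use; in particular, tiktoken.cpp no longer builds as part of BPE-only configurations.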
examples/models/llama2/tokenizer/test/targets.bzl (8 changes: 4 additions & 4 deletions)

@@ -8,12 +8,12 @@ def define_common_targets():
     """
 
     runtime.cxx_test(
-        name = "test",
+        name = "test_bpe_tokenizer",
         srcs = [
-            "test_tokenizer.cpp",
+            "test_bpe_tokenizer.cpp",
         ],
         deps = [
-            "//executorch/examples/models/llama2/tokenizer:tokenizer",
+            "//executorch/examples/models/llama2/tokenizer:bpe_tokenizer",
         ],
         env = {
             "RESOURCES_PATH": "$(location :resources)/resources",
@@ -26,7 +26,7 @@ def define_common_targets():
             "test_tiktoken.cpp",
         ],
         deps = [
-            "//executorch/examples/models/llama2/tokenizer:tokenizer",
+            "//executorch/examples/models/llama2/tokenizer:tiktoken",
         ],
         env = {
             "RESOURCES_PATH": "$(location :resources_fb_only)/resources",
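Pointing each test at its own tokenizer library is presumably the "hiding" in the PR title: the default (BPE) test and runner builds no longer touch the tiktoken sources unless the config flag opts in.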