Skip to content

Commit efe8a89

Browse files
committed
Fix test_llama_runner by hiding tiktoken
Summary: We don't always want to build the tiktoken dependencies (re2 and abseil), so this PR only builds them when the option is on. Test Plan: Reviewers: Subscribers: Tasks: Tags: ghstack-source-id: 9d5342d Pull Request resolved: #3055
1 parent 458d743 commit efe8a89

File tree

7 files changed

+51
-35
lines changed

7 files changed

+51
-35
lines changed

examples/models/llama2/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ project(llama_runner)
2121
# Duplicating options as root CMakeLists.txt
2222
option(EXECUTORCH_BUILD_OPTIMIZED "Build the optimized kernels" OFF)
2323

24-
option(EXECUTORCH_BUILD_RE2 "Build RE2" OFF)
24+
option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)
2525

2626
include(CMakeDependentOption)
2727
#
@@ -88,7 +88,7 @@ endif()
8888

8989
# llama_runner library
9090
add_subdirectory(runner)
91-
if(EXECUTORCH_BUILD_RE2)
91+
if(EXECUTORCH_USE_TIKTOKEN)
9292
# find RE2 for tokenizer
9393
set(ABSL_ENABLE_INSTALL ON)
9494
set(_pic_flag

examples/models/llama2/main.cpp

Lines changed: 1 addition & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,6 @@ DEFINE_int32(
3939
-1,
4040
"Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
4141

42-
DEFINE_bool(
43-
use_tiktoken,
44-
false,
45-
"Use Tiktoken tokenizer instead of the default BPE tokenizer.");
46-
4742
int32_t main(int32_t argc, char** argv) {
4843
gflags::ParseCommandLineFlags(&argc, &argv, true);
4944

@@ -62,8 +57,6 @@ int32_t main(int32_t argc, char** argv) {
6257

6358
int32_t cpu_threads = FLAGS_cpu_threads;
6459

65-
bool use_tiktoken = FLAGS_use_tiktoken;
66-
6760
#if defined(ET_USE_THREADPOOL)
6861
uint32_t num_performant_cores = cpu_threads == -1
6962
? torch::executorch::cpuinfo::get_num_performant_cores()
@@ -76,8 +69,7 @@ int32_t main(int32_t argc, char** argv) {
7669
}
7770
#endif
7871
// create llama runner
79-
::torch::executor::Runner runner(
80-
model_path, tokenizer_path, temperature, use_tiktoken);
72+
::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
8173

8274
// generate
8375
runner.generate(prompt, seq_len);

examples/models/llama2/runner/CMakeLists.txt

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,19 +39,26 @@ list(TRANSFORM _llama_runner__srcs PREPEND "${EXECUTORCH_ROOT}/")
3939
target_include_directories(extension_module
4040
INTERFACE ${_common_include_directories})
4141

42-
if(CMAKE_TOOLCHAIN_IOS OR ANDROID OR APPLE)
43-
# Building a share library on iOS requires code signing
44-
# On Android we see duplicated registration when using shared lib
42+
if(EXECUTORCH_USE_TIKTOKEN)
43+
list(APPEND _llama_runner__srcs
44+
${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/tiktoken.cpp)
45+
set(_preprocessor_flag -DET_USE_TIKTOKEN)
46+
endif()
47+
48+
if(CMAKE_TOOLCHAIN_IOS
49+
OR ANDROID
50+
OR APPLE)
51+
# Building a share library on iOS requires code signing On Android we see
52+
# duplicated registration when using shared lib
4553
add_library(llama_runner STATIC ${_llama_runner__srcs})
4654
else()
4755
add_library(llama_runner SHARED ${_llama_runner__srcs})
4856
endif()
4957

5058
set(llama_runner_deps executorch extension_module extension_data_loader)
5159

52-
target_link_libraries(
53-
llama_runner PUBLIC ${llama_runner_deps})
60+
target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})
5461

55-
target_include_directories(llama_runner
56-
INTERFACE ${_common_include_directories}
57-
${EXECUTORCH_ROOT})
62+
target_include_directories(llama_runner INTERFACE ${_common_include_directories}
63+
${EXECUTORCH_ROOT})
64+
target_compile_options(llama_runner PUBLIC ${_preprocessor_flag})

examples/models/llama2/runner/runner.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@
1111

1212
#include <executorch/examples/models/llama2/runner/runner.h>
1313
#include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
14+
#if defined(ET_USE_TIKTOKEN)
1415
#include <executorch/examples/models/llama2/tokenizer/tiktoken.h>
16+
#endif
1517
#include <executorch/extension/evalue_util/print_evalue.h>
1618
#include <executorch/extension/runner_util/managed_tensor.h>
1719

@@ -38,10 +40,8 @@ std::string statsToJsonString(const Runner::Stats& stats);
3840
Runner::Runner(
3941
const std::string& model_path,
4042
const std::string& tokenizer_path,
41-
const float temperature,
42-
bool use_tiktoken)
43-
: use_tiktoken_(use_tiktoken),
44-
module_(std::make_unique<Module>(
43+
const float temperature)
44+
: module_(std::make_unique<Module>(
4545
model_path,
4646
Module::MlockConfig::UseMlockIgnoreErrors)),
4747
tokenizer_path_(tokenizer_path),
@@ -80,11 +80,11 @@ Error Runner::load() {
8080
append_eos_ = getMetadataHelper("append_eos_to_prompt", false);
8181

8282
// Load tokenizer
83-
if (use_tiktoken_) {
84-
tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
85-
} else {
86-
tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
87-
}
83+
#if defined(ET_USE_TIKTOKEN)
84+
tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
85+
#else
86+
tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
87+
#endif
8888
tokenizer_->load(tokenizer_path_);
8989
if (tokenizer_->bos_tok() != bos_id_) {
9090
ET_LOG(

examples/models/llama2/runner/runner.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@ class Runner {
2929
explicit Runner(
3030
const std::string& model_path,
3131
const std::string& tokenizer_path,
32-
const float temperature = 0.8f,
33-
bool use_tiktoken = false);
32+
const float temperature = 0.8f);
3433

3534
struct Stats {
3635
// Scaling factor for timestamps - in this case, we use ms.
@@ -86,7 +85,6 @@ class Runner {
8685
int32_t n_bos_;
8786
int32_t n_eos_;
8887
int32_t max_seq_len_;
89-
bool use_tiktoken_;
9088
bool use_kv_cache_;
9189
bool use_sdpa_with_kv_cache_;
9290
bool append_eos_;

examples/models/llama2/runner/targets.bzl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,17 @@ def define_common_targets():
3030
exported_deps = [
3131
"//executorch/backends/xnnpack:xnnpack_backend",
3232
"//executorch/examples/models/llama2/sampler:sampler" + aten_suffix,
33-
"//executorch/examples/models/llama2/tokenizer:tokenizer",
3433
"//executorch/extension/evalue_util:print_evalue" + aten_suffix,
3534
"//executorch/extension/runner_util:managed_tensor" + aten_suffix,
3635
"//executorch/extension/module:module" + aten_suffix,
3736
"//executorch/kernels/quantized:generated_lib" + aten_suffix,
3837
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
3938
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
40-
] + (_get_operator_lib(aten)) + ([
39+
] + ([
40+
"//executorch/examples/models/llama2/tokenizer:tiktoken",
41+
] if native.read_config("llama", "use_tiktoken", "0") == "1" else [
42+
"//executorch/examples/models/llama2/tokenizer:bpe_tokenizer",
43+
]) + (_get_operator_lib(aten)) + ([
4144
# Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
4245
# Therefore enable it explicitly for now to avoid failing tests
4346
"//executorch/backends/vulkan:vulkan_backend_lib",

examples/models/llama2/tokenizer/targets.bzl

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,30 @@ load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
22

33
def define_common_targets():
44
runtime.cxx_library(
5-
name = "tokenizer",
5+
name = "bpe_tokenizer",
66
srcs = [
77
"bpe_tokenizer.cpp",
8-
"tiktoken.cpp",
98
],
109
exported_headers = [
1110
"tokenizer.h",
1211
"bpe_tokenizer.h",
12+
],
13+
exported_deps = [
14+
"//executorch/runtime/core/exec_aten:lib",
15+
"//executorch/runtime/core/exec_aten/util:scalar_type_util",
16+
],
17+
visibility = [
18+
"@EXECUTORCH_CLIENTS",
19+
],
20+
)
21+
22+
runtime.cxx_library(
23+
name = "tiktoken",
24+
srcs = [
25+
"tiktoken.cpp",
26+
],
27+
exported_headers = [
28+
"tokenizer.h",
1329
"tiktoken.h",
1430
"base64.h",
1531
],

0 commit comments

Comments
 (0)