Skip to content

Commit 9d9308a

Browse files
larryliu0820facebook-github-bot
authored and committed
Add tiktoken
Summary: C++ implementation of Tiktoken. Added unit tests. Differential Revision: D56053255
1 parent 7adbcad commit 9d9308a

File tree

13 files changed

+805
-7
lines changed

13 files changed

+805
-7
lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,7 @@ option(EXECUTORCH_BUILD_XNNPACK "Build the XNNPACK backend" OFF)
174174

175175
option(EXECUTORCH_BUILD_VULKAN "Build the Vulkan backend" OFF)
176176

177+
option(EXECUTORCH_BUILD_RE2 "Build RE2" OFF)
177178
#
178179
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
179180
#
@@ -518,6 +519,10 @@ if(EXECUTORCH_BUILD_COREML)
518519
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/backends/apple/coreml)
519520
endif()
520521

522+
if(EXECUTORCH_BUILD_RE2)
523+
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/examples/third-party/re2)
524+
endif()
525+
521526
if(EXECUTORCH_BUILD_PYBIND)
522527
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/third-party/pybind11)
523528

examples/models/llama2/CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ set(_common_include_directories ${EXECUTORCH_ROOT}/..)
6868
set(gflags_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../third-party/gflags)
6969
find_package(gflags REQUIRED)
7070

71+
# find RE2 for tokenizer
72+
find_package(re2 REQUIRED)
73+
7174
#
7275
# llama_main: test binary to run llama, with tokenizer and sampler integrated
7376
#
@@ -87,7 +90,7 @@ endif()
8790
# llama_runner library
8891
add_subdirectory(runner)
8992

90-
set(link_libraries)
93+
set(link_libraries gflags re2)
9194
set(_srcs main.cpp)
9295

9396
if(EXECUTORCH_BUILD_OPTIMIZED)
@@ -162,7 +165,7 @@ if(CMAKE_BUILD_TYPE EQUAL "RELEASE")
162165
endif()
163166

164167
target_include_directories(llama_main PUBLIC ${_common_include_directories})
165-
target_link_libraries(llama_main PUBLIC gflags llama_runner ${link_libraries})
168+
target_link_libraries(llama_main PUBLIC llama_runner ${link_libraries})
166169
target_compile_options(llama_main PUBLIC ${_common_compile_options})
167170

168171
if(APPLE)

examples/models/llama2/main.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ DEFINE_int32(
3939
-1,
4040
"Number of CPU threads for inference. Defaults to -1, which implies we'll use a heuristic to derive the # of performant cores for a specific device.");
4141

42+
DEFINE_bool(
43+
use_tiktoken,
44+
false,
45+
"Use Tiktoken tokenizer instead of the default BPE tokenizer.");
46+
4247
int32_t main(int32_t argc, char** argv) {
4348
gflags::ParseCommandLineFlags(&argc, &argv, true);
4449

@@ -57,6 +62,8 @@ int32_t main(int32_t argc, char** argv) {
5762

5863
int32_t cpu_threads = FLAGS_cpu_threads;
5964

65+
bool use_tiktoken = FLAGS_use_tiktoken;
66+
6067
#if defined(ET_USE_THREADPOOL)
6168
uint32_t num_performant_cores = cpu_threads == -1
6269
? torch::executorch::cpuinfo::get_num_performant_cores()
@@ -69,7 +76,8 @@ int32_t main(int32_t argc, char** argv) {
6976
}
7077
#endif
7178
// create llama runner
72-
::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
79+
::torch::executor::Runner runner(
80+
model_path, tokenizer_path, temperature, use_tiktoken);
7381

7482
// generate
7583
runner.generate(prompt, seq_len);

examples/models/llama2/runner/runner.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
#include <executorch/examples/models/llama2/runner/runner.h>
1313
#include <executorch/examples/models/llama2/tokenizer/bpe_tokenizer.h>
14+
#include <executorch/examples/models/llama2/tokenizer/tiktoken.h>
1415
#include <executorch/extension/evalue_util/print_evalue.h>
1516
#include <executorch/extension/runner_util/managed_tensor.h>
1617

@@ -37,8 +38,10 @@ std::string statsToJsonString(const Runner::Stats& stats);
3738
Runner::Runner(
3839
const std::string& model_path,
3940
const std::string& tokenizer_path,
40-
const float temperature)
41-
: module_(std::make_unique<Module>(
41+
const float temperature,
42+
bool use_tiktoken)
43+
: use_tiktoken_(use_tiktoken),
44+
module_(std::make_unique<Module>(
4245
model_path,
4346
Module::MlockConfig::UseMlockIgnoreErrors)),
4447
tokenizer_path_(tokenizer_path),
@@ -77,7 +80,11 @@ Error Runner::load() {
7780
append_eos_ = getMetadataHelper("append_eos_to_prompt", false);
7881

7982
// Load tokenizer
80-
tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
83+
if (use_tiktoken_) {
84+
tokenizer_ = std::make_unique<Tiktoken>(vocab_size_, bos_id_, eos_id_);
85+
} else {
86+
tokenizer_ = std::make_unique<BPETokenizer>(vocab_size_, bos_id_, eos_id_);
87+
}
8188
tokenizer_->load(tokenizer_path_);
8289
if (tokenizer_->bos_tok() != bos_id_) {
8390
ET_LOG(

examples/models/llama2/runner/runner.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@ class Runner {
2929
explicit Runner(
3030
const std::string& model_path,
3131
const std::string& tokenizer_path,
32-
const float temperature = 0.8f);
32+
const float temperature = 0.8f,
33+
bool use_tiktoken = false);
3334

3435
struct Stats {
3536
// Scaling factor for timestamps - in this case, we use ms.
@@ -85,6 +86,7 @@ class Runner {
8586
int32_t n_bos_;
8687
int32_t n_eos_;
8788
int32_t max_seq_len_;
89+
bool use_tiktoken_;
8890
bool use_kv_cache_;
8991
bool use_sdpa_with_kv_cache_;
9092
bool append_eos_;
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
// @lint-ignore-every LICENSELINT
9+
/**************************************************************************
10+
Copyright (c) 2023 sewenew
11+
12+
Licensed under the Apache License, Version 2.0 (the "License");
13+
you may not use this file except in compliance with the License.
14+
You may obtain a copy of the License at
15+
16+
http://www.apache.org/licenses/LICENSE-2.0
17+
18+
Unless required by applicable law or agreed to in writing, software
19+
distributed under the License is distributed on an "AS IS" BASIS,
20+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21+
See the License for the specific language governing permissions and
22+
limitations under the License.
23+
*************************************************************************/
24+
25+
#pragma once
26+
27+
#include <executorch/runtime/core/exec_aten/exec_aten.h>
28+
#include <runtime/platform/assert.h>
29+
#include <cassert>
30+
#include <string>
31+
#include <string_view>
32+
33+
namespace torch {
34+
namespace executor {
35+
namespace base64 {
36+
37+
std::string decode(const std::string_view& input);
38+
39+
namespace detail {
40+
41+
constexpr uint32_t DECODE_TABLE[] = {
42+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
43+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
44+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255,
45+
255, 255, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255,
46+
255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
47+
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
48+
25, 255, 255, 255, 255, 255, 255, 26, 27, 28, 29, 30, 31, 32, 33,
49+
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
50+
49, 50, 51, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
51+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
52+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
53+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
54+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
55+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
56+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
57+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
58+
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
59+
255};
60+
61+
inline void validate(uint32_t v) {
62+
ET_CHECK_MSG(v != 255, "invalid char");
63+
}
64+
65+
inline void decode(const std::string_view& input, std::string& output) {
66+
ET_CHECK_MSG(
67+
input.size() == 4, "input length must be 4, got %zu", input.size());
68+
69+
uint32_t val = 0;
70+
71+
uint8_t c = input[0];
72+
auto v = DECODE_TABLE[c];
73+
validate(v);
74+
val = v;
75+
76+
c = input[1];
77+
v = DECODE_TABLE[c];
78+
validate(v);
79+
val = (val << 6) | v;
80+
81+
c = input[2];
82+
v = DECODE_TABLE[c];
83+
validate(v);
84+
val = (val << 6) | v;
85+
86+
c = input[3];
87+
v = DECODE_TABLE[c];
88+
validate(v);
89+
val = (val << 6) | v;
90+
91+
output.push_back(static_cast<char>((val >> 16) & 0xFF));
92+
output.push_back(static_cast<char>((val >> 8) & 0xFF));
93+
output.push_back(static_cast<char>(val & 0xFF));
94+
}
95+
96+
inline void decode_1_padding(
97+
const std::string_view& input,
98+
std::string& output) {
99+
ET_CHECK_MSG(
100+
input.size() == 3, "input length must be 3, got %zu", input.size());
101+
102+
uint32_t val = 0;
103+
104+
uint8_t c = input[0];
105+
auto v = DECODE_TABLE[c];
106+
validate(v);
107+
val = v;
108+
109+
c = input[1];
110+
v = DECODE_TABLE[c];
111+
validate(v);
112+
val = (val << 6) | v;
113+
114+
c = input[2];
115+
v = DECODE_TABLE[c];
116+
validate(v);
117+
val = (val << 6) | v;
118+
119+
output.push_back(static_cast<char>((val >> 10) & 0xFF));
120+
output.push_back(static_cast<char>((val >> 2) & 0xFF));
121+
}
122+
123+
inline void decode_2_padding(
124+
const std::string_view& input,
125+
std::string& output) {
126+
assert(input.size() == 2);
127+
128+
uint32_t val = 0;
129+
130+
uint8_t c = input[0];
131+
auto v = DECODE_TABLE[c];
132+
validate(v);
133+
val = v;
134+
135+
c = input[1];
136+
v = DECODE_TABLE[c];
137+
validate(v);
138+
val = (val << 6) | v;
139+
140+
output.push_back(static_cast<char>((val >> 4) & 0xFF));
141+
}
142+
143+
} // namespace detail
144+
145+
inline std::string decode(const std::string_view& input) {
146+
ET_CHECK_MSG(!input.empty(), "empty input");
147+
148+
// Faster than `input.size() % 4`.
149+
ET_CHECK_MSG(
150+
(input.size() & 3) == 0 && input.size() >= 4,
151+
"input length must be larger than 4 and is multiple of 4, got %zu",
152+
input.size());
153+
154+
std::string output;
155+
output.reserve(input.size() / 4 * 3);
156+
auto idx = 0U;
157+
for (; idx < input.size() - 4; idx += 4) {
158+
detail::decode(input.substr(idx, 4), output);
159+
}
160+
161+
// Last 4 bytes. Might contain paddings.
162+
if (input[idx + 3] == '=') {
163+
if (input[idx + 2] == '=') {
164+
// Tow paddings.
165+
detail::decode_2_padding(input.substr(idx, 2), output);
166+
} else {
167+
// One padding.
168+
detail::decode_1_padding(input.substr(idx, 3), output);
169+
}
170+
} else {
171+
// No padding.
172+
detail::decode(input.substr(idx, 4), output);
173+
}
174+
175+
return output;
176+
}
177+
178+
} // namespace base64
179+
180+
} // namespace executor
181+
} // namespace torch

examples/models/llama2/tokenizer/targets.bzl

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,13 @@ def define_common_targets():
55
name = "tokenizer",
66
srcs = [
77
"bpe_tokenizer.cpp",
8+
"tiktoken.cpp",
89
],
910
exported_headers = [
1011
"tokenizer.h",
1112
"bpe_tokenizer.h",
13+
"tiktoken.h",
14+
"base64.h",
1215
],
1316
exported_deps = [
1417
"//executorch/runtime/core/exec_aten:lib",
@@ -17,6 +20,9 @@ def define_common_targets():
1720
visibility = [
1821
"@EXECUTORCH_CLIENTS",
1922
],
23+
exported_external_deps = [
24+
"re2",
25+
],
2026
)
2127

2228
runtime.python_library(

examples/models/llama2/tokenizer/test/targets.bzl

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,36 @@ def define_common_targets():
2020
},
2121
)
2222

23+
runtime.cxx_test(
24+
name = "test_tiktoken",
25+
srcs = [
26+
"test_tiktoken.cpp",
27+
],
28+
deps = [
29+
"//executorch/examples/models/llama2/tokenizer:tokenizer",
30+
],
31+
env = {
32+
"RESOURCES_PATH": "$(location :resources_fb_only)/resources",
33+
},
34+
external_deps = [
35+
"re2",
36+
],
37+
)
38+
2339
runtime.filegroup(
2440
name = "resources",
2541
srcs = native.glob([
2642
"resources/**",
2743
]),
2844
)
2945

46+
runtime.filegroup(
47+
name = "resources_fb_only",
48+
srcs = native.glob([
49+
"resources/fb/**",
50+
]),
51+
)
52+
3053
runtime.python_test(
3154
name = "test_tokenizer_py",
3255
srcs = [

0 commit comments

Comments
 (0)