
Commit ed8891f

larryliu0820 authored and malfet committed
Add sentencepiece tokenizer (#607)
* Add sentencepiece tokenizer
* Add white space
* Handle white space
* See if CI is happy
* Handle control ids
* More cleanup
* Lint
* Use unique_ptr
1 parent f5548ba commit ed8891f

9 files changed: +141 −321 lines changed

.github/workflows/pull.yml

Lines changed: 3 additions & 5 deletions
@@ -463,7 +463,6 @@ jobs:
         pushd checkpoints/stories15M
         wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
         wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-        wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
         popd

         mkdir gguf_files
@@ -900,14 +899,14 @@ jobs:
     - name: Run inference
       run: |
         python torchchat.py download stories15M
-        wget -O ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+        wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model

         export PRMT="Once upon a time in a land far away"

         python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}"

         python torchchat.py export stories15M --output-pte-path ./model.pte
-        ./cmake-out/et_run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}"
+        ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"

         echo "Tests complete."
   runner-aoti:
@@ -946,7 +945,6 @@ jobs:
         pushd checkpoints/stories15M
         wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
         wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-        wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
         popd
     - name: Run inference
       run: |
@@ -960,7 +958,7 @@ jobs:
         for dtype in fp32 fp16 bf16 fast fast16; do
           echo "Running export + runner with dtype=$dtype"
           python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --dtype $dtype --output-dso-path /tmp/model.so
-          ./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}"
+          ./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.model -i "${PROMPT}"
         done

         echo "Tests complete."

.gitmodules

Lines changed: 3 additions & 0 deletions
@@ -4,3 +4,6 @@
 [submodule "tokenizer/third-party/re2"]
 	path = tokenizer/third-party/re2
 	url = https://github.com/google/re2.git
+[submodule "tokenizer/third-party/sentencepiece"]
+	path = tokenizer/third-party/sentencepiece
+	url = https://github.com/google/sentencepiece.git

requirements.txt

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@
 # PyTorch ecosystem
 torch
 torchao
-executorch
+executorch==0.1.2

 # Hugging Face download
 huggingface_hub

runner/run.cpp

Lines changed: 8 additions & 9 deletions
@@ -383,11 +383,11 @@ Tokenizer* build_tokenizer(
   Tokenizer* tokenizer = NULL;
   switch (model_type) {
     case LLAMA2_MODEL:
-      tokenizer = new BPETokenizer(vocab_size, /*bos*/ 1, /*eos*/ 2);
+      tokenizer = new SPTokenizer(vocab_size, /*bos*/ 1, /*eos*/ 2);
       tokenizer->load(tokenizer_path);
       break;
     case LLAMA3_MODEL:
-      tokenizer = new Tiktoken(vocab_size, /*bos*/ 1, /*eos*/ 2);
+      tokenizer = new Tiktoken(vocab_size, /*bos*/ 128000, /*eos*/ 128001);
       tokenizer->load(tokenizer_path);
       break;
     default:
@@ -503,9 +503,11 @@ unsigned generate_from_prompt_tokens(
       printf("\n");
     } else {
       std::string piece = tokenizer->decode(token, next);
-      safe_printf(piece.c_str()); // same as printf("%s", piece), but skips
-                                  // "unsafe" bytes
-      fflush(stdout);
+      if (!piece.empty() && piece.length() != 0) {
+        safe_printf(piece.c_str()); // same as printf("%s", piece), but skips
+                                    // "unsafe" bytes
+        fflush(stdout);
+      }
     }
   }

@@ -553,10 +555,7 @@ void generate(
       stop_tokens.push_back(tokenizer->eos_tok());
       break;
     case LLAMA3_MODEL:
-      prompt_tokens = tokenizer->encode(prompt, 0, 0);
-      prompt_tokens.insert(
-          prompt_tokens.begin(),
-          tokenizer->encode("<|begin_of_text|>", 0, 0)[0]);
+      prompt_tokens = tokenizer->encode(prompt, 1, 0);
       stop_tokens.push_back(tokenizer->encode("<|end_of_text|>", 0, 0)[0]);
       stop_tokens.push_back(tokenizer->encode("<|eot_id|>", 0, 0)[0]);
       break;
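
For context, these hunks change which concrete tokenizer the runner builds and how the Llama 3 prompt is encoded: the Llama 2 path now constructs an SPTokenizer (SentencePiece) instead of the old BPETokenizer, the Tiktoken path uses the real Llama 3 special-token ids (128000/128001), and the manual prepend of the "<|begin_of_text|>" token is replaced by asking encode() to add one BOS token. The sketch below is only an illustration of that calling pattern, not code from the commit; it assumes "tokenizer.h" (now listed in tokenizer/CMakeLists.txt) declares the Tokenizer base class plus SPTokenizer and Tiktoken with the constructors and methods visible in the diff, and the helper names (make_tokenizer, is_llama3, encode_llama3_prompt) are hypothetical.

// Illustrative sketch only -- mirrors the calling pattern in runner/run.cpp
// after this commit; names outside the diff are assumptions.
#include <string>

#include "tokenizer.h"

Tokenizer* make_tokenizer(bool is_llama3,
                          int vocab_size,
                          const std::string& tokenizer_path) {
  Tokenizer* tokenizer = nullptr;
  if (is_llama3) {
    // Llama 3: tiktoken-style vocab; BOS/EOS are the real special-token ids.
    tokenizer = new Tiktoken(vocab_size, /*bos*/ 128000, /*eos*/ 128001);
  } else {
    // Llama 2: SentencePiece-backed tokenizer, replacing the old BPETokenizer.
    tokenizer = new SPTokenizer(vocab_size, /*bos*/ 1, /*eos*/ 2);
  }
  // Both paths now load the upstream tokenizer.model (tokenizer.bin is gone).
  tokenizer->load(tokenizer_path);
  return tokenizer;
}

void encode_llama3_prompt(Tokenizer* tokenizer, const std::string& prompt) {
  // encode(text, n_bos, n_eos): requesting one BOS token replaces the old
  // manual insert of the "<|begin_of_text|>" token at the front.
  auto prompt_tokens = tokenizer->encode(prompt, 1, 0);
  // Stop tokens are still looked up by encoding the literal special strings.
  auto end_of_text = tokenizer->encode("<|end_of_text|>", 0, 0)[0];
  auto eot_id = tokenizer->encode("<|eot_id|>", 0, 0)[0];
  (void)prompt_tokens;
  (void)end_of_text;
  (void)eot_id;
}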

tokenizer/CMakeLists.txt

Lines changed: 5 additions & 3 deletions
@@ -9,10 +9,11 @@ ENDIF()
 # build tokenizer library
 add_library(
   tokenizer
-  bpe_tokenizer.cpp
+  tokenizer.h
+  sentencepiece.cpp
   tiktoken.cpp)

-target_include_directories(tokenizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
+target_include_directories(tokenizer PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} third-party/sentencepiece/src)

 # add RE2 as subdirectory
 set(ABSL_ENABLE_INSTALL ON)
@@ -22,6 +23,7 @@ ${CMAKE_POSITION_INDEPENDENT_CODE})
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 add_subdirectory(third-party/abseil-cpp)
 add_subdirectory(third-party/re2)
+add_subdirectory(third-party/sentencepiece)
 set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})

-target_link_libraries(tokenizer PUBLIC re2::re2)
+target_link_libraries(tokenizer PUBLIC re2::re2 sentencepiece-static)