Add support for tiktoken and refactored runner structure #435


Merged: 8 commits, merged on Apr 24, 2024
20 changes: 11 additions & 9 deletions .github/workflows/pull.yml
@@ -711,7 +711,9 @@ jobs:
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
+        with:
+          submodules: true
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
@@ -734,8 +736,8 @@ jobs:
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
           python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
-          cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
-          cmake --build ./runner-et/cmake-out
+          cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+          cmake --build ./cmake-out --target et_run
       - name: Download checkpoints
         run: |
@@ -750,7 +752,7 @@ jobs:
           cat ./output_eager

           python torchchat.py export stories15M --output-pte-path ./model.pte
-          ./runner-et/cmake-out/run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
+          ./cmake-out/et_run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
           cat ./output_et

           echo "Tests complete."
@@ -767,6 +769,8 @@ jobs:
     steps:
       - name: Checkout repo
         uses: actions/checkout@v3
+        with:
+          submodules: true
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
@@ -780,10 +784,8 @@ jobs:
           pip install -r requirements.txt
           pip list

-          cd ${TORCHCHAT_ROOT}/runner-aoti
-          cmake -Bbuild -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`
-          cmake --build build
-          cd ..
+          cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+          cmake --build ./cmake-out --target aoti_run
       - name: Download checkpoint
         run: |
           mkdir -p checkpoints/stories15M
@@ -804,7 +806,7 @@ jobs:

           python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-dso-path /tmp/model.so

-          ./runner-aoti/build/run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_aoti
+          ./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_aoti
           cat ${PWD}/output_aoti

           echo "Tests complete."
6 changes: 6 additions & 0 deletions .gitmodules
@@ -0,0 +1,6 @@
[submodule "tokenizer/third-party/abseil-cpp"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to update instructions to do git clone recursive, git submodule updates?

This is what PyTorch tells users:

git clone --recursive https://github.com/pytorch/pytorch
cd pytorch
# if you are updating an existing checkout
git submodule sync
git submodule update --init --recursive

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah where should I update the doc?

path = tokenizer/third-party/abseil-cpp
url = https://github.com/abseil/abseil-cpp.git
[submodule "tokenizer/third-party/re2"]
path = tokenizer/third-party/re2
url = https://github.com/google/re2.git
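Following up on the thread above, a minimal sketch of what the updated clone instructions could look like; the pytorch/torchchat repository URL is an assumption, not taken from this PR:

    # Hypothetical doc snippet: clone torchchat with the new tokenizer submodules
    git clone --recursive https://github.com/pytorch/torchchat
    cd torchchat
    # if you are updating an existing checkout
    git submodule sync
    git submodule update --init --recursive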
24 changes: 24 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,24 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)
IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

project(Torchchat)

# include tokenizer
add_subdirectory(tokenizer)

# include et_run executable
include(runner/et.cmake)
if(TARGET et_run)
target_link_libraries(et_run PUBLIC tokenizer)
endif()

# include aoti_run executable
include(runner/aoti.cmake)
if(TARGET aoti_run)
target_link_libraries(aoti_run tokenizer)
endif()
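With the runners folded into this top-level CMakeLists.txt, both executables configure and build from the repository root. A minimal sketch of the local commands, mirroring the updated CI steps above (building both targets from one configure is an assumption; the CI builds them in separate jobs):

    # Configure once at the repo root, pointing CMake at the installed torch
    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
    # Build either runner; both link against the shared tokenizer target
    cmake --build ./cmake-out --target et_run
    cmake --build ./cmake-out --target aoti_run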
17 changes: 0 additions & 17 deletions runner-aoti/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions runner-aoti/run.cpp

This file was deleted.

89 changes: 0 additions & 89 deletions runner-et/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions runner-et/run.cpp

This file was deleted.

File renamed without changes.
21 changes: 21 additions & 0 deletions runner/aoti.cmake
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)
IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

find_package(CUDA)

find_package(Torch)
if(Torch_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${TORCH_CXX_FLAGS} -fpermissive")

add_executable(aoti_run runner/run.cpp)

target_compile_options(aoti_run PUBLIC -D__AOTI_MODEL__)
target_include_directories(aoti_run PRIVATE ${TORCHCHAT_ROOT}/runner)
target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
endif()
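For reference, a minimal sketch of exercising the aoti_run binary this file produces, based on the updated CI step above; the checkpoint and tokenizer paths are illustrative:

    # Export a model to a shared object, then run it with the AOTI runner
    python torchchat.py export --checkpoint-path checkpoints/stories15M/stories15M.pt --output-dso-path /tmp/model.so
    ./cmake-out/aoti_run /tmp/model.so -z checkpoints/stories15M/tokenizer.bin -i "Once upon a time"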
6 changes: 3 additions & 3 deletions runner-et/build_android.sh → runner/build_android.sh
@@ -30,10 +30,10 @@ export CMAKE_OUT_DIR="cmake-out-android"
 #

 build_runner_et() {
-  rm -rf build/cmake-out-android
+  rm -rf cmake-out-android
   echo "ET BUILD DIR IS ${ET_BUILD_DIR}"
-  cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S ./runner-et -B build/cmake-out-android -G Ninja
-  cmake --build build/cmake-out-android/ -j16 --config Release
+  cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S . -B cmake-out-android -G Ninja
+  cmake --build cmake-out-android/ -j16 --config Release --target et_run
 }

 find_cmake_prefix_path
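A sketch of invoking the renamed script; running it from the repo root follows from the new -S . flag, and the NDK path is a placeholder:

    # Placeholder NDK location; point this at your installed NDK
    export ANDROID_NDK=$HOME/android-ndk
    # Builds the et_run target for arm64-v8a into cmake-out-android/
    sh runner/build_android.sh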
98 changes: 98 additions & 0 deletions runner/et.cmake
@@ -0,0 +1,98 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)

IF(DEFINED ENV{ET_BUILD_DIR})
set(ET_BUILD_DIR $ENV{ET_BUILD_DIR})
ELSE()
set(ET_BUILD_DIR "et-build")
ENDIF()

MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")

IF(DEFINED ENV{CMAKE_OUT_DIR})
set(CMAKE_OUT_DIR $ENV{CMAKE_OUT_DIR})
ELSE()
set(CMAKE_OUT_DIR "cmake-out")
ENDIF()

MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")

IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

project(Torchchat)

include(CMakePrintHelpers)
include(runner/Utils.cmake)

cmake_print_variables(TORCHCHAT_ROOT)

MESSAGE(STATUS "Looking for excutorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
find_package(executorch CONFIG PATHS ${executorch_DIR})
if(executorch_FOUND)
set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)

cmake_print_variables(_common_include_directories)

target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
add_executable(et_run runner/run.cpp)

target_compile_options(et_run PUBLIC -D__ET__MODEL -D_GLIBCXX_USE_CXX11_ABI=1)

# Link ET runtime + extensions
target_link_libraries(
et_run PRIVATE
executorch
extension_module
${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
optimized_kernels
quantized_kernels
portable_kernels
cpublas
eigen_blas
# The libraries below need to be whole-archived linked
optimized_native_cpu_ops_lib
quantized_ops_lib
xnnpack_backend
XNNPACK
pthreadpool
cpuinfo
)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
target_link_options_shared_lib(quantized_ops_lib)
target_link_options_shared_lib(xnnpack_backend)
# Not clear why linking executorch as whole-archive outside android/apple is leading
# to double registration. Most likely because of linkage issues.
# Will figure this out later. Until then use this.
if(ANDROID OR APPLE)
target_link_options_shared_lib(executorch)
endif()

target_link_libraries(et_run PRIVATE
"$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")

# This one is needed for cpuinfo where it uses android specific log lib
if(ANDROID)
target_link_libraries(et_run PRIVATE log)
endif()

# Adding target_link_options_shared_lib as commented out below leads to this:
#
# CMake Error at Utils.cmake:22 (target_link_options):
# Cannot specify link options for target
# "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
# which is not built by this project.
# Call Stack (most recent call first):
# Utils.cmake:30 (macos_kernel_link_options)
# CMakeLists.txt:41 (target_link_options_shared_lib)
#
#target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch

# This works on mac, but appears to run into issues on linux
# It is needed to solve:
# E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
endif()
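The ExecuTorch runner build above is steered entirely by environment variables. A minimal sketch of overriding the defaults before configuring; the values shown are illustrative:

    # Illustrative values; et.cmake falls back to et-build and cmake-out when unset
    export TORCHCHAT_ROOT=$PWD
    export ET_BUILD_DIR=et-build      # where the ExecuTorch checkout and install live
    export CMAKE_OUT_DIR=cmake-out    # build dir name used inside the ExecuTorch tree
    cmake -S . -B ./cmake-out -G Ninja
    cmake --build ./cmake-out --target et_run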
23 changes: 18 additions & 5 deletions runner/run.cpp
@@ -1,5 +1,4 @@
 /* Inference for Llama-2 Transformer model in pure C++ */
-
 #include <ctype.h>
 #include <math.h>
 #include <stdint.h>
@@ -397,7 +396,7 @@ void generate(
 }

   // encode the (string) prompt into tokens sequence
-  std::string prompt_str(prompt);
+  std::string prompt_str = prompt;
   std::vector<uint64_t> prompt_tokens = tokenizer->encode(prompt_str, 1, 0);
   int num_prompt_tokens = prompt_tokens.size();
   if (num_prompt_tokens < 1) {
@@ -674,9 +673,23 @@ int main(int argc, char* argv[]) {
   build_transformer(&transformer, checkpoint_path, vocab_size, steps);

   // build the Tokenizer via the tokenizer .bin file
-  Tokenizer* tokenizer =
-      new BPETokenizer(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
-  tokenizer->load(tokenizer_path);
+  Tokenizer* tokenizer = nullptr;
+
+  // Try to load as a Tiktoken tokenizer; if that throws, fall back to BPE
+  try {
+    tokenizer =
+        new Tiktoken(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
+    tokenizer->load(tokenizer_path);
+  } catch (const std::invalid_argument&) {
+    fprintf(
+        stderr,
+        "Failed to load %s into a Tiktoken tokenizer. Trying sentencepiece tokenizer..\n",
+        tokenizer_path);
+    delete tokenizer;
+    tokenizer =
+        new BPETokenizer(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
+    tokenizer->load(tokenizer_path);
+  }

   // build the Sampler
   Sampler sampler;
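With this fallback in place, the same runner binary accepts either tokenizer format. A brief sketch of both invocations; the et_run flags come from the CI steps above, while the tokenizer.model filename for a Tiktoken-style model is an assumption:

    # SentencePiece-style .bin tokenizer, as exercised in CI above
    ./cmake-out/et_run ./model.pte -z ./tokenizer.bin -t 0 -i "Once upon a time"
    # Hypothetical Tiktoken tokenizer file; the runner tries Tiktoken first and
    # falls back to the BPE loader on std::invalid_argument
    ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "Once upon a time"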