Add support for tiktoken and refactored runner structure #435


Merged: 8 commits, merged on Apr 24, 2024
20 changes: 11 additions & 9 deletions .github/workflows/pull.yml
@@ -711,7 +711,9 @@ jobs:
     runs-on: ${{matrix.runner}}
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
+        with:
+          submodules: true
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
@@ -734,8 +736,8 @@ jobs:
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
           python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
-          cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
-          cmake --build ./runner-et/cmake-out
+          cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+          cmake --build ./cmake-out --target et_run
       - name: Download checkpoints
         run: |
@@ -750,7 +752,7 @@ jobs:
           cat ./output_eager

           python torchchat.py export stories15M --output-pte-path ./model.pte
-          ./runner-et/cmake-out/run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
+          ./cmake-out/et_run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
           cat ./output_et

           echo "Tests complete."
@@ -767,6 +769,8 @@ jobs:
     steps:
       - name: Checkout repo
         uses: actions/checkout@v3
+        with:
+          submodules: true
       - name: Setup Python
         uses: actions/setup-python@v4
         with:
@@ -780,10 +784,8 @@ jobs:
           pip install -r requirements.txt
           pip list

-          cd ${TORCHCHAT_ROOT}/runner-aoti
-          cmake -Bbuild -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`
-          cmake --build build
-          cd ..
+          cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
+          cmake --build ./cmake-out --target aoti_run
       - name: Download checkpoint
         run: |
           mkdir -p checkpoints/stories15M
@@ -804,7 +806,7 @@ jobs:

           python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-dso-path /tmp/model.so

-          ./runner-aoti/build/run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_aoti
+          ./cmake-out/aoti_run /tmp/model.so -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_aoti
           cat ${PWD}/output_aoti

           echo "Tests complete."
6 changes: 6 additions & 0 deletions .gitmodules
@@ -0,0 +1,6 @@
[submodule "tokenizer/third-party/abseil-cpp"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to update instructions to do git clone recursive, git submodule updates?

This is what PyTorch tells users:

git clone --recursive https://github.com/pytorch/pytorch
cd pytorch
# if you are updating an existing checkout
git submodule sync
git submodule update --init --recursive

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah where should I update the doc?

path = tokenizer/third-party/abseil-cpp
url = https://github.com/abseil/abseil-cpp.git
[submodule "tokenizer/third-party/re2"]
path = tokenizer/third-party/re2
url = https://github.com/google/re2.git
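Following up on the thread above, a minimal sketch of what the updated clone instructions could look like; the pytorch/torchchat repository URL is an assumption, not taken from this PR:

    # Hypothetical doc snippet: clone torchchat with the new tokenizer submodules
    git clone --recursive https://github.com/pytorch/torchchat
    cd torchchat
    # if you are updating an existing checkout
    git submodule sync
    git submodule update --init --recursive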
24 changes: 24 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,24 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)
IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

project(Torchchat)

# include tokenizer
add_subdirectory(tokenizer)

# include et_run executable
include(runner/et.cmake)
if(TARGET et_run)
target_link_libraries(et_run PUBLIC tokenizer)
endif()

# include aoti_run executable
include(runner/aoti.cmake)
if(TARGET aoti_run)
target_link_libraries(aoti_run tokenizer)
endif()
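With the runners folded into this top-level CMakeLists.txt, both executables configure and build from the repository root. A minimal sketch of the local commands, mirroring the updated CI steps above (building both targets from one configure is an assumption; the CI builds them in separate jobs):

    # Configure once at the repo root, pointing CMake at the installed torch
    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'` -G Ninja
    # Build either runner; both link against the shared tokenizer target
    cmake --build ./cmake-out --target et_run
    cmake --build ./cmake-out --target aoti_run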
17 changes: 0 additions & 17 deletions runner-aoti/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions runner-aoti/run.cpp

This file was deleted.

89 changes: 0 additions & 89 deletions runner-et/CMakeLists.txt

This file was deleted.

6 changes: 0 additions & 6 deletions runner-et/run.cpp

This file was deleted.

File renamed without changes.
21 changes: 21 additions & 0 deletions runner/aoti.cmake
@@ -0,0 +1,21 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)
IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

find_package(CUDA)

find_package(Torch)
if(Torch_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g ${TORCH_CXX_FLAGS} -fpermissive")

add_executable(aoti_run runner/run.cpp)

target_compile_options(aoti_run PUBLIC -D__AOTI_MODEL__)
target_include_directories(aoti_run PRIVATE ${TORCHCHAT_ROOT}/runner)
target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
endif()
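For reference, a minimal sketch of exercising the aoti_run binary this file produces, based on the updated CI step above; the checkpoint and tokenizer paths are illustrative:

    # Export a model to a shared object, then run it with the AOTI runner
    python torchchat.py export --checkpoint-path checkpoints/stories15M/stories15M.pt --output-dso-path /tmp/model.so
    ./cmake-out/aoti_run /tmp/model.so -z checkpoints/stories15M/tokenizer.bin -i "Once upon a time"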
6 changes: 3 additions & 3 deletions runner-et/build_android.sh → runner/build_android.sh
@@ -30,10 +30,10 @@ export CMAKE_OUT_DIR="cmake-out-android"
 #

 build_runner_et() {
-  rm -rf build/cmake-out-android
+  rm -rf cmake-out-android
   echo "ET BUILD DIR IS ${ET_BUILD_DIR}"
-  cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S ./runner-et -B build/cmake-out-android -G Ninja
-  cmake --build build/cmake-out-android/ -j16 --config Release
+  cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S . -B cmake-out-android -G Ninja
+  cmake --build cmake-out-android/ -j16 --config Release --target et_run
 }

 find_cmake_prefix_path
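A sketch of invoking the renamed script; running it from the repo root follows from the new -S . flag, and the NDK path is a placeholder:

    # Placeholder NDK location; point this at your installed NDK
    export ANDROID_NDK=$HOME/android-ndk
    # Builds the et_run target for arm64-v8a into cmake-out-android/
    sh runner/build_android.sh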
98 changes: 98 additions & 0 deletions runner/et.cmake
@@ -0,0 +1,98 @@
cmake_minimum_required(VERSION 3.24)
set(CMAKE_CXX_STANDARD 17)

IF(DEFINED ENV{ET_BUILD_DIR})
set(ET_BUILD_DIR $ENV{ET_BUILD_DIR})
ELSE()
set(ET_BUILD_DIR "et-build")
ENDIF()

MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")

IF(DEFINED ENV{CMAKE_OUT_DIR})
set(CMAKE_OUT_DIR $ENV{CMAKE_OUT_DIR})
ELSE()
set(CMAKE_OUT_DIR "cmake-out")
ENDIF()

MESSAGE(STATUS "Using ET BUILD DIR: --[${ET_BUILD_DIR}]--")

IF(DEFINED ENV{TORCHCHAT_ROOT})
set(TORCHCHAT_ROOT $ENV{TORCHCHAT_ROOT})
ELSE()
set(TORCHCHAT_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
ENDIF()

project(Torchchat)

include(CMakePrintHelpers)
include(runner/Utils.cmake)

cmake_print_variables(TORCHCHAT_ROOT)

MESSAGE(STATUS "Looking for excutorch in ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch")
set(executorch_DIR ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/cmake/ExecuTorch)
find_package(executorch CONFIG PATHS ${executorch_DIR})
if(executorch_FOUND)
set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)

cmake_print_variables(_common_include_directories)

target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
add_executable(et_run runner/run.cpp)

target_compile_options(et_run PUBLIC -D__ET__MODEL -D_GLIBCXX_USE_CXX11_ABI=1)

# Link ET runtime + extensions
target_link_libraries(
et_run PRIVATE
executorch
extension_module
${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
optimized_kernels
quantized_kernels
portable_kernels
cpublas
eigen_blas
# The libraries below need to be whole-archived linked
optimized_native_cpu_ops_lib
quantized_ops_lib
xnnpack_backend
XNNPACK
pthreadpool
cpuinfo
)
target_link_options_shared_lib(optimized_native_cpu_ops_lib)
target_link_options_shared_lib(quantized_ops_lib)
target_link_options_shared_lib(xnnpack_backend)
# Not clear why linking executorch as whole-archive outside android/apple is leading
# to double registration. Most likely because of linkage issues.
# Will figure this out later. Until then use this.
if(ANDROID OR APPLE)
target_link_options_shared_lib(executorch)
endif()

target_link_libraries(et_run PRIVATE
"$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")

# This one is needed for cpuinfo where it uses android specific log lib
if(ANDROID)
target_link_libraries(et_run PRIVATE log)
endif()

# Adding target_link_options_shared_lib as commented out below leads to this:
#
# CMake Error at Utils.cmake:22 (target_link_options):
# Cannot specify link options for target
# "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
# which is not built by this project.
# Call Stack (most recent call first):
# Utils.cmake:30 (macos_kernel_link_options)
# CMakeLists.txt:41 (target_link_options_shared_lib)
#
#target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch

# This works on mac, but appears to run into issues on linux
# It is needed to solve:
# E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
endif()
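The ExecuTorch runner build above is steered entirely by environment variables. A minimal sketch of overriding the defaults before configuring; the values shown are illustrative:

    # Illustrative values; et.cmake falls back to et-build and cmake-out when unset
    export TORCHCHAT_ROOT=$PWD
    export ET_BUILD_DIR=et-build      # where the ExecuTorch checkout and install live
    export CMAKE_OUT_DIR=cmake-out    # build dir name used inside the ExecuTorch tree
    cmake -S . -B ./cmake-out -G Ninja
    cmake --build ./cmake-out --target et_run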
23 changes: 18 additions & 5 deletions runner/run.cpp
@@ -1,5 +1,4 @@
 /* Inference for Llama-2 Transformer model in pure C++ */
-
 #include <ctype.h>
 #include <math.h>
 #include <stdint.h>
@@ -397,7 +396,7 @@ void generate(
 }

   // encode the (string) prompt into tokens sequence
-  std::string prompt_str(prompt);
+  std::string prompt_str = prompt;
   std::vector<uint64_t> prompt_tokens = tokenizer->encode(prompt_str, 1, 0);
   int num_prompt_tokens = prompt_tokens.size();
   if (num_prompt_tokens < 1) {
@@ -674,9 +673,23 @@ int main(int argc, char* argv[]) {
   build_transformer(&transformer, checkpoint_path, vocab_size, steps);

   // build the Tokenizer via the tokenizer .bin file
-  Tokenizer* tokenizer =
-      new BPETokenizer(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
-  tokenizer->load(tokenizer_path);
+  Tokenizer* tokenizer = nullptr;
+
+  // Try to load as a Tiktoken tokenizer; if that throws, fall back to BPE
+  try {
+    tokenizer =
+        new Tiktoken(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
+    tokenizer->load(tokenizer_path);
+  } catch (const std::invalid_argument&) {
+    fprintf(
+        stderr,
+        "Failed to load %s into a Tiktoken tokenizer. Trying sentencepiece tokenizer..\n",
+        tokenizer_path);
+    delete tokenizer;
+    tokenizer =
+        new BPETokenizer(transformer.config.vocab_size, /*bos*/ 1, /*eos*/ 2);
+    tokenizer->load(tokenizer_path);
+  }

   // build the Sampler
   Sampler sampler;
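With this fallback in place, the same runner binary accepts either tokenizer format. A brief sketch of both invocations; the et_run flags come from the CI steps above, while the tokenizer.model filename for a Tiktoken-style model is an assumption:

    # SentencePiece-style .bin tokenizer, as exercised in CI above
    ./cmake-out/et_run ./model.pte -z ./tokenizer.bin -t 0 -i "Once upon a time"
    # Hypothetical Tiktoken tokenizer file; the runner tries Tiktoken first and
    # falls back to the BPE loader on std::invalid_argument
    ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "Once upon a time"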