
Fix CMakeLists for runner-et to reflect upstream changes in ET #378

Merged · 3 commits · Apr 22, 2024
8 changes: 8 additions & 0 deletions .gitignore
@@ -8,3 +8,11 @@ __pycache__/

.model-artifacts/
.venv

# Build directories
et-build/*
runner-et/cmake-out/*
runner-aoti/cmake-out/*

# pte files
*.pte
1 change: 1 addition & 0 deletions README.md
@@ -185,6 +185,7 @@ NOTE: The exported model will be large. We suggest you quantize the model, expla

### ExecuTorch
ExecuTorch enables you to optimize your model for execution on a mobile or embedded device, but can also be used on desktop for testing.
Before running ExecuTorch commands, you must first set up ExecuTorch in torchchat; see [Set-up Executorch](docs/executorch_setup.md).

**Examples**
The following example uses the Stories15M model.
13 changes: 13 additions & 0 deletions docs/executorch_setup.md
@@ -0,0 +1,13 @@
# Set up ExecuTorch

Before running any commands in torchchat that require ExecuTorch, you must first install ExecuTorch.

To install ExecuTorch, run the following commands *from the torchchat root directory*.

```
export TORCHCHAT_ROOT=${PWD}
export ENABLE_ET_PYBIND=true
./scripts/install_et.sh $ENABLE_ET_PYBIND
```

This will download the ExecuTorch repo to ./et-build/src and install various ExecuTorch libraries to ./et-build/install.
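As a quick sanity check (a sketch, assuming the default ./et-build layout described above), you can confirm both directories exist after the install script finishes:

```shell
# Sanity-check the ExecuTorch set-up; paths assume the default ./et-build layout.
TORCHCHAT_ROOT="${TORCHCHAT_ROOT:-$PWD}"
for d in "$TORCHCHAT_ROOT/et-build/src" "$TORCHCHAT_ROOT/et-build/install"; do
  if [ -d "$d" ]; then
    echo "found: $d"
  else
    echo "missing: $d (re-run ./scripts/install_et.sh)"
  fi
done
```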
66 changes: 66 additions & 0 deletions docs/runner_build.md
@@ -0,0 +1,66 @@
# Building runner-aoti and runner-et
Building the runners is straightforward and is covered in the next sections.

## Building and running runner-aoti
To build runner-aoti, run the following commands *from the torchchat root directory*:

```
cmake -S ./runner-aoti -B ./runner-aoti/cmake-out -G Ninja -DCMAKE_PREFIX_PATH=`python -c 'import torch;print(torch.utils.cmake_prefix_path)'`
cmake --build ./runner-aoti/cmake-out
```

After running these, the runner-aoti binary is located at ./runner-aoti/cmake-out/run.
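A quick way to confirm the build succeeded (a sketch; the binary path is the one stated above):

```shell
# Check that the runner-aoti binary was produced by the cmake build above.
RUNNER=./runner-aoti/cmake-out/run
if [ -x "$RUNNER" ]; then
  echo "runner built: $RUNNER"
else
  echo "runner missing; re-run the cmake commands above"
fi
```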

Let us try using it with an example.
We first download stories15M and export it to AOTI.

```
python torchchat.py download stories15M
python torchchat.py export stories15M --output-dso-path ./model.dso
```

We also need a tokenizer.bin file for the stories15M model:

```
wget -O ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
```

We can now execute the runner with:

```
./runner-aoti/cmake-out/run ./model.dso -z ./tokenizer.bin -i "Once upon a time"
```

## Building and running runner-et
Before building runner-et, you must first set up ExecuTorch by following [Set-up Executorch](executorch_setup.md).

To build runner-et, run the following commands *from the torchchat root directory*:

```
export TORCHCHAT_ROOT=${PWD}
cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
cmake --build ./runner-et/cmake-out
```

After running these, the runner-et binary is located at ./runner-et/cmake-out/runner_et.

Let us try using it with an example.
We first download stories15M and export it to ExecuTorch.

```
python torchchat.py download stories15M
python torchchat.py export stories15M --output-pte-path ./model.pte
```

We also need a tokenizer.bin file for the stories15M model:

```
wget -O ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
```

We can now execute the runner with:

```
./runner-et/cmake-out/runner_et ./model.pte -z ./tokenizer.bin -i "Once upon a time"
```
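Before invoking the runner, it can help to verify that both input files are present and non-empty (a sketch; the filenames are the ones used in the steps above):

```shell
# Verify the exported model and tokenizer exist and are non-empty before running.
for f in ./model.pte ./tokenizer.bin; do
  if [ -s "$f" ]; then
    echo "ok: $f"
  else
    echo "missing or empty: $f (re-run the export/wget steps above)"
  fi
done
```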
3 changes: 2 additions & 1 deletion export_et.py
@@ -83,7 +83,8 @@ def export_model(model, device, output_path, args=None) -> str: # noqa: C901
else:
raise ValueError(f"Unsupported dtype for ET export: {target_precision}")

replace_attention_with_custom_sdpa_attention(model)
# TODO: we can bring with pack when dynamo error P1220158146 is resolved
# replace_attention_with_custom_sdpa_attention(model)
with torch.nn.attention.sdpa_kernel(
[torch.nn.attention.SDPBackend.MATH]
), torch.no_grad():
21 changes: 2 additions & 19 deletions runner-et/CMakeLists.txt
@@ -41,8 +41,7 @@ target_link_libraries(
runner_et PRIVATE
executorch
extension_module
${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/extension/data_loader/libextension_data_loader.a # This one does not get installed by ExecuTorch
${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a # This one does not get installed by ExecuTorch
${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/lib/libextension_data_loader.a # This one gets installed in build directory by ExecuTorch
optimized_kernels
portable_kernels
cpublas
@@ -60,20 +59,4 @@ target_link_options_shared_lib(XNNPACK)
target_link_options_shared_lib(pthreadpool)
target_link_options_shared_lib(cpuinfo)
target_link_options_shared_lib(executorch)
target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a>")

# Adding target_link_options_shared_lib as commented out below leads to this:
#
# CMake Error at Utils.cmake:22 (target_link_options):
# Cannot specify link options for target
# "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
# which is not built by this project.
# Call Stack (most recent call first):
# Utils.cmake:30 (macos_kernel_link_options)
# CMakeLists.txt:41 (target_link_options_shared_lib)
#
#target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch

# This works on mac, but appears to run into issues on linux
# It is needed to solve:
# E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")