rename executable in runner-et to run to align with AOTI (#382)

metascroy · malfet · commit ee280db8a869 · 2024-07-17T09:55:42.000-07:00
* rename executable in runner-et to run to align with AOTI

* add missing quote

* download tokenizer.bin

* add -O
diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
@@ -734,28 +734,24 @@ jobs:
           python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
           python3 -c 'import torchvision;print(f"torchvision: {torchvision.__version__, torchvision.version.git_version}")'
           python3 -c 'import torchaudio;print(f"torchaudio: {torchaudio.__version__, torchaudio.version.git_version}")'
-          cmake -S ./runner-et -B et-build/cmake-out -G Ninja
-          cmake --build ./et-build/cmake-out
+          cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
+          cmake --build ./runner-et/cmake-out
       - name: Download checkpoints
         run: |
-          mkdir -p checkpoints/stories15M
-          pushd checkpoints/stories15M
-          wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories15M.pt
-          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-          wget https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
-          popd
+
       - name: Run inference
         run: |
-          export MODEL_DIR=${PWD}/checkpoints/stories15M
-          export PROMPT="Once upon a time in a land far away"
+          python torchchat.py download stories15M
+          wget -O ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
 
-          python torchchat.py generate --checkpoint-path ${MODEL_DIR}/stories15M.pt --temperature 0 --prompt "${PROMPT}"  > ${PWD}/output_eager
-          cat ${PWD}/output_eager
+          export PRMT="Once upon a time in a land far away"
 
-          python torchchat.py export --checkpoint-path ${MODEL_DIR}/stories15M.pt --output-pte-path ${PWD}/stories15M.pte
+          python torchchat.py generate stories15M --temperature 0 --prompt "${PRMT}"  > ./output_eager
+          cat ./output_eager
 
-          ./et-build/cmake-out/runner_et ${PWD}/stories15M.pte -z ${MODEL_DIR}/tokenizer.bin -i "${PROMPT}" > ${PWD}/output_et
-          cat ${PWD}/output_et
+          python torchchat.py export stories15M --output-pte-path ./model.pte
+          ./runner-et/cmake-out/run ./model.pte -z ./tokenizer.bin -t 0 -i "${PRMT}" > ./output_et
+          cat ./output_et
 
           echo "Tests complete."
   runner-aoti:
diff --git a/docs/runner_build.md b/docs/runner_build.md
@@ -1,5 +1,19 @@
 # Building runner-aoti and runner-et
-Building the runners is straightforward and is covered in the next sections.
+Building the runners is straightforward and is covered in the next sections. We will showcase the runners using stories15M.
+
+The runners accept the following CLI arguments:
+
+```
+Options:
+-t <float>  temperature in [0,inf], default 1.0
+-p <float>  p value in top-p (nucleus) sampling in [0,1], default 0.9
+-s <int>    random seed, default time(NULL)
+-n <int>    number of steps to run for, default 256. 0 = max_seq_len
+-i <string> input prompt
+-z <string> optional path to custom tokenizer
+-m <string> mode: generate|chat, default: generate
+-y <string> (optional) system prompt in chat mode
+```
 
 ## Building and running runner-aoti
 To build runner-aoti, run the following commands *from the torchchat root directory*
@@ -16,19 +30,14 @@ We first download stories15M and export it to AOTI.
 
 ```
 python torchchat.py download stories15M
-python torchchat.py export --output-dso-path ./model.dso
-```
-
-We also need a tokenizer.bin file for the stories15M model:
-
-```
-wget ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+python torchchat.py export stories15M --output-dso-path ./model.so
 ```
 
 We can now execute the runner with:
 
 ```
-./runner-aoti/cmake-out/run ./model.dso -z ./tokenizer.bin -i "Once upon a time"
+wget -O ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+./runner-aoti/cmake-out/run ./model.so -z ./tokenizer.bin -i "Once upon a time"
 ```
 
 ## Building and running runner-et
@@ -43,7 +52,7 @@ cmake -S ./runner-et -B ./runner-et/cmake-out -G Ninja
 cmake --build ./runner-et/cmake-out
 ```
 
-After running these, the runner-et binary is located at ./runner-et/cmake-out/runner-et.
+After running these, the runner-et binary is located at ./runner-et/cmake-out/run.
 
 Let us try using it with an example.
 We first download stories15M and export it to ExecuTorch.
@@ -53,14 +62,9 @@ python torchchat.py download stories15M
 python torchchat.py export stories15M --output-pte-path ./model.pte
 ```
 
-We also need a tokenizer.bin file for the stories15M model:
-
-```
-wget ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
-```
-
 We can now execute the runner with:
 
 ```
-./runner-et/cmake-out/runner_et ./model.pte -z ./tokenizer.bin -i "Once upon a time"
+wget -O ./tokenizer.bin https://github.com/karpathy/llama2.c/raw/master/tokenizer.bin
+./runner-et/cmake-out/run ./model.pte -z ./tokenizer.bin -i "Once upon a time"
 ```
diff --git a/runner-et/CMakeLists.txt b/runner-et/CMakeLists.txt
@@ -34,11 +34,11 @@ set(_common_include_directories ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src)
 cmake_print_variables(_common_include_directories)
 
 target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
-add_executable(runner_et run.cpp)
+add_executable(run run.cpp)
 
 # Link ET runtime + extensions
 target_link_libraries(
-    runner_et PRIVATE
+  run PRIVATE
         executorch
         extension_module
         ${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/lib/libextension_data_loader.a # This one gets installed in build directory by ExecuTorch
@@ -59,4 +59,4 @@ target_link_options_shared_lib(XNNPACK)
 target_link_options_shared_lib(pthreadpool)
 target_link_options_shared_lib(cpuinfo)
 target_link_options_shared_lib(executorch)
-target_link_libraries(runner_et PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")
+target_link_libraries(run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops.a>")