Skip to content

Commit 3ef5c3e

Browse files
kimishpatel authored and malfet committed
Port number of threads selection logic from executorch (#505)
Summary: Without this optimization, llama3 on an S22 runs at around 4 tok/sec. With the fix it is > 7 tok/sec.

Test Plan:
./runner/build_android.sh
python3 torchchat.py download llama3
python3 torchchat.py export llama3 --output-pte-path llama3.pte --quantize config/data/mobile.json
adb push llama3.pte /data/local/tmp/
adb push tokenizer.model /data/local/tmp/
adb shell "cd /data/local/tmp/ && ./et_run llama3.pte -z tokenizer.model -t 0 -i "Once upon" -n 124"

Reviewers: Subscribers: Tasks: Tags:
1 parent 202de74 commit 3ef5c3e

File tree

3 files changed

+30
-3
lines changed

3 files changed

+30
-3
lines changed

runner/build_android.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ export CMAKE_OUT_DIR="cmake-out-android"
3030
build_runner_et() {
3131
rm -rf cmake-out-android
3232
echo "ET BUILD DIR IS ${ET_BUILD_DIR}"
33-
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S . -B cmake-out-android -G Ninja
33+
cmake -DET_USE_ADPATIVE_THREADS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -S . -B cmake-out-android -G Ninja
3434
cmake --build cmake-out-android/ -j16 --config Release --target et_run
3535
}
3636

runner/et.cmake

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,25 @@ if(executorch_FOUND)
4848

4949
cmake_print_variables(_common_include_directories)
5050

51+
set(_srcs runner/run.cpp)
52+
set(_common_compile_options -D__ET__MODEL -D_GLIBCXX_USE_CXX11_ABI=1)
53+
if(ET_USE_ADPATIVE_THREADS)
54+
list(APPEND _common_compile_options -DET_USE_ADPATIVE_THREADS)
55+
56+
set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch)
57+
set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack)
58+
list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
59+
list(APPEND _common_include_directories
60+
${XNNPACK_ROOT}/third-party/cpuinfo/include)
61+
62+
list(APPEND _common_include_directories
63+
${XNNPACK_ROOT}/third-party/pthreadpool/include)
64+
endif()
65+
5166
target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
52-
add_executable(et_run runner/run.cpp)
67+
add_executable(et_run ${_srcs})
5368

54-
target_compile_options(et_run PUBLIC -D__ET__MODEL -D_GLIBCXX_USE_CXX11_ABI=1)
69+
target_compile_options(et_run PUBLIC ${_common_compile_options})
5570

5671
# Link ET runtime + extensions
5772
target_link_libraries(

runner/run.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
#include <executorch/runtime/core/exec_aten/exec_aten.h>
2727
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
2828

29+
#if defined(ET_USE_ADPATIVE_THREADS)
30+
#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
31+
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
32+
#endif
33+
2934
using exec_aten::ScalarType;
3035
using torch::executor::EValue;
3136
using torch::executor::ManagedTensor;
@@ -633,6 +638,13 @@ int main(int argc, char* argv[]) {
633638
char* system_prompt =
634639
NULL; // the (optional) system prompt to use in chat mode
635640

641+
#if defined(ET_USE_ADPATIVE_THREADS)
642+
uint32_t num_performant_cores = torch::executorch::cpuinfo::get_num_performant_cores();
643+
if (num_performant_cores > 0) {
644+
torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
645+
num_performant_cores);
646+
}
647+
#endif
636648
// poor man's C argparse so we can override the defaults above from the
637649
// command line
638650
if (argc >= 2) {

0 commit comments

Comments (0)