[Executorch][llama] Set # of threads to use performant cores

kimishpatel · kimishpatel · commit c8c9c4f94edc · 2024-03-15T10:04:35.000-07:00
Pull Request resolved: #2352 When all cores are used, the slower cores drag performance down by blocking the large cores. Perhaps once we have uarch-specific implementations we may not need this, but this tool is useful in general until we have a better API. //unrelated failures @github-bypass-export-checks ghstack-source-id: 218830345 @exported-using-ghexport Differential Revision: [D54766071](https://our.internmc.facebook.com/intern/diff/D54766071/)
diff --git a/examples/models/llama2/main.cpp b/examples/models/llama2/main.cpp
@@ -10,6 +10,11 @@
 
 #include <executorch/examples/models/llama2/runner/runner.h>
 
+#if defined(ET_USE_THREADPOOL)
+#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
+#include <executorch/backends/xnnpack/threadpool/threadpool.h>
+#endif
+
 DEFINE_string(
     model_path,
     "llama2.pte",
@@ -45,6 +50,23 @@ int32_t main(int32_t argc, char** argv) {
 
   int32_t seq_len = FLAGS_seq_len;
 
+#if defined(ET_USE_THREADPOOL)
+  // Size the threadpool to the performant cores only: when every core is
+  // used, the slower cores block the larger ones and drag performance down.
+  const uint32_t num_performant_cores =
+      torch::executorch::cpuinfo::get_num_performant_cores();
+  // The count is unsigned, so log it with %u (cast keeps the specifier and
+  // argument type in agreement on every platform).
+  ET_LOG(
+      Info,
+      "Resetting threadpool with num threads = %u",
+      static_cast<unsigned int>(num_performant_cores));
+  // Never resize the pool to zero threads; keep the existing pool instead.
+  // NOTE(review): assumes a count of 0 means core detection failed — confirm.
+  if (num_performant_cores > 0) {
+    torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
+        num_performant_cores);
+  }
+#endif
   // create llama runner
   ::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
 
diff --git a/examples/models/llama2/targets.bzl b/examples/models/llama2/targets.bzl
@@ -16,6 +16,8 @@ def define_common_targets():
                 deps = [
                     "//executorch/examples/models/llama2/runner:runner" + aten_suffix,
                     "//executorch/extension/evalue_util:print_evalue",
+                    "//executorch/backends/xnnpack/threadpool:threadpool",
+                    "//executorch/backends/xnnpack/threadpool:cpuinfo_utils",
                 ],
                 external_deps = [
                     "gflags",