Skip to content

Commit c8c9c4f

Browse files
committed
[Executorch][llama] Set # of threads to use performant cores
Pull Request resolved: #2352 When all cores are used, the slower ones drag performance down by blocking the large cores. Perhaps once we have a uarch-specific implementation we will not need this, but this tool is useful in general until we have a better API //unrelated failures @github-bypass-export-checks ghstack-source-id: 218830345 @exported-using-ghexport Differential Revision: [D54766071](https://our.internmc.facebook.com/intern/diff/D54766071/)
1 parent f9141b3 commit c8c9c4f

File tree

2 files changed

+15
-0
lines changed

2 files changed

+15
-0
lines changed

examples/models/llama2/main.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010

1111
#include <executorch/examples/models/llama2/runner/runner.h>
1212

13+
#if defined(ET_USE_THREADPOOL)
14+
#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
15+
#include <executorch/backends/xnnpack/threadpool/threadpool.h>
16+
#endif
17+
1318
DEFINE_string(
1419
model_path,
1520
"llama2.pte",
@@ -45,6 +50,14 @@ int32_t main(int32_t argc, char** argv) {
4550

4651
int32_t seq_len = FLAGS_seq_len;
4752

53+
#if defined(ET_USE_THREADPOOL)
54+
uint32_t num_performant_cores =
55+
torch::executorch::cpuinfo::get_num_performant_cores();
56+
ET_LOG(
57+
Info, "Resetting threadpool with num threads = %d", num_performant_cores);
58+
torch::executorch::threadpool::get_threadpool()->_unsafe_reset_threadpool(
59+
num_performant_cores);
60+
#endif
4861
// create llama runner
4962
::torch::executor::Runner runner(model_path, tokenizer_path, temperature);
5063

examples/models/llama2/targets.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ def define_common_targets():
1616
deps = [
1717
"//executorch/examples/models/llama2/runner:runner" + aten_suffix,
1818
"//executorch/extension/evalue_util:print_evalue",
19+
"//executorch/backends/xnnpack/threadpool:threadpool",
20+
"//executorch/backends/xnnpack/threadpool:cpuinfo_utils",
1921
],
2022
external_deps = [
2123
"gflags",

0 commit comments

Comments
 (0)