Skip to content

Commit 2204a7e

Browse files
GregoryComer authored and
facebook-github-bot committed
Default to cores/2 threads in JNI layer (#6042)
Summary: Default to using cores/2 threadpool threads. The long-term plan is to improve detection of performance cores in CPUInfo, but for now cores/2 is a sane default. Based on testing, this is almost universally faster than using all cores, because efficiency cores can be quite slow; in extreme cases, using all cores can be 10x slower than using cores/2. This also matches Lite Interpreter's default behavior when it doesn't have a more precise heuristic for the target hardware. Differential Revision: D64107326
1 parent e540bcb commit 2204a7e

File tree

2 files changed

+28
-0
lines changed

2 files changed

+28
-0
lines changed

extension/android/jni/BUCK

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
load("@fbsource//tools/build_defs/android:fb_android_cxx_library.bzl", "fb_android_cxx_library")
2+
load("@fbsource//xplat/executorch/backends/xnnpack/third-party:third_party_libs.bzl", "third_party_dep")
23
load("@fbsource//xplat/executorch/codegen:codegen.bzl", "executorch_generated_lib")
34

45
oncall("executorch")
@@ -41,6 +42,8 @@ fb_android_cxx_library(
4142
"//xplat/executorch/extension/module:module_static",
4243
"//xplat/executorch/extension/runner_util:inputs_static",
4344
"//xplat/executorch/extension/tensor:tensor_static",
45+
"//xplat/executorch/extension/threadpool:threadpool",
46+
third_party_dep("cpuinfo"),
4447
],
4548
)
4649

extension/android/jni/jni_layer.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#include "jni_layer_constants.h"
1919

20+
2021
#include <executorch/extension/module/module.h>
2122
#include <executorch/extension/runner_util/inputs.h>
2223
#include <executorch/extension/tensor/tensor.h>
@@ -25,6 +26,11 @@
2526
#include <executorch/runtime/platform/platform.h>
2627
#include <executorch/runtime/platform/runtime.h>
2728

29+
#ifdef ET_USE_THREADPOOL
30+
#include <cpuinfo.h>
31+
#include <executorch/extension/threadpool/threadpool.h>
32+
#endif
33+
2834
#include <fbjni/ByteBuffer.h>
2935
#include <fbjni/fbjni.h>
3036

@@ -260,6 +266,25 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
260266
}
261267

262268
module_ = std::make_unique<Module>(modelPath->toStdString(), load_mode);
269+
270+
#ifdef ET_USE_THREADPOOL
271+
// Default to using cores/2 threadpool threads. The long-term plan is to
272+
// improve performant core detection in CPUInfo, but for now we can use
273+
// cores/2 as a sane default.
274+
//
275+
// Based on testing, this is almost universally faster than using all
276+
// cores, as efficiency cores can be quite slow. In extreme cases, using
277+
// all cores can be 10x slower than using cores/2.
278+
//
279+
// TODO Allow overriding this default from Java.
280+
auto threadpool = executorch::extension::threadpool::get_threadpool();
281+
if (threadpool) {
282+
int thread_count = cpuinfo_get_processors_count() / 2;
283+
if (thread_count > 0) {
284+
threadpool->_unsafe_reset_threadpool(thread_count);
285+
}
286+
}
287+
#endif
263288
}
264289

265290
facebook::jni::local_ref<facebook::jni::JArrayClass<JEValue>> forward(

0 commit comments

Comments
 (0)