|
17 | 17 |
|
18 | 18 | #include "jni_layer_constants.h"
|
19 | 19 |
|
| 20 | +#include <cpuinfo.h> |
| 21 | + |
20 | 22 | #include <executorch/extension/module/module.h>
|
21 | 23 | #include <executorch/extension/runner_util/inputs.h>
|
22 | 24 | #include <executorch/extension/tensor/tensor.h>
|
| 25 | +#include <executorch/extension/threadpool/threadpool.h> |
23 | 26 | #include <executorch/runtime/core/portable_type/tensor_impl.h>
|
24 | 27 | #include <executorch/runtime/platform/log.h>
|
25 | 28 | #include <executorch/runtime/platform/platform.h>
|
@@ -260,6 +263,23 @@ class ExecuTorchJni : public facebook::jni::HybridClass<ExecuTorchJni> {
|
260 | 263 | }
|
261 | 264 |
|
262 | 265 | module_ = std::make_unique<Module>(modelPath->toStdString(), load_mode);
|
| 266 | + |
| 267 | + // Default to using cores/2 threadpool threads. The long-term plan is to |
| 268 | + // improve performance-core detection in CPUInfo, but for now we can use |
| 269 | + // cores/2 as a sane default. |
| 270 | + // |
| 271 | + // Based on testing, this is almost universally faster than using all |
| 272 | + // cores, as efficiency cores can be quite slow. In extreme cases, using |
| 273 | + // all cores can be 10x slower than using cores/2. |
| 274 | + // |
| 275 | + // TODO: Allow overriding this default from Java. |
| 276 | + auto threadpool = executorch::extension::threadpool::get_threadpool(); |
| 277 | + if (threadpool) { |
| 278 | + int thread_count = cpuinfo_get_processors_count() / 2; |
| 279 | + if (thread_count > 0) { |
| 280 | + threadpool->_unsafe_reset_threadpool(thread_count); |
| 281 | + } |
| 282 | + } |
263 | 283 | }
|
264 | 284 |
|
265 | 285 | facebook::jni::local_ref<facebook::jni::JArrayClass<JEValue>> forward(
|
|
0 commit comments