Skip to content

[Execuotrch][cpuinfo] Add util to find out # of performant cores #2350

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
175 changes: 175 additions & 0 deletions backends/xnnpack/threadpool/cpuinfo_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/runtime/platform/assert.h>
#include <fstream>
#include <mutex>
#include <string>
#include <vector>

#include "cpuinfo_utils.h"

namespace torch {
namespace executorch {
namespace cpuinfo {

// Ignore revisions (last digit (4 LSBs))
#define CPUINFO_ARM_MIDR_CORTEX_A520 UINT32_C(0x410FD800)
#define CPUINFO_ARM_MIDR_CORTEX_A53 UINT32_C(0x410FD030)
#define CPUINFO_ARM_MIDR_CORTEX_A55 UINT32_C(0x410FD050)
#define CPUINFO_ARM_MIDR_CORTEX_A57 UINT32_C(0x410FD070)

#define RIVISION_MASK UINT32_C(0xFFFFFFF0)

namespace {
bool is_non_performant_core(const struct cpuinfo_uarch_info* uarch_info) {
switch (uarch_info->uarch) {
case cpuinfo_uarch_cortex_a55:
case cpuinfo_uarch_cortex_a53:
case cpuinfo_uarch_cortex_a510:
return true;
// This can be so many other cores.
// Need to update this to better account for slow cores
// Also does not account Apple's A/M series cores
// And not yet qcomm's
default:
break;
}
// A520 is not yet updated in cpuinfo
// Hence decode it separately.
#if CPUINFO_ARCH_ARM || CPUINFO_ARCH_ARM64
if ((uarch_info->midr & RIVISION_MASK) == CPUINFO_ARM_MIDR_CORTEX_A520) {
return true;
}
#endif
return false;
}

std::vector<uint32_t>* get_static_cpu_midr_vector() {
static std::vector<uint32_t> cpu_midrs;
return &cpu_midrs;
}

uint32_t _get_model_specific_num_cores() {
// Not sure how reliable this is but going with it for now.
const std::string kImageVersionPath = "/sys/devices/soc0/image_version";
ET_LOG(Info, "Reading file %s", kImageVersionPath.c_str());
std::fstream image_version_file(kImageVersionPath, std::ios_base::in);
if (image_version_file.is_open()) {
std::string x;
std::getline(image_version_file, x);
// Hardcoding some rules for now
if (x.find("S911") != std::string::npos) {
// Samsung S23 has:
// 1x3.36 GHz Cortex-X3
// 2x2.8 GHz Cortex-A715
// 2x2.8 GHz Cortex-A710
// 3x2.0 GHz Cortex-A510
// And we have balanced execution with 4 cores.
return 4;
}
}
ET_LOG(Info, "Failed to open midr file %s", kImageVersionPath.c_str());
return 0;
}

bool populate_available_cpu_mids() {
std::vector<uint32_t>* cpu_midrs = get_static_cpu_midr_vector();
uint32_t num_possible_cores = cpuinfo_get_processors_count();
cpu_midrs->resize(num_possible_cores);
const std::string kMidrFilePathPrefix = "/sys/devices/system/cpu/cpu";
const std::string kMidrFilePathSuffix = "/regs/identification/midr_el1";
for (int32_t i = 0; i < num_possible_cores; ++i) {
std::string midr_file_path =
kMidrFilePathPrefix + std::to_string(i) + kMidrFilePathSuffix;
ET_LOG(Info, "Reading file %s", midr_file_path.c_str());
std::fstream midr_file(midr_file_path, std::ios_base::in);
uint32_t tmp{0};
if (midr_file.is_open()) {
std::string x;
std::getline(midr_file, x);
tmp = std::stoi(x, nullptr, 16);
(*cpu_midrs)[i] = tmp;
} else {
ET_LOG(Info, "Failed to open midr file %s", midr_file_path.c_str());
cpu_midrs->clear();
return false;
}
}
return true;
}

uint32_t _get_num_performant_cores() {
// @lint-ignore CLANGTIDY facebook-hte-std::once_flag
static std::once_flag flag;
// @lint-ignore CLANGTIDY facebook-hte-std::call_once
std::call_once(flag, []() { populate_available_cpu_mids(); });
std::vector<uint32_t>* cpu_midrs = get_static_cpu_midr_vector();
uint32_t num_possible_cores = cpuinfo_get_processors_count();
if (num_possible_cores != cpu_midrs->size()) {
ET_LOG(Info, "CPU info and manual query on # of cpus dont match.");
return 0;
}
for (int32_t i = 0; i < cpu_midrs->size(); ++i) {
uint32_t masked_midr = (*cpu_midrs)[i] & RIVISION_MASK;
switch (masked_midr) {
case CPUINFO_ARM_MIDR_CORTEX_A520:
case CPUINFO_ARM_MIDR_CORTEX_A53:
case CPUINFO_ARM_MIDR_CORTEX_A55:
case CPUINFO_ARM_MIDR_CORTEX_A57:
num_possible_cores--;
break;
default:
break;
}
}
return num_possible_cores;
}

} // namespace

uint32_t get_num_performant_cores() {
ET_CHECK_MSG(cpuinfo_initialize(), "cpuinfo cannot be initialized.");
// First try and see if we have number of cores profiled for this specific
// device
uint32_t model_specific_num_cores = _get_model_specific_num_cores();
if (model_specific_num_cores > 0) {
return model_specific_num_cores;
}

// Else looks at either the # of litte cores if found
// Or parse the midr in "Something seems wrong" section.
const uint32_t uarch_count = cpuinfo_get_uarchs_count();
uint32_t num_possible_cores = cpuinfo_get_processors_count();
uint32_t num_non_performant_core = 0;
if (uarch_count > 1) {
for (int32_t i = 0; i < uarch_count; ++i) {
const struct cpuinfo_uarch_info* uarch_info = cpuinfo_get_uarch(i);
if (is_non_performant_core(uarch_info)) {
num_non_performant_core += uarch_info->processor_count;
}
}
ET_LOG(Info, "Number of efficient cores %d", num_non_performant_core);
if (num_possible_cores <= num_non_performant_core) {
ET_LOG(
Info, "Total number of cores must be larger than efficient cores.");
return 0;
}
return (num_possible_cores - num_non_performant_core);
} else {
// Something seems wrong. Lets check each processor's midr
// In one plua 12 while it has 2 little cores, the topology
// reported in /sys/devices/system/cpu/cpu* /topology/core_siblings_list
// report wrong topology which results in wront configratuon
return _get_num_performant_cores();
}
}

} // namespace cpuinfo
} // namespace executorch
} // namespace torch
21 changes: 21 additions & 0 deletions backends/xnnpack/threadpool/cpuinfo_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#pragma once

#include <cpuinfo.h>

namespace torch {
namespace executorch {
namespace cpuinfo {

uint32_t get_num_performant_cores();

} // namespace cpuinfo
} // namespace executorch
} // namespace torch
24 changes: 24 additions & 0 deletions backends/xnnpack/threadpool/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,27 @@ def define_common_targets():
"@EXECUTORCH_CLIENTS",
],
)

runtime.cxx_library(
name = "cpuinfo_utils",
srcs = [
"cpuinfo_utils.cpp",
],
deps = [
"//executorch/runtime/core:core",
],
exported_headers = [
"cpuinfo_utils.h",
],
exported_deps = [
third_party_dep("pthreadpool"),
third_party_dep("cpuinfo"),
],
visibility = [
"//executorch/...",
"//executorch/backends/...",
"//executorch/runtime/backend/...",
"//executorch/extension/threadpool/test/...",
"@EXECUTORCH_CLIENTS",
],
)