Skip to content

Commit 057a4f3

Browse files
Septa2112 authored and arthw committed
common : add support for cpu_get_num_physical_cores() on Windows (ggml-org#8771)
* Add support for cpu_get_num_physical_cores() on Windows
* fix build bug on msys2-clang64 and ucrt64
* avoid adding new function
* add new macros to avoid windows+mingw64
* Add error checking to return default value
1 parent ca08dbf commit 057a4f3

File tree

1 file changed

+34
-2
lines changed

1 file changed

+34
-2
lines changed

common/common.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,34 @@ int32_t cpu_get_num_physical_cores() {
110110
if (result == 0) {
111111
return num_physical_cores;
112112
}
113-
#elif defined(_WIN32)
114-
//TODO: Implement
113+
#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
114+
// TODO: windows + arm64 + mingw64
115+
unsigned int n_threads_win = std::thread::hardware_concurrency();
116+
unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;
117+
118+
DWORD buffer_size = 0;
119+
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
120+
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
121+
return default_threads;
122+
}
123+
}
124+
125+
std::vector<char> buffer(buffer_size);
126+
if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
127+
return default_threads;
128+
}
129+
130+
int32_t num_physical_cores = 0;
131+
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
132+
while (buffer_size > 0) {
133+
if (info->Relationship == RelationProcessorCore) {
134+
num_physical_cores += info->Processor.GroupCount;
135+
}
136+
buffer_size -= info->Size;
137+
info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
138+
}
139+
140+
return num_physical_cores > 0 ? num_physical_cores : default_threads;
115141
#endif
116142
unsigned int n_threads = std::thread::hardware_concurrency();
117143
return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
@@ -1727,7 +1753,13 @@ std::string gpt_params_get_system_info(const gpt_params & params) {
17271753
if (params.n_threads_batch != -1) {
17281754
os << " (n_threads_batch = " << params.n_threads_batch << ")";
17291755
}
1756+
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
1757+
// TODO: windows + arm64 + mingw64
1758+
DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
1759+
os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
1760+
#else
17301761
os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
1762+
#endif
17311763

17321764
return os.str();
17331765
}

0 commit comments

Comments
 (0)