Skip to content

Commit 336ab2d

Browse files
committed
[Support] On Windows, take the affinity mask into account
The number of hardware threads available to a ThreadPool can be limited by setting an affinity mask. For example, `> start /B /AFFINITY 0xF lld-link.exe ...` would let LLD use only 4 hyper-threads. Previously, there was an outstanding issue on dual-CPU machines running Windows Server 2019 that prevented the process from using both CPU sockets. Under normal conditions, when no affinity mask was set, ProcessorGroup::AllThreads was different from ProcessorGroup::UsableThreads. The previous code in llvm/lib/Support/Windows/Threading.inc (L201) improperly assumed those two values to be equal, and consequently limited execution to only one CPU socket. Differential Revision: https://reviews.llvm.org/D92419
1 parent cd4c55c commit 336ab2d

File tree

6 files changed

+118
-28
lines changed

6 files changed

+118
-28
lines changed

llvm/include/llvm/Support/Program.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#define LLVM_SUPPORT_PROGRAM_H
1515

1616
#include "llvm/ADT/ArrayRef.h"
17+
#include "llvm/ADT/BitVector.h"
1718
#include "llvm/ADT/Optional.h"
1819
#include "llvm/ADT/StringRef.h"
1920
#include "llvm/Config/llvm-config.h"
@@ -125,9 +126,11 @@ namespace sys {
125126
///< string is non-empty upon return an error occurred while invoking the
126127
///< program.
127128
bool *ExecutionFailed = nullptr,
128-
Optional<ProcessStatistics> *ProcStat = nullptr ///< If non-zero, provides
129-
/// a pointer to a structure in which process execution statistics will be
130-
/// stored.
129+
Optional<ProcessStatistics> *ProcStat = nullptr, ///< If non-zero,
130+
/// provides a pointer to a structure in which process execution
131+
/// statistics will be stored.
132+
BitVector *AffinityMask = nullptr ///< CPUs or processors the new
133+
/// program shall run on.
131134
);
132135

133136
/// Similar to ExecuteAndWait, but returns immediately.
@@ -140,7 +143,8 @@ namespace sys {
140143
ArrayRef<Optional<StringRef>> Redirects = {},
141144
unsigned MemoryLimit = 0,
142145
std::string *ErrMsg = nullptr,
143-
bool *ExecutionFailed = nullptr);
146+
bool *ExecutionFailed = nullptr,
147+
BitVector *AffinityMask = nullptr);
144148

145149
/// Return true if the given arguments fit within system-specific
146150
/// argument length limits.

llvm/lib/Support/Program.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,17 +26,20 @@ using namespace sys;
2626
static bool Execute(ProcessInfo &PI, StringRef Program,
2727
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
2828
ArrayRef<Optional<StringRef>> Redirects,
29-
unsigned MemoryLimit, std::string *ErrMsg);
29+
unsigned MemoryLimit, std::string *ErrMsg,
30+
BitVector *AffinityMask);
3031

3132
int sys::ExecuteAndWait(StringRef Program, ArrayRef<StringRef> Args,
3233
Optional<ArrayRef<StringRef>> Env,
3334
ArrayRef<Optional<StringRef>> Redirects,
3435
unsigned SecondsToWait, unsigned MemoryLimit,
3536
std::string *ErrMsg, bool *ExecutionFailed,
36-
Optional<ProcessStatistics> *ProcStat) {
37+
Optional<ProcessStatistics> *ProcStat,
38+
BitVector *AffinityMask) {
3739
assert(Redirects.empty() || Redirects.size() == 3);
3840
ProcessInfo PI;
39-
if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg)) {
41+
if (Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
42+
AffinityMask)) {
4043
if (ExecutionFailed)
4144
*ExecutionFailed = false;
4245
ProcessInfo Result =
@@ -55,12 +58,13 @@ ProcessInfo sys::ExecuteNoWait(StringRef Program, ArrayRef<StringRef> Args,
5558
Optional<ArrayRef<StringRef>> Env,
5659
ArrayRef<Optional<StringRef>> Redirects,
5760
unsigned MemoryLimit, std::string *ErrMsg,
58-
bool *ExecutionFailed) {
61+
bool *ExecutionFailed, BitVector *AffinityMask) {
5962
assert(Redirects.empty() || Redirects.size() == 3);
6063
ProcessInfo PI;
6164
if (ExecutionFailed)
6265
*ExecutionFailed = false;
63-
if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg))
66+
if (!Execute(PI, Program, Args, Env, Redirects, MemoryLimit, ErrMsg,
67+
AffinityMask))
6468
if (ExecutionFailed)
6569
*ExecutionFailed = true;
6670

llvm/lib/Support/Unix/Program.inc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,14 +174,18 @@ toNullTerminatedCStringArray(ArrayRef<StringRef> Strings, StringSaver &Saver) {
174174
static bool Execute(ProcessInfo &PI, StringRef Program,
175175
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
176176
ArrayRef<Optional<StringRef>> Redirects,
177-
unsigned MemoryLimit, std::string *ErrMsg) {
177+
unsigned MemoryLimit, std::string *ErrMsg,
178+
BitVector *AffinityMask) {
178179
if (!llvm::sys::fs::exists(Program)) {
179180
if (ErrMsg)
180181
*ErrMsg = std::string("Executable \"") + Program.str() +
181182
std::string("\" doesn't exist!");
182183
return false;
183184
}
184185

186+
assert(!AffinityMask && "Starting a process with an affinity mask is "
187+
"currently not supported on Unix!");
188+
185189
BumpPtrAllocator Allocator;
186190
StringSaver Saver(Allocator);
187191
std::vector<const char *> ArgVector, EnvVector;

llvm/lib/Support/Windows/Program.inc

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,8 @@ static HANDLE RedirectIO(Optional<StringRef> Path, int fd,
171171
static bool Execute(ProcessInfo &PI, StringRef Program,
172172
ArrayRef<StringRef> Args, Optional<ArrayRef<StringRef>> Env,
173173
ArrayRef<Optional<StringRef>> Redirects,
174-
unsigned MemoryLimit, std::string *ErrMsg) {
174+
unsigned MemoryLimit, std::string *ErrMsg,
175+
BitVector *AffinityMask) {
175176
if (!sys::fs::can_execute(Program)) {
176177
if (ErrMsg)
177178
*ErrMsg = "program not executable";
@@ -277,11 +278,15 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
277278
return false;
278279
}
279280

281+
unsigned CreateFlags = CREATE_UNICODE_ENVIRONMENT;
282+
if (AffinityMask)
283+
CreateFlags |= CREATE_SUSPENDED;
284+
280285
std::vector<wchar_t> CommandUtf16(Command.size() + 1, 0);
281286
std::copy(Command.begin(), Command.end(), CommandUtf16.begin());
282287
BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, TRUE,
283-
CREATE_UNICODE_ENVIRONMENT,
284-
EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, &pi);
288+
CreateFlags, EnvBlock.empty() ? 0 : EnvBlock.data(),
289+
0, &si, &pi);
285290
DWORD err = GetLastError();
286291

287292
// Regardless of whether the process got created or not, we are done with
@@ -329,6 +334,13 @@ static bool Execute(ProcessInfo &PI, StringRef Program,
329334
}
330335
}
331336

337+
// Set the affinity mask
338+
if (AffinityMask) {
339+
::SetProcessAffinityMask(pi.hProcess,
340+
(DWORD_PTR)AffinityMask->getData().front());
341+
::ResumeThread(pi.hThread);
342+
}
343+
332344
return true;
333345
}
334346

llvm/lib/Support/Windows/Threading.inc

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -195,14 +195,27 @@ static ArrayRef<ProcessorGroup> getProcessorGroups() {
195195
if (!IterateProcInfo(RelationProcessorCore, HandleProc))
196196
return std::vector<ProcessorGroup>();
197197

198-
// If there's an affinity mask set on one of the CPUs, then assume the user
199-
// wants to constrain the current process to only a single CPU.
200-
for (auto &G : Groups) {
201-
if (G.UsableThreads != G.AllThreads) {
202-
ProcessorGroup NewG{G};
198+
// If there's an affinity mask set, assume the user wants to constrain the
199+
// current process to only a single CPU group. On Windows, it is not
200+
// possible for affinity masks to cross CPU group boundaries.
201+
DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
202+
if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
203+
&SystemAffinityMask) &&
204+
ProcessAffinityMask != SystemAffinityMask) {
205+
// We don't expect more than 4 CPU groups on Windows (256 processors).
206+
USHORT GroupCount = 4;
207+
USHORT GroupArray[4]{};
208+
if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
209+
GroupArray)) {
210+
assert(GroupCount == 1 &&
211+
"On startup, a program is expected to be assigned only to "
212+
"one processor group!");
213+
unsigned CurrentGroupID = GroupArray[0];
214+
ProcessorGroup NewG{Groups[CurrentGroupID]};
215+
NewG.Affinity = ProcessAffinityMask;
216+
NewG.UsableThreads = countPopulation(ProcessAffinityMask);
203217
Groups.clear();
204218
Groups.push_back(NewG);
205-
break;
206219
}
207220
}
208221

llvm/unittests/Support/ThreadPool.cpp

Lines changed: 62 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@
88

99
#include "llvm/Support/ThreadPool.h"
1010

11-
#include "llvm/ADT/DenseSet.h"
1211
#include "llvm/ADT/STLExtras.h"
12+
#include "llvm/ADT/SetVector.h"
1313
#include "llvm/ADT/SmallVector.h"
1414
#include "llvm/ADT/Triple.h"
15+
#include "llvm/Support/CommandLine.h"
1516
#include "llvm/Support/Host.h"
17+
#include "llvm/Support/Program.h"
1618
#include "llvm/Support/TargetSelect.h"
1719
#include "llvm/Support/Threading.h"
1820

@@ -71,7 +73,7 @@ class ThreadPoolTest : public testing::Test {
7173

7274
void SetUp() override { MainThreadReady = false; }
7375

74-
void RunOnAllSockets(ThreadPoolStrategy S);
76+
std::vector<llvm::BitVector> RunOnAllSockets(ThreadPoolStrategy S);
7577

7678
std::condition_variable WaitMainThread;
7779
std::mutex WaitMainThreadMutex;
@@ -169,15 +171,16 @@ TEST_F(ThreadPoolTest, PoolDestruction) {
169171

170172
#if LLVM_ENABLE_THREADS == 1
171173

172-
void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
174+
std::vector<llvm::BitVector>
175+
ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
173176
// FIXME: Skip these tests on non-Windows because multi-socket systems were not
174177
// tested on Unix yet, and llvm::get_thread_affinity_mask() isn't implemented
175178
// for Unix.
176179
Triple Host(Triple::normalize(sys::getProcessTriple()));
177180
if (!Host.isOSWindows())
178-
return;
181+
return {};
179182

180-
llvm::DenseSet<llvm::BitVector> ThreadsUsed;
183+
llvm::SetVector<llvm::BitVector> ThreadsUsed;
181184
std::mutex Lock;
182185
{
183186
std::condition_variable AllThreads;
@@ -198,25 +201,75 @@ void ThreadPoolTest::RunOnAllSockets(ThreadPoolStrategy S) {
198201
ThreadsUsed.insert(Mask);
199202
});
200203
}
201-
ASSERT_EQ(true, ThreadsUsed.empty());
204+
EXPECT_EQ(true, ThreadsUsed.empty());
202205
{
203206
std::unique_lock<std::mutex> Guard(AllThreadsLock);
204207
AllThreads.wait(Guard,
205208
[&]() { return Active == S.compute_thread_count(); });
206209
}
207210
setMainThreadReady();
208211
}
209-
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
212+
return ThreadsUsed.takeVector();
210213
}
211214

212215
TEST_F(ThreadPoolTest, AllThreads_UseAllRessources) {
213216
CHECK_UNSUPPORTED();
214-
RunOnAllSockets({});
217+
std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
218+
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
215219
}
216220

217221
TEST_F(ThreadPoolTest, AllThreads_OneThreadPerCore) {
218222
CHECK_UNSUPPORTED();
219-
RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
223+
std::vector<llvm::BitVector> ThreadsUsed =
224+
RunOnAllSockets(llvm::heavyweight_hardware_concurrency());
225+
ASSERT_EQ(llvm::get_cpus(), ThreadsUsed.size());
220226
}
221227

228+
#if defined(_WIN32) // FIXME: implement AffinityMask in Support/Unix/Program.inc
229+
230+
// From TestMain.cpp.
231+
extern const char *TestMainArgv0;
232+
233+
// Just a reachable symbol to ease resolving of the executable's path.
234+
static cl::opt<std::string> ThreadPoolTestStringArg1("thread-pool-string-arg1");
235+
236+
#ifdef _MSC_VER
237+
#define setenv(name, var, ignore) _putenv_s(name, var)
222238
#endif
239+
240+
TEST_F(ThreadPoolTest, AffinityMask) {
241+
CHECK_UNSUPPORTED();
242+
243+
// Skip this test if less than 4 threads are available.
244+
if (llvm::hardware_concurrency().compute_thread_count() < 4)
245+
return;
246+
247+
using namespace llvm::sys;
248+
if (getenv("LLVM_THREADPOOL_AFFINITYMASK")) {
249+
std::vector<llvm::BitVector> ThreadsUsed = RunOnAllSockets({});
250+
// Ensure the threads only ran on CPUs 0-3.
251+
for (auto &It : ThreadsUsed)
252+
ASSERT_LT(It.getData().front(), 16UL);
253+
return;
254+
}
255+
std::string Executable =
256+
sys::fs::getMainExecutable(TestMainArgv0, &ThreadPoolTestStringArg1);
257+
StringRef argv[] = {Executable, "--gtest_filter=ThreadPoolTest.AffinityMask"};
258+
259+
// Add environment variable to the environment of the child process.
260+
int Res = setenv("LLVM_THREADPOOL_AFFINITYMASK", "1", false);
261+
ASSERT_EQ(Res, 0);
262+
263+
std::string Error;
264+
bool ExecutionFailed;
265+
BitVector Affinity;
266+
Affinity.resize(4);
267+
Affinity.set(0, 4); // Use CPUs 0,1,2,3.
268+
int Ret = sys::ExecuteAndWait(Executable, argv, {}, {}, 0, 0, &Error,
269+
&ExecutionFailed, nullptr, &Affinity);
270+
ASSERT_EQ(0, Ret);
271+
}
272+
273+
#endif // #if _WIN32
274+
275+
#endif // #if LLVM_ENABLE_THREADS == 1

0 commit comments

Comments
 (0)