Skip to content

Commit f8ee045

Browse files
committed
[OpenMP] Eliminate the ThreadStates array in favor of indirection
If we have thread states, the program is going to be rather slow. If we don't, we want to avoid wasting shared memory. This patch introduces a slight penalty (malloc + indirection) for the slow path and reduces resource usage for the fast path.

Differential Revision: https://reviews.llvm.org/D135037
1 parent b113965 commit f8ee045

File tree

3 files changed

+24
-8
lines changed

3 files changed

+24
-8
lines changed

openmp/libomptarget/DeviceRTL/include/State.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ struct ThreadStateTy {
109109
}
110110
};
111111

112-
extern ThreadStateTy *ThreadStates[mapping::MaxThreadsPerTeam];
112+
extern ThreadStateTy **ThreadStates;
113113
#pragma omp allocate(ThreadStates) allocator(omp_pteam_mem_alloc)
114114

115115
/// Initialize the state machinery. Must be called by all threads.

openmp/libomptarget/DeviceRTL/include/Types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ using uint32_t = unsigned int;
3333
using int64_t = long;
3434
using uint64_t = unsigned long;
3535
using size_t = decltype(sizeof(char));
36+
// TODO: Properly implement this
37+
using intptr_t = int64_t;
38+
using uintptr_t = uint64_t;
3639

3740
static_assert(sizeof(int8_t) == 1, "type size mismatch");
3841
static_assert(sizeof(uint8_t) == 1, "type size mismatch");

openmp/libomptarget/DeviceRTL/src/State.cpp

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "Configuration.h"
1313
#include "Debug.h"
1414
#include "Interface.h"
15+
#include "Mapping.h"
1516
#include "Synchronization.h"
1617
#include "Types.h"
1718
#include "Utils.h"
@@ -221,10 +222,7 @@ void state::TeamStateTy::assertEqual(TeamStateTy &Other) const {
221222
}
222223

223224
state::TeamStateTy SHARED(_OMP::state::TeamState);
224-
225-
__attribute__((loader_uninitialized))
226-
state::ThreadStateTy *_OMP::state::ThreadStates[mapping::MaxThreadsPerTeam];
227-
#pragma omp allocate(_OMP::state::ThreadStates) allocator(omp_pteam_mem_alloc)
225+
state::ThreadStateTy **SHARED(_OMP::state::ThreadStates);
228226

229227
namespace {
230228

@@ -248,18 +246,32 @@ void state::init(bool IsSPMD) {
248246
if (mapping::isInitialThreadInLevel0(IsSPMD)) {
249247
TeamState.init(IsSPMD);
250248
DebugEntryRAII::init();
249+
ThreadStates = nullptr;
251250
}
252-
253-
ThreadStates[mapping::getThreadIdInBlock()] = nullptr;
254251
}
255252

256253
void state::enterDataEnvironment(IdentTy *Ident) {
257254
ASSERT(config::mayUseThreadStates() &&
258255
"Thread state modified while explicitly disabled!");
256+
if (!config::mayUseThreadStates())
257+
return;
259258

260259
unsigned TId = mapping::getThreadIdInBlock();
261260
ThreadStateTy *NewThreadState =
262261
static_cast<ThreadStateTy *>(__kmpc_alloc_shared(sizeof(ThreadStateTy)));
262+
uintptr_t *ThreadStatesBitsPtr = reinterpret_cast<uintptr_t *>(&ThreadStates);
263+
if (!atomic::load(ThreadStatesBitsPtr, atomic::seq_cst)) {
264+
uint32_t Bytes = sizeof(ThreadStates[0]) * mapping::getBlockSize();
265+
void *ThreadStatesPtr =
266+
memory::allocShared(Bytes, "Thread state array allocation");
267+
if (!atomic::cas(ThreadStatesBitsPtr, uintptr_t(0),
268+
reinterpret_cast<uintptr_t>(ThreadStatesPtr),
269+
atomic::seq_cst, atomic::seq_cst))
270+
memory::freeShared(ThreadStatesPtr, Bytes,
271+
"Thread state array allocated multiple times");
272+
ASSERT(atomic::load(ThreadStatesBitsPtr, atomic::seq_cst) &&
273+
"Expected valid thread states bit!");
274+
}
263275
NewThreadState->init(ThreadStates[TId]);
264276
TeamState.HasThreadState = true;
265277
ThreadStates[TId] = NewThreadState;
@@ -274,6 +286,8 @@ void state::exitDataEnvironment() {
274286
}
275287

276288
void state::resetStateForThread(uint32_t TId) {
289+
if (!config::mayUseThreadStates())
290+
return;
277291
if (OMP_LIKELY(!TeamState.HasThreadState || !ThreadStates[TId]))
278292
return;
279293

@@ -295,7 +309,6 @@ void state::assumeInitialState(bool IsSPMD) {
295309
TeamStateTy InitialTeamState;
296310
InitialTeamState.init(IsSPMD);
297311
InitialTeamState.assertEqual(TeamState);
298-
ASSERT(!ThreadStates[mapping::getThreadIdInBlock()]);
299312
ASSERT(mapping::isSPMDMode() == IsSPMD);
300313
}
301314

0 commit comments

Comments
 (0)