Skip to content

Commit 639b397

Browse files
committed
[OpenMP][Tools] Fix Archer handling of task dependencies
The current handling of dependencies in Archer has two flaws: - annotation of dependency synchronization is not limited to sibling tasks - annotation of in/out dependencies is based on the assumption, that dependency variables will rarely be byte-sized variables. This patch introduces a map in the generating task to manage the dependency variables for the child tasks. The map is only accesses from the generating task, so no locking is necessary. This also limits the dependency-based synchronization to sibling tasks. This patch also introduces proper handling for new dependency types such as mutexinoutset and inoutset. Differential Revision: https://reviews.llvm.org/D103608
1 parent 08d8f1a commit 639b397

File tree

1 file changed

+109
-36
lines changed

1 file changed

+109
-36
lines changed

openmp/tools/archer/ompt-tsan.cpp

Lines changed: 109 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,64 @@ template <typename T> struct DataPoolEntry {
344344
DataPoolEntry(DataPool<T> *dp) : owner(dp) {}
345345
};
346346

347+
struct DependencyData;
348+
typedef DataPool<DependencyData> DependencyDataPool;
349+
template <>
350+
__thread DependencyDataPool *DependencyDataPool::ThreadDataPool = nullptr;
351+
352+
/// Data structure to store additional information for task dependency.
353+
struct DependencyData final : DataPoolEntry<DependencyData> {
354+
ompt_tsan_clockid in;
355+
ompt_tsan_clockid out;
356+
ompt_tsan_clockid inoutset;
357+
void *GetInPtr() { return &in; }
358+
void *GetOutPtr() { return &out; }
359+
void *GetInoutsetPtr() { return &inoutset; }
360+
361+
void Reset() {}
362+
363+
static DependencyData *New() { return DataPoolEntry<DependencyData>::New(); }
364+
365+
DependencyData(DataPool<DependencyData> *dp)
366+
: DataPoolEntry<DependencyData>(dp) {}
367+
};
368+
369+
struct TaskDependency {
370+
void *inPtr;
371+
void *outPtr;
372+
void *inoutsetPtr;
373+
ompt_dependence_type_t type;
374+
TaskDependency(DependencyData *depData, ompt_dependence_type_t type)
375+
: inPtr(depData->GetInPtr()), outPtr(depData->GetOutPtr()),
376+
inoutsetPtr(depData->GetInoutsetPtr()), type(type) {}
377+
void AnnotateBegin() {
378+
if (type == ompt_dependence_type_out ||
379+
type == ompt_dependence_type_inout ||
380+
type == ompt_dependence_type_mutexinoutset) {
381+
TsanHappensAfter(inPtr);
382+
TsanHappensAfter(outPtr);
383+
TsanHappensAfter(inoutsetPtr);
384+
} else if (type == ompt_dependence_type_in) {
385+
TsanHappensAfter(outPtr);
386+
TsanHappensAfter(inoutsetPtr);
387+
} else if (type == ompt_dependence_type_inoutset) {
388+
TsanHappensAfter(inPtr);
389+
TsanHappensAfter(outPtr);
390+
}
391+
}
392+
void AnnotateEnd() {
393+
if (type == ompt_dependence_type_out ||
394+
type == ompt_dependence_type_inout ||
395+
type == ompt_dependence_type_mutexinoutset) {
396+
TsanHappensBefore(outPtr);
397+
} else if (type == ompt_dependence_type_in) {
398+
TsanHappensBefore(inPtr);
399+
} else if (type == ompt_dependence_type_inoutset) {
400+
TsanHappensBefore(inoutsetPtr);
401+
}
402+
}
403+
};
404+
347405
struct ParallelData;
348406
typedef DataPool<ParallelData> ParallelDataPool;
349407
template <>
@@ -451,11 +509,17 @@ struct TaskData final : DataPoolEntry<TaskData> {
451509
Taskgroup *TaskGroup{nullptr};
452510

453511
/// Dependency information for this task.
454-
ompt_dependence_t *Dependencies{nullptr};
512+
TaskDependency *Dependencies{nullptr};
455513

456514
/// Number of dependency entries.
457515
unsigned DependencyCount{0};
458516

517+
// The dependency-map stores DependencyData objects representing
518+
// the dependency variables used on the sibling tasks created from
519+
// this task
520+
// We expect a rare need for the dependency-map, so alloc on demand
521+
std::unordered_map<void *, DependencyData *> *DependencyMap{nullptr};
522+
459523
#ifdef DEBUG
460524
int freed{0};
461525
#endif
@@ -506,6 +570,14 @@ struct TaskData final : DataPoolEntry<TaskData> {
506570
ImplicitTask = nullptr;
507571
Team = nullptr;
508572
TaskGroup = nullptr;
573+
if (DependencyMap) {
574+
for (auto i : *DependencyMap)
575+
i.second->Delete();
576+
delete DependencyMap;
577+
}
578+
DependencyMap = nullptr;
579+
if (Dependencies)
580+
free(Dependencies);
509581
Dependencies = nullptr;
510582
DependencyCount = 0;
511583
#ifdef DEBUG
@@ -528,14 +600,6 @@ static inline TaskData *ToTaskData(ompt_data_t *task_data) {
528600
return reinterpret_cast<TaskData *>(task_data->ptr);
529601
}
530602

531-
static inline void *ToInAddr(void *OutAddr) {
532-
// FIXME: This will give false negatives when a second variable lays directly
533-
// behind a variable that only has a width of 1 byte.
534-
// Another approach would be to "negate" the address or to flip the
535-
// first bit...
536-
return reinterpret_cast<char *>(OutAddr) + 1;
537-
}
538-
539603
/// Store a mutex for each wait_id to resolve race condition with callbacks.
540604
std::unordered_map<ompt_wait_id_t, std::mutex> Locks;
541605
std::mutex LocksMutex;
@@ -551,13 +615,19 @@ static void ompt_tsan_thread_begin(ompt_thread_t thread_type,
551615
TaskDataPool::ThreadDataPool = new TaskDataPool;
552616
TsanNewMemory(TaskDataPool::ThreadDataPool,
553617
sizeof(TaskDataPool::ThreadDataPool));
618+
DependencyDataPool::ThreadDataPool = new DependencyDataPool;
619+
TsanNewMemory(DependencyDataPool::ThreadDataPool,
620+
sizeof(DependencyDataPool::ThreadDataPool));
554621
thread_data->value = my_next_id();
555622
}
556623

557624
static void ompt_tsan_thread_end(ompt_data_t *thread_data) {
625+
TsanIgnoreWritesBegin();
558626
delete ParallelDataPool::ThreadDataPool;
559627
delete TaskgroupPool::ThreadDataPool;
560628
delete TaskDataPool::ThreadDataPool;
629+
delete DependencyDataPool::ThreadDataPool;
630+
TsanIgnoreWritesEnd();
561631
}
562632

563633
/// OMPT event callbacks for handling parallel regions.
@@ -805,17 +875,26 @@ static void ompt_tsan_task_create(
805875
}
806876
}
807877

808-
static void __ompt_tsan_release_task(TaskData *task) {
878+
static void freeTask(TaskData *task) {
809879
while (task != nullptr && --task->RefCount == 0) {
810880
TaskData *Parent = task->Parent;
811-
if (task->DependencyCount > 0) {
812-
delete[] task->Dependencies;
813-
}
814881
task->Delete();
815882
task = Parent;
816883
}
817884
}
818885

886+
static void releaseDependencies(TaskData *task) {
887+
for (unsigned i = 0; i < task->DependencyCount; i++) {
888+
task->Dependencies[i].AnnotateEnd();
889+
}
890+
}
891+
892+
static void acquireDependencies(TaskData *task) {
893+
for (unsigned i = 0; i < task->DependencyCount; i++) {
894+
task->Dependencies[i].AnnotateBegin();
895+
}
896+
}
897+
819898
static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
820899
ompt_task_status_t prior_task_status,
821900
ompt_data_t *second_task_data) {
@@ -879,18 +958,9 @@ static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
879958
}
880959

881960
// release dependencies
882-
for (unsigned i = 0; i < FromTask->DependencyCount; i++) {
883-
ompt_dependence_t *Dependency = &FromTask->Dependencies[i];
884-
885-
// in dependencies block following inout and out dependencies!
886-
TsanHappensBefore(ToInAddr(Dependency->variable.ptr));
887-
if (Dependency->dependence_type == ompt_dependence_type_out ||
888-
Dependency->dependence_type == ompt_dependence_type_inout) {
889-
TsanHappensBefore(Dependency->variable.ptr);
890-
}
891-
}
961+
releaseDependencies(FromTask);
892962
// free the previously running task
893-
__ompt_tsan_release_task(FromTask);
963+
freeTask(FromTask);
894964
}
895965

896966
// For late fulfill of detached task, there is no task to schedule to
@@ -919,16 +989,7 @@ static void ompt_tsan_task_schedule(ompt_data_t *first_task_data,
919989
// Handle dependencies on first execution of the task
920990
if (ToTask->execution == 0) {
921991
ToTask->execution++;
922-
for (unsigned i = 0; i < ToTask->DependencyCount; i++) {
923-
ompt_dependence_t *Dependency = &ToTask->Dependencies[i];
924-
925-
TsanHappensAfter(Dependency->variable.ptr);
926-
// in and inout dependencies are also blocked by prior in dependencies!
927-
if (Dependency->dependence_type == ompt_dependence_type_out ||
928-
Dependency->dependence_type == ompt_dependence_type_inout) {
929-
TsanHappensAfter(ToInAddr(Dependency->variable.ptr));
930-
}
931-
}
992+
acquireDependencies(ToTask);
932993
}
933994
// 1. Task will begin execution after it has been created.
934995
// 2. Task will resume after it has been switched away.
@@ -940,9 +1001,21 @@ static void ompt_tsan_dependences(ompt_data_t *task_data,
9401001
if (ndeps > 0) {
9411002
// Copy the data to use it in task_switch and task_end.
9421003
TaskData *Data = ToTaskData(task_data);
943-
Data->Dependencies = new ompt_dependence_t[ndeps];
944-
std::memcpy(Data->Dependencies, deps, sizeof(ompt_dependence_t) * ndeps);
1004+
if (!Data->Parent->DependencyMap)
1005+
Data->Parent->DependencyMap =
1006+
new std::unordered_map<void *, DependencyData *>();
1007+
Data->Dependencies =
1008+
(TaskDependency *)malloc(sizeof(TaskDependency) * ndeps);
9451009
Data->DependencyCount = ndeps;
1010+
for (int i = 0; i < ndeps; i++) {
1011+
auto ret = Data->Parent->DependencyMap->insert(
1012+
std::make_pair(deps[i].variable.ptr, nullptr));
1013+
if (ret.second) {
1014+
ret.first->second = DependencyData::New();
1015+
}
1016+
new ((void *)(Data->Dependencies + i))
1017+
TaskDependency(ret.first->second, deps[i].dependence_type);
1018+
}
9461019

9471020
// This callback is executed before this task is first started.
9481021
TsanHappensBefore(Data->GetTaskPtr());

0 commit comments

Comments
 (0)