Skip to content

Commit 00ccfcf

Browse files
committed
[OpenMP] [OMPT] [7/8] Invoke tool-supplied callbacks before and after target launch and data transfer operations
Implemented RAII objects, initialized at target entry points, that invoke tool-supplied callbacks. Updated status of target callbacks as implemented. Depends on D127365. Patch from John Mellor-Crummey <[email protected]>, with contributions from Dhruva Chakrabarti <[email protected]> and Jan-Patrick Lehr <[email protected]>. Reviewed By: jdoerfert, dhruvachak. Differential Revision: https://reviews.llvm.org/D127367
1 parent 0710290 commit 00ccfcf

14 files changed

+457
-83
lines changed

openmp/libomptarget/include/OmptCallback.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,13 @@
2727
FOREACH_OMPT_NOEMI_EVENT(macro) \
2828
FOREACH_OMPT_EMI_EVENT(macro)
2929

30+
#define performIfOmptInitialized(stmt) \
31+
do { \
32+
if (llvm::omp::target::ompt::Initialized) { \
33+
stmt; \
34+
} \
35+
} while (0)
36+
3037
#define performOmptCallback(CallbackName, ...) \
3138
do { \
3239
if (ompt_callback_##CallbackName##_fn) \
@@ -89,6 +96,8 @@ extern bool Initialized;
8996
} // namespace omp
9097
} // namespace llvm
9198

99+
#else
100+
#define performIfOmptInitialized(stmt)
92101
#endif // OMPT_SUPPORT
93102

94103
#pragma pop_macro("DEBUG_PREFIX")

openmp/libomptarget/src/OmptCallback.cpp

Lines changed: 54 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -35,12 +35,20 @@ FOREACH_OMPT_NOEMI_EVENT(defineOmptCallback)
3535
FOREACH_OMPT_EMI_EVENT(defineOmptCallback)
3636
#undef defineOmptCallback
3737

38-
/// Thread local state for target region and associated metadata
39-
thread_local llvm::omp::target::ompt::Interface OmptInterface;
38+
/// Forward declaration
39+
class LibomptargetRtlFinalizer;
4040

41-
/// Define function pointers
42-
ompt_get_task_data_t ompt_get_task_data_fn = nullptr;
41+
/// Object that will maintain the RTL finalizer from the plugin
42+
LibomptargetRtlFinalizer *LibraryFinalizer = nullptr;
43+
44+
thread_local Interface llvm::omp::target::ompt::RegionInterface;
45+
46+
bool llvm::omp::target::ompt::Initialized = false;
47+
48+
ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr;
49+
ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = nullptr;
4350
ompt_get_target_task_data_t ompt_get_target_task_data_fn = nullptr;
51+
ompt_get_task_data_t ompt_get_task_data_fn = nullptr;
4452

4553
/// Unique correlation id
4654
static std::atomic<uint64_t> IdCounter(1);
@@ -51,14 +59,14 @@ static uint64_t createId() { return IdCounter.fetch_add(1); }
5159
/// Create a new correlation id and update the operations id
5260
static uint64_t createOpId() {
5361
uint64_t NewId = createId();
54-
OmptInterface.setHostOpId(NewId);
62+
RegionInterface.setHostOpId(NewId);
5563
return NewId;
5664
}
5765

5866
/// Create a new correlation id and update the target region id
5967
static uint64_t createRegionId() {
6068
uint64_t NewId = createId();
61-
OmptInterface.setTargetDataValue(NewId);
69+
RegionInterface.setTargetDataValue(NewId);
6270
return NewId;
6371
}
6472

@@ -68,18 +76,19 @@ void Interface::beginTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
6876
if (ompt_callback_target_data_op_emi_fn) {
6977
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
7078
// callback
71-
ompt_callback_target_data_op_emi_fn(ompt_scope_begin, TargetTaskData,
72-
&TargetData, &TargetRegionOpId,
73-
ompt_target_data_alloc, HstPtrBegin,
74-
DeviceId, /* TgtPtrBegin */ nullptr,
75-
/* TgtDeviceNum */ 0, Size, Code);
79+
ompt_callback_target_data_op_emi_fn(
80+
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
81+
ompt_target_data_alloc, HstPtrBegin,
82+
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
83+
/* TgtDeviceNum */ DeviceId, Size, Code);
7684
} else if (ompt_callback_target_data_op_fn) {
7785
// HostOpId is set by the runtime
7886
HostOpId = createOpId();
7987
// Invoke the tool supplied data op callback
8088
ompt_callback_target_data_op_fn(
8189
TargetData.value, HostOpId, ompt_target_data_alloc, HstPtrBegin,
82-
DeviceId, /* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, Size, Code);
90+
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
91+
/* TgtDeviceNum */ DeviceId, Size, Code);
8392
}
8493
}
8594

@@ -89,11 +98,11 @@ void Interface::endTargetDataAlloc(int64_t DeviceId, void *HstPtrBegin,
8998
if (ompt_callback_target_data_op_emi_fn) {
9099
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
91100
// callback
92-
ompt_callback_target_data_op_emi_fn(ompt_scope_end, TargetTaskData,
93-
&TargetData, &TargetRegionOpId,
94-
ompt_target_data_alloc, HstPtrBegin,
95-
DeviceId, /* TgtPtrBegin */ nullptr,
96-
/* TgtDeviceNum */ 0, Size, Code);
101+
ompt_callback_target_data_op_emi_fn(
102+
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
103+
ompt_target_data_alloc, HstPtrBegin,
104+
/* SrcDeviceNum */ omp_get_initial_device(), /* TgtPtrBegin */ nullptr,
105+
/* TgtDeviceNum */ DeviceId, Size, Code);
97106
}
98107
endTargetDataOperation();
99108
}
@@ -108,14 +117,16 @@ void Interface::beginTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
108117
ompt_callback_target_data_op_emi_fn(
109118
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
110119
ompt_target_data_transfer_to_device, HstPtrBegin,
111-
/* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code);
120+
/* SrcDeviceNum */ omp_get_initial_device(), TgtPtrBegin, DeviceId,
121+
Size, Code);
112122
} else if (ompt_callback_target_data_op_fn) {
113123
// HostOpId is set by the runtime
114124
HostOpId = createOpId();
115125
// Invoke the tool supplied data op callback
116126
ompt_callback_target_data_op_fn(
117127
TargetData.value, HostOpId, ompt_target_data_transfer_to_device,
118-
HstPtrBegin, /* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code);
128+
HstPtrBegin, /* SrcDeviceNum */ omp_get_initial_device(), TgtPtrBegin,
129+
DeviceId, Size, Code);
119130
}
120131
}
121132

@@ -129,7 +140,8 @@ void Interface::endTargetDataSubmit(int64_t DeviceId, void *TgtPtrBegin,
129140
ompt_callback_target_data_op_emi_fn(
130141
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
131142
ompt_target_data_transfer_to_device, HstPtrBegin,
132-
/* SrcDeviceNum */ 0, TgtPtrBegin, DeviceId, Size, Code);
143+
/* SrcDeviceNum */ omp_get_initial_device(), TgtPtrBegin, DeviceId,
144+
Size, Code);
133145
}
134146
endTargetDataOperation();
135147
}
@@ -143,15 +155,15 @@ void Interface::beginTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
143155
ompt_callback_target_data_op_emi_fn(
144156
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
145157
ompt_target_data_delete, TgtPtrBegin, DeviceId,
146-
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, /* Bytes */ 0, Code);
158+
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ -1, /* Bytes */ 0, Code);
147159
} else if (ompt_callback_target_data_op_fn) {
148160
// HostOpId is set by the runtime
149161
HostOpId = createOpId();
150162
// Invoke the tool supplied data op callback
151163
ompt_callback_target_data_op_fn(TargetData.value, HostOpId,
152164
ompt_target_data_delete, TgtPtrBegin,
153165
DeviceId, /* TgtPtrBegin */ nullptr,
154-
/* TgtDeviceNum */ 0, /* Bytes */ 0, Code);
166+
/* TgtDeviceNum */ -1, /* Bytes */ 0, Code);
155167
}
156168
}
157169

@@ -164,7 +176,7 @@ void Interface::endTargetDataDelete(int64_t DeviceId, void *TgtPtrBegin,
164176
ompt_callback_target_data_op_emi_fn(
165177
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
166178
ompt_target_data_delete, TgtPtrBegin, DeviceId,
167-
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ 0, /* Bytes */ 0, Code);
179+
/* TgtPtrBegin */ nullptr, /* TgtDeviceNum */ -1, /* Bytes */ 0, Code);
168180
}
169181
endTargetDataOperation();
170182
}
@@ -176,19 +188,19 @@ void Interface::beginTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
176188
if (ompt_callback_target_data_op_emi_fn) {
177189
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
178190
// callback
179-
ompt_callback_target_data_op_emi_fn(ompt_scope_begin, TargetTaskData,
180-
&TargetData, &TargetRegionOpId,
181-
ompt_target_data_transfer_from_device,
182-
TgtPtrBegin, DeviceId, HstPtrBegin,
183-
/* TgtDeviceNum */ 0, Size, Code);
191+
ompt_callback_target_data_op_emi_fn(
192+
ompt_scope_begin, TargetTaskData, &TargetData, &TargetRegionOpId,
193+
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
194+
HstPtrBegin,
195+
/* TgtDeviceNum */ omp_get_initial_device(), Size, Code);
184196
} else if (ompt_callback_target_data_op_fn) {
185197
// HostOpId is set by the runtime
186198
HostOpId = createOpId();
187199
// Invoke the tool supplied data op callback
188-
ompt_callback_target_data_op_fn(TargetData.value, HostOpId,
189-
ompt_target_data_transfer_from_device,
190-
TgtPtrBegin, DeviceId, HstPtrBegin,
191-
/* TgtDeviceNum */ 0, Size, Code);
200+
ompt_callback_target_data_op_fn(
201+
TargetData.value, HostOpId, ompt_target_data_transfer_from_device,
202+
TgtPtrBegin, DeviceId, HstPtrBegin,
203+
/* TgtDeviceNum */ omp_get_initial_device(), Size, Code);
192204
}
193205
}
194206

@@ -199,11 +211,11 @@ void Interface::endTargetDataRetrieve(int64_t DeviceId, void *HstPtrBegin,
199211
if (ompt_callback_target_data_op_emi_fn) {
200212
// HostOpId will be set by the tool. Invoke the tool supplied data op EMI
201213
// callback
202-
ompt_callback_target_data_op_emi_fn(ompt_scope_end, TargetTaskData,
203-
&TargetData, &TargetRegionOpId,
204-
ompt_target_data_transfer_from_device,
205-
TgtPtrBegin, DeviceId, HstPtrBegin,
206-
/* TgtDeviceNum */ 0, Size, Code);
214+
ompt_callback_target_data_op_emi_fn(
215+
ompt_scope_end, TargetTaskData, &TargetData, &TargetRegionOpId,
216+
ompt_target_data_transfer_from_device, TgtPtrBegin, DeviceId,
217+
HstPtrBegin,
218+
/* TgtDeviceNum */ omp_get_initial_device(), Size, Code);
207219
}
208220
endTargetDataOperation();
209221
}
@@ -230,6 +242,7 @@ void Interface::endTargetSubmit(unsigned int numTeams) {
230242
numTeams);
231243
}
232244
}
245+
233246
void Interface::beginTargetDataEnter(int64_t DeviceId, void *Code) {
234247
beginTargetRegion();
235248
if (ompt_callback_target_emi_fn) {
@@ -391,14 +404,6 @@ class LibomptargetRtlFinalizer {
391404
llvm::SmallVector<ompt_finalize_t> RtlFinalizationFunctions;
392405
};
393406

394-
/// Object that will maintain the RTL finalizer from the plugin
395-
LibomptargetRtlFinalizer *LibraryFinalizer = nullptr;
396-
397-
bool llvm::omp::target::ompt::Initialized = false;
398-
399-
ompt_get_callback_t llvm::omp::target::ompt::lookupCallbackByCode = nullptr;
400-
ompt_function_lookup_t llvm::omp::target::ompt::lookupCallbackByName = nullptr;
401-
402407
int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup,
403408
int initial_device_num,
404409
ompt_data_t *tool_data) {
@@ -418,6 +423,9 @@ int llvm::omp::target::ompt::initializeLibrary(ompt_function_lookup_t lookup,
418423

419424
assert(lookupCallbackByCode && "lookupCallbackByCode should be non-null");
420425
assert(lookupCallbackByName && "lookupCallbackByName should be non-null");
426+
assert(ompt_get_task_data_fn && "ompt_get_task_data_fn should be non-null");
427+
assert(ompt_get_target_task_data_fn &&
428+
"ompt_get_target_task_data_fn should be non-null");
421429
assert(LibraryFinalizer == nullptr &&
422430
"LibraryFinalizer should not be initialized yet");
423431

@@ -434,6 +442,7 @@ void llvm::omp::target::ompt::finalizeLibrary(ompt_data_t *data) {
434442
// with this library
435443
LibraryFinalizer->finalize();
436444
delete LibraryFinalizer;
445+
Initialized = false;
437446
}
438447

439448
void llvm::omp::target::ompt::connectLibrary() {

openmp/libomptarget/src/OmptInterface.h

Lines changed: 109 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,23 @@
1313
#ifndef _OMPTARGET_OMPTINTERFACE_H
1414
#define _OMPTARGET_OMPTINTERFACE_H
1515

16+
#include <functional>
17+
#include <tuple>
18+
19+
#include "OmptCallback.h"
1620
#include "omp-tools.h"
1721

22+
#include "llvm/Support/ErrorHandling.h"
23+
1824
// If target OMPT support is compiled in
1925
#ifdef OMPT_SUPPORT
2026
#define OMPT_IF_BUILT(stmt) stmt
2127
#else
2228
#define OMPT_IF_BUILT(stmt)
2329
#endif
2430

31+
#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
32+
2533
/// Callbacks for target regions require task_data representing the
2634
/// encountering task.
2735
/// Callbacks for target regions and target data ops require
@@ -108,6 +116,66 @@ class Interface {
108116
/// Top-level function for invoking callback after target construct
109117
void endTarget(int64_t DeviceId, void *Code);
110118

119+
// Callback getter: Target data operations
120+
template <ompt_target_data_op_t OpType> auto getCallbacks() {
121+
if constexpr (OpType == ompt_target_data_alloc ||
122+
OpType == ompt_target_data_alloc_async)
123+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataAlloc),
124+
std::mem_fn(&Interface::endTargetDataAlloc));
125+
126+
if constexpr (OpType == ompt_target_data_delete ||
127+
OpType == ompt_target_data_delete_async)
128+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataDelete),
129+
std::mem_fn(&Interface::endTargetDataDelete));
130+
131+
if constexpr (OpType == ompt_target_data_transfer_to_device ||
132+
OpType == ompt_target_data_transfer_to_device_async)
133+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataSubmit),
134+
std::mem_fn(&Interface::endTargetDataSubmit));
135+
136+
if constexpr (OpType == ompt_target_data_transfer_from_device ||
137+
OpType == ompt_target_data_transfer_from_device_async)
138+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataRetrieve),
139+
std::mem_fn(&Interface::endTargetDataRetrieve));
140+
141+
llvm_unreachable("Unhandled target data operation type!");
142+
}
143+
144+
// Callback getter: Target region operations
145+
template <ompt_target_t OpType> auto getCallbacks() {
146+
if constexpr (OpType == ompt_target_enter_data ||
147+
OpType == ompt_target_enter_data_nowait)
148+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataEnter),
149+
std::mem_fn(&Interface::endTargetDataEnter));
150+
151+
if constexpr (OpType == ompt_target_exit_data ||
152+
OpType == ompt_target_exit_data_nowait)
153+
return std::make_pair(std::mem_fn(&Interface::beginTargetDataExit),
154+
std::mem_fn(&Interface::endTargetDataExit));
155+
156+
if constexpr (OpType == ompt_target_update ||
157+
OpType == ompt_target_update_nowait)
158+
return std::make_pair(std::mem_fn(&Interface::beginTargetUpdate),
159+
std::mem_fn(&Interface::endTargetUpdate));
160+
161+
if constexpr (OpType == ompt_target || OpType == ompt_target_nowait)
162+
return std::make_pair(std::mem_fn(&Interface::beginTarget),
163+
std::mem_fn(&Interface::endTarget));
164+
165+
llvm_unreachable("Unknown target region operation type!");
166+
}
167+
168+
// Callback getter: Kernel launch operation
169+
template <ompt_callbacks_t OpType> auto getCallbacks() {
170+
// We use 'ompt_callbacks_t', because no other enum is currently available
171+
// to model a kernel launch / target submit operation.
172+
if constexpr (OpType == ompt_callback_target_submit)
173+
return std::make_pair(std::mem_fn(&Interface::beginTargetSubmit),
174+
std::mem_fn(&Interface::endTargetSubmit));
175+
176+
llvm_unreachable("Unhandled target operation!");
177+
}
178+
111179
/// Setters for target region and target operation correlation ids
112180
void setTargetDataValue(uint64_t DataValue) { TargetData.value = DataValue; }
113181
void setTargetDataPtr(void *DataPtr) { TargetData.ptr = DataPtr; }
@@ -147,11 +215,50 @@ class Interface {
147215
void endTargetRegion();
148216
};
149217

218+
/// Thread local state for target region and associated metadata
219+
extern thread_local Interface RegionInterface;
220+
221+
template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
222+
void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args,
223+
std::index_sequence<IndexSeq...>) {
224+
std::invoke(Func, RegionInterface, std::get<IndexSeq>(Args)...);
225+
}
226+
227+
template <typename CallbackPairTy, typename... ArgsTy> class InterfaceRAII {
228+
public:
229+
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
230+
: Arguments(Args...), beginFunction(std::get<0>(Callbacks)),
231+
endFunction(std::get<1>(Callbacks)) {
232+
performIfOmptInitialized(begin());
233+
}
234+
~InterfaceRAII() { performIfOmptInitialized(end()); }
235+
236+
private:
237+
void begin() {
238+
auto IndexSequence =
239+
std::make_index_sequence<std::tuple_size_v<decltype(Arguments)>>{};
240+
InvokeInterfaceFunction(beginFunction, Arguments, IndexSequence);
241+
}
242+
243+
void end() {
244+
auto IndexSequence =
245+
std::make_index_sequence<std::tuple_size_v<decltype(Arguments)>>{};
246+
InvokeInterfaceFunction(endFunction, Arguments, IndexSequence);
247+
}
248+
249+
std::tuple<ArgsTy...> Arguments;
250+
typename CallbackPairTy::first_type beginFunction;
251+
typename CallbackPairTy::second_type endFunction;
252+
};
253+
254+
// InterfaceRAII's class template argument deduction guide
255+
template <typename CallbackPairTy, typename... ArgsTy>
256+
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
257+
-> InterfaceRAII<CallbackPairTy, ArgsTy...>;
258+
150259
} // namespace ompt
151260
} // namespace target
152261
} // namespace omp
153262
} // namespace llvm
154263

155-
extern thread_local llvm::omp::target::ompt::Interface OmptInterface;
156-
157264
#endif // _OMPTARGET_OMPTINTERFACE_H

0 commit comments

Comments
 (0)