Skip to content

Commit 0d7f232

Browse files
authored
[libomptarget] [OMPT] Fixed return address computation for OMPT events. (llvm#80498)
Currently, __builtin_return_address is used to generate the return address when the callback invoker is created. However, this may result in the return address pointing to an internal runtime function. This is not what a tool would typically want. A tool would want to know the corresponding user code from which the runtime entry point is invoked. This change adds a thread-local variable that is assigned the return address at the OpenMP runtime entry points. An RAII object is used to manage the modifications to the thread-local variable. Whenever the return address is required for OMPT events, it is read from the thread-local variable.
1 parent 4520b47 commit 0d7f232

File tree

11 files changed

+275
-30
lines changed

11 files changed

+275
-30
lines changed

openmp/libomptarget/include/OpenMP/OMPT/Interface.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
#include "llvm/Support/ErrorHandling.h"
2525

2626
#define OMPT_IF_BUILT(stmt) stmt
27-
#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)
2827

2928
/// Callbacks for target regions require task_data representing the
3029
/// encountering task.
@@ -211,6 +210,11 @@ class Interface {
211210
/// Thread local state for target region and associated metadata
212211
extern thread_local Interface RegionInterface;
213212

213+
/// Thread local variable holding the return address.
214+
/// When using __builtin_return_address to set the return address,
215+
/// pass 0 as its only argument; other levels can have unpredictable effects.
216+
extern thread_local void *ReturnAddress;
217+
214218
template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
215219
void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args,
216220
std::index_sequence<IndexSeq...>) {
@@ -249,10 +253,42 @@ template <typename CallbackPairTy, typename... ArgsTy>
249253
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
250254
-> InterfaceRAII<CallbackPairTy, ArgsTy...>;
251255

256+
/// Used to set and reset the thread-local return address. The RAII is expected
257+
/// to be created at a runtime entry point when the return address should be
258+
/// null. If so, the return address is set and \p IsSetter is set in the ctor.
259+
/// The dtor resets the return address only if the corresponding object set it.
260+
/// So if the RAII is called from a nested runtime function, the ctor/dtor will
261+
/// do nothing since the thread local return address is already set.
262+
class ReturnAddressSetterRAII {
263+
public:
264+
ReturnAddressSetterRAII(void *RA) : IsSetter(false) {
265+
// Handle nested calls. If already set, do not set again since it
266+
// must be in a nested call.
267+
if (ReturnAddress == nullptr) {
268+
// Store the return address to a thread local variable.
269+
ReturnAddress = RA;
270+
IsSetter = true;
271+
}
272+
}
273+
~ReturnAddressSetterRAII() {
274+
// Reset the return address if this object set it.
275+
if (IsSetter)
276+
ReturnAddress = nullptr;
277+
}
278+
279+
private:
280+
// Did this object set the thread-local return address?
281+
bool IsSetter;
282+
};
283+
252284
} // namespace ompt
253285
} // namespace target
254286
} // namespace omp
255287
} // namespace llvm
288+
289+
// Getter used for OMPT events: expands to the thread-local return address.
290+
#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress
291+
256292
#else
257293
#define OMPT_IF_BUILT(stmt)
258294
#endif

openmp/libomptarget/src/LegacyAPI.cpp

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,21 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "OpenMP/OMPT/Interface.h"
1314
#include "omptarget.h"
1415
#include "private.h"
1516

1617
#include "Shared/Profile.h"
1718

19+
#ifdef OMPT_SUPPORT
20+
using namespace llvm::omp::target::ompt;
21+
#endif
22+
1823
/// Legacy entry point that forwards to __tgt_target_data_begin_mapper.
/// All caller-supplied arguments are passed through unchanged; the leading
/// argument and the two trailing arguments of the mapper variant are nullptr.
/// NOTE(review): by analogy with __tgt_target_mapper these are presumably the
/// ident_t location, argument names and argument mappers -- confirm against
/// the declaration of __tgt_target_data_begin_mapper.
EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
1924
void **ArgsBase, void **Args,
2025
int64_t *ArgSizes, int64_t *ArgTypes) {
2126
TIMESCOPE();
27+
// When OMPT support is built, record this function's return address in the
// thread-local before calling into nested runtime functions. The macro
// expands to nothing when OMPT support is not built.
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
2228
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
2329
ArgSizes, ArgTypes, nullptr, nullptr);
2430
}
@@ -30,7 +36,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
3036
int32_t NoAliasDepNum,
3137
void *NoAliasDepList) {
3238
TIMESCOPE();
33-
39+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
3440
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
3541
ArgSizes, ArgTypes, nullptr, nullptr);
3642
}
@@ -39,6 +45,7 @@ EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
3945
void **ArgsBase, void **Args,
4046
int64_t *ArgSizes, int64_t *ArgTypes) {
4147
TIMESCOPE();
48+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
4249
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
4350
ArgSizes, ArgTypes, nullptr, nullptr);
4451
}
@@ -47,6 +54,7 @@ EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
4754
void **ArgsBase, void **Args,
4855
int64_t *ArgSizes, int64_t *ArgTypes) {
4956
TIMESCOPE();
57+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
5058
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
5159
ArgSizes, ArgTypes, nullptr, nullptr);
5260
}
@@ -56,7 +64,7 @@ EXTERN void __tgt_target_data_update_nowait(
5664
int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
5765
int32_t NoAliasDepNum, void *NoAliasDepList) {
5866
TIMESCOPE();
59-
67+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
6068
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
6169
ArgSizes, ArgTypes, nullptr, nullptr);
6270
}
@@ -68,7 +76,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
6876
int32_t NoAliasDepNum,
6977
void *NoAliasDepList) {
7078
TIMESCOPE();
71-
79+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
7280
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
7381
ArgSizes, ArgTypes, nullptr, nullptr);
7482
}
@@ -78,6 +86,7 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
7886
int64_t *ArgSizes, int64_t *ArgTypes,
7987
map_var_info_t *ArgNames, void **ArgMappers) {
8088
TIMESCOPE_WITH_IDENT(Loc);
89+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
8190
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
8291
ArgTypes, ArgNames, ArgMappers, 0};
8392
return __tgt_target_kernel(Loc, DeviceId, -1, -1, HostPtr, &KernelArgs);
@@ -87,6 +96,7 @@ EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
8796
void **ArgsBase, void **Args, int64_t *ArgSizes,
8897
int64_t *ArgTypes) {
8998
TIMESCOPE();
99+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
90100
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
91101
ArgSizes, ArgTypes, nullptr, nullptr);
92102
}
@@ -96,7 +106,7 @@ EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
96106
int64_t *ArgTypes, int32_t DepNum, void *DepList,
97107
int32_t NoAliasDepNum, void *NoAliasDepList) {
98108
TIMESCOPE();
99-
109+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
100110
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
101111
ArgSizes, ArgTypes, nullptr, nullptr);
102112
}
@@ -107,7 +117,7 @@ EXTERN int __tgt_target_nowait_mapper(
107117
map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
108118
int32_t NoAliasDepNum, void *NoAliasDepList) {
109119
TIMESCOPE_WITH_IDENT(Loc);
110-
120+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
111121
return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
112122
ArgSizes, ArgTypes, ArgNames, ArgMappers);
113123
}
@@ -120,7 +130,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
120130
void **ArgMappers, int32_t NumTeams,
121131
int32_t ThreadLimit) {
122132
TIMESCOPE_WITH_IDENT(Loc);
123-
133+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
124134
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
125135
ArgTypes, ArgNames, ArgMappers, 0};
126136
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
@@ -132,6 +142,7 @@ EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
132142
int64_t *ArgTypes, int32_t NumTeams,
133143
int32_t ThreadLimit) {
134144
TIMESCOPE();
145+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
135146
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
136147
Args, ArgSizes, ArgTypes, nullptr, nullptr,
137148
NumTeams, ThreadLimit);
@@ -145,7 +156,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
145156
void *DepList, int32_t NoAliasDepNum,
146157
void *NoAliasDepList) {
147158
TIMESCOPE();
148-
159+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
149160
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
150161
Args, ArgSizes, ArgTypes, nullptr, nullptr,
151162
NumTeams, ThreadLimit);
@@ -158,7 +169,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
158169
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
159170
void *NoAliasDepList) {
160171
TIMESCOPE_WITH_IDENT(Loc);
161-
172+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
162173
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
163174
Args, ArgSizes, ArgTypes, ArgNames,
164175
ArgMappers, NumTeams, ThreadLimit);
@@ -182,6 +193,7 @@ EXTERN int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId,
182193
int32_t NoAliasDepNum,
183194
void *NoAliasDepList) {
184195
TIMESCOPE_WITH_IDENT(Loc);
196+
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
185197
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
186198
KernelArgs);
187199
}

0 commit comments

Comments
 (0)