Skip to content

[libomptarget] [OMPT] Fixed return address computation for OMPT events. #80498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 37 additions & 1 deletion openmp/libomptarget/include/OpenMP/OMPT/Interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"

#define OMPT_IF_BUILT(stmt) stmt
#define OMPT_GET_RETURN_ADDRESS(level) __builtin_return_address(level)

/// Callbacks for target regions require task_data representing the
/// encountering task.
Expand Down Expand Up @@ -211,6 +210,11 @@ class Interface {
/// Thread local state for target region and associated metadata
extern thread_local Interface RegionInterface;

/// Thread local variable holding the return address.
/// When using __builtin_return_address to set the return address,
/// allow 0 as the only argument to avoid unpredictable effects.
extern thread_local void *ReturnAddress;

template <typename FuncTy, typename ArgsTy, size_t... IndexSeq>
void InvokeInterfaceFunction(FuncTy Func, ArgsTy Args,
std::index_sequence<IndexSeq...>) {
Expand Down Expand Up @@ -249,10 +253,42 @@ template <typename CallbackPairTy, typename... ArgsTy>
InterfaceRAII(CallbackPairTy Callbacks, ArgsTy... Args)
-> InterfaceRAII<CallbackPairTy, ArgsTy...>;

/// Used to set and reset the thread-local return address. The RAII is expected
/// to be created at a runtime entry point when the return address should be
/// null. If so, the return address is set and \p IsSetter is set in the ctor.
/// The dtor resets the return address only if the corresponding object set it.
/// So if the RAII is called from a nested runtime function, the ctor/dtor will
/// do nothing since the thread local return address is already set.
class ReturnAddressSetterRAII {
public:
ReturnAddressSetterRAII(void *RA) : IsSetter(false) {
// Handle nested calls. If already set, do not set again since it
// must be in a nested call.
if (ReturnAddress == nullptr) {
// Store the return address to a thread local variable.
ReturnAddress = RA;
IsSetter = true;
}
}
~ReturnAddressSetterRAII() {
// Reset the return address if this object set it.
if (IsSetter)
ReturnAddress = nullptr;
}

private:
// Did this object set the thread-local return address?
bool IsSetter;
};

} // namespace ompt
} // namespace target
} // namespace omp
} // namespace llvm

// The getter returns the address stored in the thread local variable.
#define OMPT_GET_RETURN_ADDRESS llvm::omp::target::ompt::ReturnAddress

#else
#define OMPT_IF_BUILT(stmt)
#endif
Expand Down
28 changes: 20 additions & 8 deletions openmp/libomptarget/src/LegacyAPI.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,21 @@
//
//===----------------------------------------------------------------------===//

#include "OpenMP/OMPT/Interface.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think we should bother adding this to the legacy API stuff because these are just kept around for backwards compatibility.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think we should bother adding this to the legacy API stuff because these are just kept around for backwards compatibility.

Well, downstream still uses it. Since the legacy API is still around upstream, I decided to make the changes here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do we use it for downstream? All of the legacy API functions should have newer versions that clang has emitted for around two years now.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do we use it for downstream? All of the legacy API functions should have newer versions that clang has emitted for around two years now.

legacy flang.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, makes sense.

#include "omptarget.h"
#include "private.h"

#include "Shared/Profile.h"

#ifdef OMPT_SUPPORT
using namespace llvm::omp::target::ompt;
#endif

EXTERN void __tgt_target_data_begin(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -30,7 +36,7 @@ EXTERN void __tgt_target_data_begin_nowait(int64_t DeviceId, int32_t ArgNum,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_begin_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -39,6 +45,7 @@ EXTERN void __tgt_target_data_end(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -47,6 +54,7 @@ EXTERN void __tgt_target_data_update(int64_t DeviceId, int32_t ArgNum,
void **ArgsBase, void **Args,
int64_t *ArgSizes, int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -56,7 +64,7 @@ EXTERN void __tgt_target_data_update_nowait(
int64_t *ArgSizes, int64_t *ArgTypes, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_update_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -68,7 +76,7 @@ EXTERN void __tgt_target_data_end_nowait(int64_t DeviceId, int32_t ArgNum,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
__tgt_target_data_end_mapper(nullptr, DeviceId, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -78,6 +86,7 @@ EXTERN int __tgt_target_mapper(ident_t *Loc, int64_t DeviceId, void *HostPtr,
int64_t *ArgSizes, int64_t *ArgTypes,
map_var_info_t *ArgNames, void **ArgMappers) {
TIMESCOPE_WITH_IDENT(Loc);
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, 0};
return __tgt_target_kernel(Loc, DeviceId, -1, -1, HostPtr, &KernelArgs);
Expand All @@ -87,6 +96,7 @@ EXTERN int __tgt_target(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
void **ArgsBase, void **Args, int64_t *ArgSizes,
int64_t *ArgTypes) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -96,7 +106,7 @@ EXTERN int __tgt_target_nowait(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
int64_t *ArgTypes, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, nullptr, nullptr);
}
Expand All @@ -107,7 +117,7 @@ EXTERN int __tgt_target_nowait_mapper(
map_var_info_t *ArgNames, void **ArgMappers, int32_t DepNum, void *DepList,
int32_t NoAliasDepNum, void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase, Args,
ArgSizes, ArgTypes, ArgNames, ArgMappers);
}
Expand All @@ -120,7 +130,7 @@ EXTERN int __tgt_target_teams_mapper(ident_t *Loc, int64_t DeviceId,
void **ArgMappers, int32_t NumTeams,
int32_t ThreadLimit) {
TIMESCOPE_WITH_IDENT(Loc);

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
KernelArgsTy KernelArgs{1, ArgNum, ArgsBase, Args, ArgSizes,
ArgTypes, ArgNames, ArgMappers, 0};
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
Expand All @@ -132,6 +142,7 @@ EXTERN int __tgt_target_teams(int64_t DeviceId, void *HostPtr, int32_t ArgNum,
int64_t *ArgTypes, int32_t NumTeams,
int32_t ThreadLimit) {
TIMESCOPE();
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, nullptr, nullptr,
NumTeams, ThreadLimit);
Expand All @@ -145,7 +156,7 @@ EXTERN int __tgt_target_teams_nowait(int64_t DeviceId, void *HostPtr,
void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE();

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(nullptr, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, nullptr, nullptr,
NumTeams, ThreadLimit);
Expand All @@ -158,7 +169,7 @@ EXTERN int __tgt_target_teams_nowait_mapper(
int32_t ThreadLimit, int32_t DepNum, void *DepList, int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);

OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_teams_mapper(Loc, DeviceId, HostPtr, ArgNum, ArgsBase,
Args, ArgSizes, ArgTypes, ArgNames,
ArgMappers, NumTeams, ThreadLimit);
Expand All @@ -182,6 +193,7 @@ EXTERN int __tgt_target_kernel_nowait(ident_t *Loc, int64_t DeviceId,
int32_t NoAliasDepNum,
void *NoAliasDepList) {
TIMESCOPE_WITH_IDENT(Loc);
OMPT_IF_BUILT(ReturnAddressSetterRAII RA(__builtin_return_address(0)));
return __tgt_target_kernel(Loc, DeviceId, NumTeams, ThreadLimit, HostPtr,
KernelArgs);
}
Loading