Skip to content

Commit d56e4b2

Browse files
author
fel-cab
committed
Rebase fix features
1 parent bf5d83c commit d56e4b2

File tree

3 files changed

+21
-8
lines changed

3 files changed

+21
-8
lines changed

openmp/libomptarget/include/Shared/Profile.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,4 +97,16 @@ class Profiler {
9797
std::string RTM = RegionTypeMsg; \
9898
llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM)
9999

100+
/// Time spent in the current scope, recorded under the name RegionTypeMsg;
101+
/// the detail text is the profile location obtained from IDENT plus Details.
102+
#define TIMESCOPE_WITH_DETAILS_AND_IDENT(RegionTypeMsg, Details, IDENT) \
103+
SourceInfo SI(IDENT); \
104+
std::string ProfileLocation = SI.getProfileLocation(); \
105+
llvm::TimeTraceScope TimeScope(RegionTypeMsg, ProfileLocation + Details)
106+
107+
/// Time spent in the current scope, recorded under the enclosing function's
108+
/// name (__FUNCTION__), with Details as the detail text.
109+
#define TIMESCOPE_WITH_DETAILS(Details) \
110+
llvm::TimeTraceScope TimeScope(__FUNCTION__, Details)
111+
100112
#endif // OMPTARGET_SHARED_PROFILE_H

openmp/libomptarget/src/interface.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ targetData(ident_t *Loc, int64_t DeviceId, int32_t ArgNum, void **ArgsBase,
6565
static_assert(std::is_convertible_v<TargetAsyncInfoTy, AsyncInfoTy>,
6666
"TargetAsyncInfoTy must be convertible to AsyncInfoTy.");
6767

68-
TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime Data Copy:",
68+
TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: Data Copy",
6969
"NumArgs=" + std::to_string(ArgNum), Loc);
7070

7171
DP("Entering data %s region for device %" PRId64 " with %d mappings\n",
@@ -263,7 +263,7 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
263263
!KernelArgs->ThreadLimit[1] && !KernelArgs->ThreadLimit[2] &&
264264
"OpenMP interface should not use multiple dimensions");
265265
TIMESCOPE_WITH_DETAILS_AND_IDENT(
266-
"Runtime target exe:",
266+
"Runtime: target exe",
267267
"NumTeams=" + std::to_string(NumTeams) +
268268
";NumArgs=" + std::to_string(KernelArgs->NumArgs),
269269
Loc);
@@ -297,9 +297,10 @@ static inline int targetKernel(ident_t *Loc, int64_t DeviceId, int32_t NumTeams,
297297

298298
int Rc = OFFLOAD_SUCCESS;
299299
Rc = target(Loc, *DeviceOrErr, HostPtr, *KernelArgs, AsyncInfo);
300-
301-
if (Rc == OFFLOAD_SUCCESS)
302-
Rc = AsyncInfo.synchronize();
300+
{ // required to show syncronization
301+
TIMESCOPE_WITH_DETAILS_AND_IDENT("Runtime: syncronize", "", Loc);
302+
if (Rc == OFFLOAD_SUCCESS)
303+
Rc = AsyncInfo.synchronize();
303304

304305
handleTargetOutcome(Rc == OFFLOAD_SUCCESS, Loc);
305306
assert(Rc == OFFLOAD_SUCCESS && "__tgt_target_kernel unexpected failure!");

openmp/libomptarget/src/omptarget.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -534,7 +534,7 @@ int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
534534
(ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE))
535535
continue;
536536
TIMESCOPE_WITH_DETAILS_AND_IDENT(
537-
"HostToDev:", "Size=" + std::to_string(ArgSizes[I]) + "B", Loc);
537+
"HostToDev", "Size=" + std::to_string(ArgSizes[I]) + "B", Loc);
538538
if (ArgMappers && ArgMappers[I]) {
539539
// Instead of executing the regular path of targetDataBegin, call the
540540
// targetDataMapper variant which will call targetDataBegin again
@@ -910,7 +910,7 @@ int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
910910
DP("Moving %" PRId64 " bytes (tgt:" DPxMOD ") -> (hst:" DPxMOD ")\n",
911911
DataSize, DPxPTR(TgtPtrBegin), DPxPTR(HstPtrBegin));
912912
TIMESCOPE_WITH_DETAILS_AND_IDENT(
913-
"DevToHost:", "Size=" + std::to_string(DataSize) + "B", Loc);
913+
"DevToHost", "Size=" + std::to_string(DataSize) + "B", Loc);
914914
// Wait for any previous transfer if an event is present.
915915
if (void *Event = TPR.getEntry()->getEvent()) {
916916
if (Device.waitEvent(Event, AsyncInfo) != OFFLOAD_SUCCESS) {
@@ -1636,7 +1636,7 @@ int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
16361636
{
16371637
assert(KernelArgs.NumArgs == TgtArgs.size() && "Argument count mismatch!");
16381638
TIMESCOPE_WITH_DETAILS_AND_IDENT(
1639-
"Kernel Target:",
1639+
"Kernel Target",
16401640
"NumArguments=" + std::to_string(KernelArgs.NumArgs) +
16411641
";NumTeams=" + std::to_string(KernelArgs.NumTeams[0]) +
16421642
";TripCount=" + std::to_string(KernelArgs.Tripcount),

0 commit comments

Comments
 (0)