Skip to content

Commit 235401c

Browse files
committed
[OpenMP] [OMPT] [amdgpu] Collect timestamps for D2D transfer.
Now that a synchronous version of the D2D (device-to-device) transfer has been added, enable OMPT timestamp collection for this event. Fixes ROCm/aomp#851. Change-Id: I70c0aeda17f67c4250cdd188539a23f4432e9ab2
1 parent 6975d00 commit 235401c

File tree

1 file changed

+5
-1
lines changed
  • openmp/libomptarget/plugins-nextgen/amdgpu/src

1 file changed

+5
-1
lines changed

openmp/libomptarget/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3136,7 +3136,9 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
31363136
AMDGPUDeviceTy &DstDevice = static_cast<AMDGPUDeviceTy &>(DstGenericDevice);
31373137

31383138
// For large transfers use synchronous behavior.
3139-
if (Size >= OMPX_MaxAsyncCopyBytes) {
3139+
// If OMPT is enabled or synchronous behavior is explicitly requested:
3140+
if (ompt::CallbacksInitialized || OMPX_ForceSyncRegions ||
3141+
Size >= OMPX_MaxAsyncCopyBytes) {
31403142
if (AsyncInfoWrapper.hasQueue())
31413143
if (auto Err = synchronize(AsyncInfoWrapper))
31423144
return Err;
@@ -3153,6 +3155,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
31533155
if (auto Err = Signal.wait(getStreamBusyWaitMicroseconds()))
31543156
return Err;
31553157

3158+
OMPT_IF_TRACING_ENABLED(recordCopyTimingInNs(Signal.get()););
3159+
31563160
return Signal.deinit();
31573161
}
31583162

0 commit comments

Comments
 (0)