Skip to content

Commit b48777f

Browse files
committed
[OpenMP][Offload][AMDGPU] Set tickstotime properly for runtime autotuning
Factored the HSA timestamp-frequency query out of setOmptTicksToTime() into a standalone setTicksToTime() helper so that it can be shared between OMPT and the components used for autotuning.
1 parent 08a31b4 commit b48777f

File tree

1 file changed

+19
-10
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+19
-10
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,20 @@ using namespace llvm::omp::xteam_red;
106106
} while (0)
107107
#endif
108108

109+
double setTicksToTime() {
110+
uint64_t TicksFrequency = 1;
111+
double TicksToTime = 1.0;
112+
113+
hsa_status_t Status =
114+
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &TicksFrequency);
115+
if (Status == HSA_STATUS_SUCCESS)
116+
TicksToTime = (double)1e9 / (double)TicksFrequency;
117+
else
118+
DP("Error calling hsa_system_get_info for timestamp frequency\n");
119+
120+
return TicksToTime;
121+
}
122+
109123
#ifdef OMPT_SUPPORT
110124
#include "OmptDeviceTracing.h"
111125
#include <omp-tools.h>
@@ -193,15 +207,7 @@ void setOmptAsyncCopyProfile(bool Enable) {
193207
}
194208

195209
/// Compute system timestamp conversion factor, modeled after ROCclr.
196-
void setOmptTicksToTime() { // Pre-change version (removed by this diff); writes TicksToTime, which is declared outside this function.
197-
uint64_t TicksFrequency = 1; // Passed by address to hsa_system_get_info below.
198-
hsa_status_t Status =
199-
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &TicksFrequency);
200-
if (Status == HSA_STATUS_SUCCESS)
201-
TicksToTime = (double)1e9 / (double)TicksFrequency; // Factor is 1e9 divided by the queried frequency.
202-
else
203-
DP("Error calling hsa_system_get_info for timestamp frequency\n"); // TicksToTime is not modified on this path.
204-
}
210+
void setOmptTicksToTime() {
  // Obtain the factor from the shared helper and store it in TicksToTime,
  // which is declared outside this function.
  const double ConversionFactor = setTicksToTime();
  TicksToTime = ConversionFactor;
}
205211

206212
/// Get the current HSA-based device timestamp.
207213
uint64_t getSystemTimestampInNs() {
@@ -1669,6 +1675,10 @@ struct AMDGPUStreamTy {
16691675
uint32_t NumTeams;
16701676
uint32_t NumThreads;
16711677
KernelRunRecordTy *KernelRunRecords;
1678+
1679+
PostKernelRunProcessingArgsTy()
1680+
: Agent{0}, Signal(nullptr), TicksToTime(setTicksToTime()), NumTeams(0),
1681+
NumThreads(0), KernelRunRecords(nullptr) {}
16721682
};
16731683

16741684
using AMDGPUStreamCallbackTy = Error(void *Data);
@@ -2151,7 +2161,6 @@ struct AMDGPUStreamTy {
21512161
if (!KernelRecords->reachedRunLimitForKernel(KernelName)) {
21522162
PostKernelRunProcessingArgs.Agent = Agent;
21532163
PostKernelRunProcessingArgs.Signal = OutputSignal;
2154-
PostKernelRunProcessingArgs.TicksToTime = 1.0;
21552164
PostKernelRunProcessingArgs.KernelName = KernelName;
21562165
PostKernelRunProcessingArgs.NumTeams = NumBlocks[0];
21572166
PostKernelRunProcessingArgs.NumThreads = NumThreads[0];

0 commit comments

Comments
 (0)