Skip to content

Commit b9d0411

Browse files
authored
[OpenMP][Offload][AMDGPU] Set tickstotime properly for runtime autotuning (llvm#871)
2 parents 5c26cda + b48777f commit b9d0411

File tree

1 file changed

+19
-10
lines changed
  • offload/plugins-nextgen/amdgpu/src

1 file changed

+19
-10
lines changed

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 19 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,20 @@ using namespace llvm::omp::xteam_red;
106106
} while (0)
107107
#endif
108108

109+
/// Query the HSA system timestamp frequency and return the factor used to
/// convert timestamp ticks to time.
///
/// \returns 1e9 / TicksFrequency when the HSA query succeeds (presumably
/// nanoseconds per tick, assuming the frequency is reported in Hz -- confirm
/// against the HSA specification); 1.0 when the query fails.
///
/// NOTE(review): despite the "set" prefix, this function only returns the
/// factor; callers are responsible for storing it.
double setTicksToTime() {
110+
uint64_t TicksFrequency = 1;
111+
// Default factor, returned unchanged if the HSA query below fails.
double TicksToTime = 1.0;
112+
113+
hsa_status_t Status =
114+
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &TicksFrequency);
115+
if (Status == HSA_STATUS_SUCCESS)
116+
// NOTE(review): no guard against a reported frequency of 0, which would
// make this division produce infinity -- confirm HSA never reports 0.
TicksToTime = (double)1e9 / (double)TicksFrequency;
117+
else
118+
// Failure is only logged; the default factor is kept.
DP("Error calling hsa_system_get_info for timestamp frequency\n");
119+
120+
return TicksToTime;
121+
}
122+
109123
#ifdef OMPT_SUPPORT
110124
#include "OmptDeviceTracing.h"
111125
#include <omp-tools.h>
@@ -193,15 +207,7 @@ void setOmptAsyncCopyProfile(bool Enable) {
193207
}
194208

195209
/// Compute system timestamp conversion factor, modeled after ROCclr.
196-
// Pre-change implementation (removed side of the diff): performs the HSA
// frequency query inline and writes the result to the TicksToTime variable
// declared outside this function.
void setOmptTicksToTime() {
197-
uint64_t TicksFrequency = 1;
198-
hsa_status_t Status =
199-
hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &TicksFrequency);
200-
if (Status == HSA_STATUS_SUCCESS)
201-
TicksToTime = (double)1e9 / (double)TicksFrequency;
202-
else
203-
// On failure TicksToTime is left untouched; the error is only logged.
DP("Error calling hsa_system_get_info for timestamp frequency\n");
204-
}
210+
void setOmptTicksToTime() {
  // Delegate the HSA frequency query to setTicksToTime() and store the
  // returned factor in TicksToTime.
  TicksToTime = setTicksToTime();
}
205211

206212
/// Get the current HSA-based device timestamp.
207213
uint64_t getSystemTimestampInNs() {
@@ -1668,6 +1674,10 @@ struct AMDGPUStreamTy {
16681674
uint32_t NumTeams;
16691675
uint32_t NumThreads;
16701676
KernelRunRecordTy *KernelRunRecords;
1677+
1678+
/// Default constructor: zero/null-initializes Agent, Signal, NumTeams,
/// NumThreads and KernelRunRecords, and initializes TicksToTime from
/// setTicksToTime(), so the HSA timestamp-frequency query runs each time an
/// object of this type is default-constructed.
///
/// NOTE(review): KernelName, which callers assign on this struct, is not in
/// the initializer list -- confirm it has a default member initializer or a
/// default-constructible type.
PostKernelRunProcessingArgsTy()
1679+
: Agent{0}, Signal(nullptr), TicksToTime(setTicksToTime()), NumTeams(0),
1680+
NumThreads(0), KernelRunRecords(nullptr) {}
16711681
};
16721682

16731683
using AMDGPUStreamCallbackTy = Error(void *Data);
@@ -2154,7 +2164,6 @@ struct AMDGPUStreamTy {
21542164
if (!KernelRecords->reachedRunLimitForKernel(KernelName)) {
21552165
PostKernelRunProcessingArgs.Agent = Agent;
21562166
PostKernelRunProcessingArgs.Signal = OutputSignal;
2157-
PostKernelRunProcessingArgs.TicksToTime = 1.0;
21582167
PostKernelRunProcessingArgs.KernelName = KernelName;
21592168
PostKernelRunProcessingArgs.NumTeams = NumBlocks[0];
21602169
PostKernelRunProcessingArgs.NumThreads = NumThreads[0];

0 commit comments

Comments
 (0)