Skip to content

Commit 789cb33

Browse files
Add support for returning raw GPU timestamps via registry flag.
Change-Id: Id80ef2a95132f1cdc1d14e45d406925b11599db1
1 parent 58c34fd commit 789cb33

File tree

6 files changed

+74
-3
lines changed

6 files changed

+74
-3
lines changed

Jenkinsfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#!groovy
22
neoDependenciesRev='798076-1088'
33
strategy='EQUAL'
4-
allowedCD=272
4+
allowedCD=270

runtime/event/event.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,18 @@ cl_int Event::getEventProfilingInfo(cl_profiling_info paramName,
180180
switch (paramName) {
181181
case CL_PROFILING_COMMAND_QUEUED:
182182
src = &queueTimeStamp.CPUTimeinNS;
183+
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
184+
src = &queueTimeStamp.GPUTimeStamp;
185+
}
186+
183187
srcSize = sizeof(cl_ulong);
184188
break;
185189

186190
case CL_PROFILING_COMMAND_SUBMIT:
187191
src = &submitTimeStamp.CPUTimeinNS;
192+
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
193+
src = &submitTimeStamp.GPUTimeStamp;
194+
}
188195
srcSize = sizeof(cl_ulong);
189196
break;
190197

@@ -291,8 +298,16 @@ bool Event::calcProfilingData() {
291298
cpuDuration = static_cast<uint64_t>(gpuDuration * frequency);
292299
cpuCompleteDuration = static_cast<uint64_t>(gpuCompleteDuration * frequency);
293300
startTimeStamp = static_cast<uint64_t>(((HwTimeStamps *)timeStampNode->tag)->GlobalStartTS * frequency) + c0;
301+
294302
endTimeStamp = startTimeStamp + cpuDuration;
295303
completeTimeStamp = startTimeStamp + cpuCompleteDuration;
304+
305+
if (DebugManager.flags.ReturnRawGpuTimestamps.get()) {
306+
startTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextStartTS;
307+
endTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextEndTS;
308+
completeTimeStamp = ((HwTimeStamps *)timeStampNode->tag)->ContextCompleteTS;
309+
}
310+
296311
dataCalculated = true;
297312
}
298313
return dataCalculated;
@@ -385,7 +400,6 @@ void Event::unblockEventsBlockedByThis(int32_t transitionStatus) {
385400
}
386401

387402
auto childEventRef = childEventsToNotify.detachNodes();
388-
389403
while (childEventRef != nullptr) {
390404
auto childEvent = childEventRef->ref;
391405

runtime/os_interface/DebugVariables_base.inl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,3 +110,4 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont o
110110
DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id")
111111
DECLARE_DEBUG_VARIABLE(bool, UseMaxSimdSizeToDeduceMaxWorkgroupSize, false, "With this flag on, max workgroup size is deduced using SIMD32 instead of SIMD8, this causes the max wkg size to be 4 times bigger")
112112
DECLARE_DEBUG_VARIABLE(bool, EnableTimestampPacket, false, "Write Timestamp Packet for each set of gpu walkers")
113+
DECLARE_DEBUG_VARIABLE(bool, ReturnRawGpuTimestamps, false, "Driver returns raw GPU timestamps instead of calculated ones.")

unit_tests/mocks/mock_event.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ struct MockEvent : public BaseEventType {
4747

4848
using BaseEventType::timeStampNode;
4949
using Event::magic;
50+
using Event::queueTimeStamp;
51+
using Event::submitTimeStamp;
5052
};
5153

5254
#undef FORWARD_CONSTRUCTOR

unit_tests/profiling/profiling_tests.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
#include "runtime/utilities/tag_allocator.h"
3232

3333
#include "unit_tests/command_queue/command_enqueue_fixture.h"
34+
#include "unit_tests/helpers/debug_manager_state_restore.h"
3435
#include "unit_tests/fixtures/device_fixture.h"
3536
#include "unit_tests/mocks/mock_command_queue.h"
3637
#include "unit_tests/mocks/mock_context.h"
@@ -462,6 +463,58 @@ TEST(EventProfilingTest, givenEventWhenCompleteIsZeroThenCalcProfilingDataSetsEn
462463
cmdQ.device = nullptr;
463464
}
464465

466+
TEST(EventProfilingTest, givenRawTimestampsDebugModeWhenDataIsQueriedThenRawDataIsReturned) {
467+
DebugManagerStateRestore stateRestore;
468+
DebugManager.flags.ReturnRawGpuTimestamps.set(1);
469+
std::unique_ptr<MockDevice> device(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
470+
MyOSTime::instanceNum = 0;
471+
device->setOSTime(new MyOSTime());
472+
EXPECT_EQ(1, MyOSTime::instanceNum);
473+
MockContext context;
474+
cl_command_queue_properties props[5] = {0, 0, 0, 0, 0};
475+
MockCommandQueue cmdQ(&context, device.get(), props);
476+
cmdQ.setProfilingEnabled();
477+
cmdQ.device = device.get();
478+
479+
HwTimeStamps timestamp;
480+
timestamp.GlobalStartTS = 10;
481+
timestamp.ContextStartTS = 20;
482+
timestamp.GlobalEndTS = 80;
483+
timestamp.ContextEndTS = 56;
484+
timestamp.GlobalCompleteTS = 0;
485+
timestamp.ContextCompleteTS = 70;
486+
487+
MockTagNode<HwTimeStamps> timestampNode;
488+
timestampNode.tag = &timestamp;
489+
490+
MockEvent<Event> event(&cmdQ, CL_COMPLETE, 0, 0);
491+
cl_event clEvent = &event;
492+
493+
event.queueTimeStamp.CPUTimeinNS = 1;
494+
event.queueTimeStamp.GPUTimeStamp = 2;
495+
496+
event.submitTimeStamp.CPUTimeinNS = 3;
497+
event.submitTimeStamp.GPUTimeStamp = 4;
498+
499+
event.setCPUProfilingPath(false);
500+
event.timeStampNode = &timestampNode;
501+
event.calcProfilingData();
502+
503+
cl_ulong queued, submited, start, end, complete;
504+
505+
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &queued, nullptr);
506+
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &submited, nullptr);
507+
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, nullptr);
508+
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, nullptr);
509+
clGetEventProfilingInfo(clEvent, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &complete, nullptr);
510+
511+
EXPECT_EQ(timestamp.ContextCompleteTS, complete);
512+
EXPECT_EQ(timestamp.ContextEndTS, end);
513+
EXPECT_EQ(timestamp.ContextStartTS, start);
514+
EXPECT_EQ(event.submitTimeStamp.GPUTimeStamp, submited);
515+
EXPECT_EQ(event.queueTimeStamp.GPUTimeStamp, queued);
516+
}
517+
465518
struct ProfilingWithPerfCountersTests : public ProfilingTests,
466519
public PerformanceCountersFixture {
467520
void SetUp() override {

unit_tests/test_files/igdrcl.config

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,5 @@ RebuildPrecompiledKernels = false
8080
CreateMultipleDevices = 0
8181
EnableExperimentalCommandBuffer = 0
8282
LoopAtPlatformInitialize = false
83-
EnableTimestampPacket = false
83+
EnableTimestampPacket = false
84+
ReturnRawGpuTimestamps = 0

0 commit comments

Comments
 (0)