Skip to content

Commit ac82d49

Browse files
committed
[L0 v2] implement profiling support
1 parent f8e0822 commit ac82d49

19 files changed

+361
-120
lines changed

source/adapters/level_zero/v2/api.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -225,14 +225,6 @@ ur_result_t urKernelGetSuggestedLocalWorkSize(ur_kernel_handle_t hKernel,
225225
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
226226
}
227227

228-
ur_result_t urEventGetProfilingInfo(ur_event_handle_t hEvent,
229-
ur_profiling_info_t propName,
230-
size_t propSize, void *pPropValue,
231-
size_t *pPropSizeRet) {
232-
logger::error("{} function not implemented!", __FUNCTION__);
233-
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
234-
}
235-
236228
ur_result_t urEventGetNativeHandle(ur_event_handle_t hEvent,
237229
ur_native_handle_t *phNativeEvent) {
238230
logger::error("{} function not implemented!", __FUNCTION__);

source/adapters/level_zero/v2/context.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,12 @@ ur_context_handle_t_::ur_context_handle_t_(ze_context_handle_t hContext,
4949
bool ownZeContext)
5050
: commandListCache(hContext),
5151
eventPoolCache(phDevices[0]->Platform->getNumDevices(),
52-
[context = this,
53-
platform = phDevices[0]->Platform](DeviceId deviceId) {
52+
[context = this, platform = phDevices[0]->Platform](
53+
DeviceId deviceId, v2::event_flags_t flags) {
5454
auto device = platform->getDeviceById(deviceId);
5555
// TODO: just use per-context id?
5656
return std::make_unique<v2::provider_normal>(
57-
context, device, v2::EVENT_COUNTER,
58-
v2::QUEUE_IMMEDIATE);
57+
context, device, v2::QUEUE_IMMEDIATE, flags);
5958
}),
6059
hContext(hContext, ownZeContext),
6160
hDevices(phDevices, phDevices + numDevices),

source/adapters/level_zero/v2/event.cpp

Lines changed: 167 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,20 @@
1414
#include "event_pool.hpp"
1515
#include "event_provider.hpp"
1616

17-
ur_event_handle_t_::ur_event_handle_t_(v2::event_allocation eventAllocation,
18-
v2::event_pool *pool)
19-
: type(eventAllocation.type), zeEvent(std::move(eventAllocation.borrow)),
20-
pool(pool) {}
17+
#include "../ur_interface_loader.hpp"
18+
19+
// Builds an event around a Level Zero event borrowed from the provider cache.
//
// eventAllocation - borrowed native event; ownership moves into this object.
// pool            - pool that created this event. It is dereferenced here
//                   (through getDevice()) so it must be non-null and must be
//                   initialized before the two device-derived constants.
//
// All timestamp fields start at zero, which the rest of the class treats as
// "nothing recorded yet".
ur_event_handle_t_::ur_event_handle_t_(
    v2::raii::cache_borrowed_event eventAllocation, v2::event_pool *pool)
    : zeEvent(std::move(eventAllocation)),
      pool(pool),
      adjustedEventStartTimestamp(0),
      recordEventEndTimestamp(0),
      adjustedEventEndTimestamp(0),
      // Cached once so later timestamp adjustments do not re-query the device.
      zeTimerResolution(getDevice()->ZeDeviceProperties->timerResolution),
      timestampMaxValue(getDevice()->getTimestampMask()) {}
2126

2227
void ur_event_handle_t_::reset() {
2328
// consider make an abstraction for regular/counter based
2429
// events if there's more of this type of conditions
25-
if (type == v2::event_type::EVENT_REGULAR) {
30+
if (pool->getFlags() & v2::EVENT_FLAGS_COUNTER) {
2631
zeEventHostReset(zeEvent.get());
2732
}
2833
}
@@ -40,11 +45,90 @@ ur_result_t ur_event_handle_t_::release() {
4045
if (!RefCount.decrementAndTest())
4146
return UR_RESULT_SUCCESS;
4247

48+
if (isTimestamped() && adjustedEventEndTimestamp == 0) {
49+
// L0 will write end timestamp to this event some time in the future,
50+
// so we can't release it yet.
51+
// TODO: delay releasing until the end timestamp is written.
52+
return UR_RESULT_SUCCESS;
53+
}
54+
4355
pool->free(this);
4456

4557
return UR_RESULT_SUCCESS;
4658
}
4759

60+
bool ur_event_handle_t_::isTimestamped() const {
61+
// If we are recording, the start time of the event will be non-zero.
62+
return adjustedEventStartTimestamp != 0;
63+
}
64+
65+
bool ur_event_handle_t_::isProfilingEnabled() const {
66+
return pool->getFlags() & v2::EVENT_FLAGS_PROFILING_ENABLED;
67+
}
68+
69+
// Returns the device reported by the provider of the pool that owns this
// event.
ur_device_handle_t ur_event_handle_t_::getDevice() const {
  auto *provider = pool->getProvider();
  return provider->device();
}
72+
73+
uint64_t ur_event_handle_t_::getEventStartTimestmap() const {
74+
return adjustedEventStartTimestamp;
75+
}
76+
77+
// Converts a raw end timestamp into the same scale as an already adjusted
// start timestamp.
//
// adjustedStartTimestamp - start time, already multiplied by the resolution.
// endTimestamp           - raw end counter value.
// timestampMaxValue      - mask selecting the valid counter bits.
// timerResolution        - multiplier applied to a masked counter value.
//
// Returns the masked and scaled end time. When that value is smaller than the
// start time, a single counter wrap-around is assumed and one full
// (timestampMaxValue * timerResolution) span is added. Multiple wrap-arounds
// are not detected.
static uint64_t adjustEndEventTimestamp(uint64_t adjustedStartTimestamp,
                                        uint64_t endTimestamp,
                                        uint64_t timestampMaxValue,
                                        uint64_t timerResolution) {
  const uint64_t validEndBits = endTimestamp & timestampMaxValue;
  const uint64_t scaledEnd = validEndBits * timerResolution;

  // No wrap-around: the end is at or after the start.
  if (scaledEnd >= adjustedStartTimestamp)
    return scaledEnd;

  // The underlying HW counter is narrower than 64 bits and wrapped once.
  return scaledEnd + timestampMaxValue * timerResolution;
}
94+
95+
uint64_t ur_event_handle_t_::getEventEndTimestamp() {
96+
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
97+
98+
// If adjustedEventEndTimestamp on the event is non-zero it means it has
99+
// collected the result of the queue already. In that case it has been
100+
// adjusted and is ready for immediate return.
101+
if (adjustedEventEndTimestamp)
102+
return adjustedEventEndTimestamp;
103+
104+
// If the result is 0, we have not yet gotten results back and so we just
105+
// return it.
106+
if (recordEventEndTimestamp == 0)
107+
return recordEventEndTimestamp;
108+
109+
// Now that we have the result, there is no need to keep it in the queue
110+
// anymore, so we cache it on the event and evict the record from the
111+
// queue.
112+
adjustedEventEndTimestamp =
113+
adjustEndEventTimestamp(getEventStartTimestmap(), recordEventEndTimestamp,
114+
timestampMaxValue, zeTimerResolution);
115+
return adjustedEventEndTimestamp;
116+
}
117+
118+
void ur_event_handle_t_::recordStartTimestamp() {
119+
uint64_t deviceStartTimestamp = 0;
120+
UR_CALL_THROWS(ur::level_zero::urDeviceGetGlobalTimestamps(
121+
getDevice(), &deviceStartTimestamp, nullptr));
122+
123+
std::scoped_lock<ur_shared_mutex> lock(this->Mutex);
124+
125+
adjustedEventStartTimestamp = deviceStartTimestamp;
126+
}
127+
128+
uint64_t *ur_event_handle_t_::getEventEndTimestampPtr() {
129+
return &recordEventEndTimestamp;
130+
}
131+
48132
namespace ur::level_zero {
49133
ur_result_t urEventRetain(ur_event_handle_t hEvent) { return hEvent->retain(); }
50134

@@ -88,4 +172,82 @@ ur_result_t urEventGetInfo(ur_event_handle_t hEvent, ur_event_info_t propName,
88172

89173
return UR_RESULT_SUCCESS;
90174
}
175+
176+
/// Returns profiling information for an event.
///
/// Two kinds of events are supported:
///  - timestamped events (a start timestamp was recorded on the handle):
///    values are served directly from the handle;
///  - events from a pool with profiling enabled: values are derived from the
///    kernel timestamps queried with zeEventQueryKernelTimestamp.
///
/// Returns UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE when the event is
/// neither timestamped nor profiling-enabled, UR_RESULT_ERROR_INVALID_VALUE
/// for an unknown propName, otherwise the result of writing the value through
/// UrReturnHelper (or the error produced by the Level Zero query).
ur_result_t urEventGetProfilingInfo(
    ur_event_handle_t hEvent, ///< [in] handle of the event object
    ur_profiling_info_t
        propName, ///< [in] the name of the profiling property to query
    size_t
        propValueSize, ///< [in] size in bytes of the profiling property value
    void *pPropValue,  ///< [out][optional] value of the profiling property
    size_t *pPropValueSizeRet ///< [out][optional] pointer to the actual size in
                              ///< bytes returned in propValue
) {
  // The event must either have profiling enabled or be recording timestamps.
  bool isTimestampedEvent = hEvent->isTimestamped();
  if (!hEvent->isProfilingEnabled() && !isTimestampedEvent) {
    return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE;
  }

  UrReturnHelper returnValue(propValueSize, pPropValue, pPropValueSizeRet);

  // For timestamped events we have the timestamps ready directly on the event
  // handle, so we short-circuit the return.
  if (isTimestampedEvent) {
    uint64_t contextStartTime = hEvent->getEventStartTimestmap();
    switch (propName) {
    case UR_PROFILING_INFO_COMMAND_QUEUED:
    case UR_PROFILING_INFO_COMMAND_SUBMIT:
      return returnValue(contextStartTime);
    case UR_PROFILING_INFO_COMMAND_END:
    case UR_PROFILING_INFO_COMMAND_START:
    case UR_PROFILING_INFO_COMMAND_COMPLETE: {
      return returnValue(hEvent->getEventEndTimestamp());
    }
    default:
      logger::error("urEventGetProfilingInfo: not supported ParamName");
      return UR_RESULT_ERROR_INVALID_VALUE;
    }
  }

  ze_kernel_timestamp_result_t tsResult;

  auto zeTimerResolution =
      hEvent->getDevice()->ZeDeviceProperties->timerResolution;
  auto timestampMaxValue = hEvent->getDevice()->getTimestampMask();

  switch (propName) {
  case UR_PROFILING_INFO_COMMAND_START: {
    ZE2UR_CALL(zeEventQueryKernelTimestamp, (hEvent->getZeEvent(), &tsResult));
    uint64_t contextStartTime =
        (tsResult.global.kernelStart & timestampMaxValue) * zeTimerResolution;
    return returnValue(contextStartTime);
  }
  case UR_PROFILING_INFO_COMMAND_END:
  case UR_PROFILING_INFO_COMMAND_COMPLETE: {
    ZE2UR_CALL(zeEventQueryKernelTimestamp, (hEvent->getZeEvent(), &tsResult));

    // adjustEndEventTimestamp compares the scaled end time against the start
    // time to detect a counter wrap-around, so the start must be masked AND
    // scaled by the timer resolution, exactly as in the COMMAND_START case.
    // Passing raw ticks would compare values in different units and could
    // miss the wrap-around.
    uint64_t contextStartTime =
        (tsResult.global.kernelStart & timestampMaxValue) * zeTimerResolution;

    auto adjustedEndTime =
        adjustEndEventTimestamp(contextStartTime, tsResult.global.kernelEnd,
                                timestampMaxValue, zeTimerResolution);
    return returnValue(adjustedEndTime);
  }
  case UR_PROFILING_INFO_COMMAND_QUEUED:
  case UR_PROFILING_INFO_COMMAND_SUBMIT:
    // Note: No users for this case
    // The "command_submit" time is implemented by recording submission
    // timestamp with a call to urDeviceGetGlobalTimestamps before command
    // enqueue.
    //
    return returnValue(uint64_t{0});
  default:
    logger::error("urEventGetProfilingInfo: not supported ParamName");
    return UR_RESULT_ERROR_INVALID_VALUE;
  }

  // Not reached: every switch branch above returns. Kept so the function has
  // a return on all syntactic paths.
  return UR_RESULT_SUCCESS;
}
91253
} // namespace ur::level_zero

source/adapters/level_zero/v2/event.hpp

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class event_pool;
2424

2525
struct ur_event_handle_t_ : _ur_object {
2626
public:
27-
ur_event_handle_t_(v2::event_allocation eventAllocation,
27+
ur_event_handle_t_(v2::raii::cache_borrowed_event eventAllocation,
2828
v2::event_pool *pool);
2929

3030
void reset();
@@ -33,8 +33,30 @@ struct ur_event_handle_t_ : _ur_object {
3333
ur_result_t retain();
3434
ur_result_t release();
3535

36+
// Tells if this event was created as a timestamp event, allowing profiling
37+
// info even if profiling is not enabled.
38+
bool isTimestamped() const;
39+
40+
// Tells if this event comes from a pool that has profiling enabled.
41+
bool isProfilingEnabled() const;
42+
43+
// Device associated with this event
44+
ur_device_handle_t getDevice() const;
45+
46+
void recordStartTimestamp();
47+
uint64_t *getEventEndTimestampPtr();
48+
49+
uint64_t getEventStartTimestmap() const;
50+
uint64_t getEventEndTimestamp();
51+
3652
private:
37-
v2::event_type type;
3853
v2::raii::cache_borrowed_event zeEvent;
3954
v2::event_pool *pool;
55+
56+
uint64_t adjustedEventStartTimestamp;
57+
uint64_t recordEventEndTimestamp;
58+
uint64_t adjustedEventEndTimestamp;
59+
60+
const uint64_t zeTimerResolution;
61+
const uint64_t timestampMaxValue;
4062
};

source/adapters/level_zero/v2/event_pool.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,10 @@ void event_pool::free(ur_event_handle_t_ *event) {
4848
event->RefCount.increment();
4949
}
5050

51-
event_provider *event_pool::getProvider() { return provider.get(); }
51+
event_provider *event_pool::getProvider() const { return provider.get(); }
52+
53+
// Returns the event flags reported by this pool's provider.
event_flags_t event_pool::getFlags() const {
  const event_provider *owner = getProvider();
  return owner->eventFlags();
}
5256

5357
} // namespace v2

source/adapters/level_zero/v2/event_pool.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ class event_pool {
4646
// Free an event back to the pool. Thread safe.
4747
void free(ur_event_handle_t_ *event);
4848

49-
event_provider *getProvider();
49+
event_provider *getProvider() const;
50+
event_flags_t getFlags() const;
5051

5152
private:
5253
std::unique_ptr<event_provider> provider;

source/adapters/level_zero/v2/event_pool_cache.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,29 +16,32 @@ namespace v2 {
1616
// Creates a cache with one (initially empty) pool list for every
// combination of device id and event flags.
//
// max_devices    - number of device ids the cache must be able to index.
// ProviderCreate - factory used by borrow() to build a provider on demand.
event_pool_cache::event_pool_cache(size_t max_devices,
                                   ProviderCreateFunc ProviderCreate)
    : providerCreate(ProviderCreate) {
  // Each device owns 2^EVENT_FLAGS_USED_BITS consecutive slots, one per
  // possible flag combination.
  const auto slotsPerDevice = 1ULL << EVENT_FLAGS_USED_BITS;
  pools.resize(max_devices * slotsPerDevice);
}
2121

2222
// Nothing to do explicitly: the members release what they own.
event_pool_cache::~event_pool_cache() = default;
2323

24-
raii::cache_borrowed_event_pool event_pool_cache::borrow(DeviceId id) {
24+
// Hands out an event pool for the given device id and event flags.
//
// A pool is taken from the matching slot, creating one through the provider
// factory when the slot is empty. The returned handle puts the pool back into
// its slot when it is destroyed. Returns a null handle when the computed slot
// index is outside the cache. The cache mutex is held while the slot is
// accessed, both here and in the give-back callback.
raii::cache_borrowed_event_pool event_pool_cache::borrow(DeviceId id,
                                                         event_flags_t flags) {
  std::unique_lock<ur_mutex> Lock(mutex);

  const auto slot = event_descriptor{id, flags}.index();
  if (slot >= pools.size()) {
    return nullptr;
  }

  auto &available = pools[slot];
  if (available.empty()) {
    available.emplace_back(
        std::make_unique<event_pool>(providerCreate(id, flags)));
  }

  // Detach the pool from the cache; the returned handle's deleter gives it
  // back instead of destroying it.
  event_pool *borrowed = available.back().release();
  available.pop_back();

  return raii::cache_borrowed_event_pool(
      borrowed, [this, flags](event_pool *returned) {
        std::unique_lock<ur_mutex> Lock(mutex);
        const auto home = event_descriptor{returned->Id(), flags}.index();
        pools[home].emplace_back(returned);
      });
}
4447

source/adapters/level_zero/v2/event_pool_cache.hpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,28 @@ using cache_borrowed_event_pool =
3232

3333
class event_pool_cache {
3434
public:
35-
using ProviderCreateFunc =
36-
std::function<std::unique_ptr<event_provider>(DeviceId)>;
35+
using ProviderCreateFunc = std::function<std::unique_ptr<event_provider>(
36+
DeviceId, event_flags_t flags)>;
3737

3838
event_pool_cache(size_t max_devices, ProviderCreateFunc);
3939
~event_pool_cache();
4040

41-
raii::cache_borrowed_event_pool borrow(DeviceId);
41+
raii::cache_borrowed_event_pool borrow(DeviceId, event_flags_t flags);
4242

4343
private:
4444
ur_mutex mutex;
4545
ProviderCreateFunc providerCreate;
46+
47+
struct event_descriptor {
48+
DeviceId device;
49+
event_flags_t flags;
50+
51+
uint64_t index() {
52+
return uint64_t(flags) | (uint64_t(device) << EVENT_FLAGS_USED_BITS);
53+
}
54+
};
55+
56+
// Indexed by event_descriptor::index()
4657
std::vector<std::vector<std::unique_ptr<event_pool>>> pools;
4758
};
4859

source/adapters/level_zero/v2/event_provider.hpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,12 @@
2121

2222
namespace v2 {
2323

24-
enum event_type { EVENT_REGULAR, EVENT_COUNTER };
24+
using event_flags_t = uint32_t;
25+
enum event_flag_t {
26+
EVENT_FLAGS_COUNTER = UR_BIT(0),
27+
EVENT_FLAGS_PROFILING_ENABLED = UR_BIT(1),
28+
};
29+
static constexpr size_t EVENT_FLAGS_USED_BITS = 2;
2530

2631
class event_provider;
2732

@@ -31,16 +36,12 @@ using cache_borrowed_event =
3136
std::function<void(::ze_event_handle_t)>>;
3237
} // namespace raii
3338

34-
struct event_allocation {
35-
event_type type;
36-
raii::cache_borrowed_event borrow;
37-
};
38-
3939
class event_provider {
4040
public:
4141
virtual ~event_provider() = default;
42-
virtual event_allocation allocate() = 0;
42+
virtual raii::cache_borrowed_event allocate() = 0;
4343
virtual ur_device_handle_t device() = 0;
44+
virtual event_flags_t eventFlags() const = 0;
4445
};
4546

4647
} // namespace v2

0 commit comments

Comments
 (0)