Skip to content

Commit 74cd7ae

Browse files
authored
Merge pull request #481 from leofang/add_timing
Add event timing
2 parents 81c6f77 + 268eee5 commit 74cd7ae

File tree

3 files changed

+52
-5
lines changed

3 files changed

+52
-5
lines changed

cuda_core/cuda/core/experimental/_event.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,21 @@ class Event:
4747
the last recorded stream.
4848
4949
Events can be used to monitor device's progress, query completion
50-
of work up to event's record, and help establish dependencies
51-
between GPU work submissions.
50+
of work up to event's record, help establish dependencies
51+
between GPU work submissions, and record the elapsed time (in milliseconds)
52+
on GPU:
53+
54+
.. code-block:: python
55+
56+
# To create events and record the timing:
57+
s = Device().create_stream()
58+
e1 = Device().create_event({"enable_timing": True})
59+
e2 = Device().create_event({"enable_timing": True})
60+
s.record(e1)
61+
# ... run some GPU work ...
62+
s.record(e2)
63+
e2.sync()
64+
print(f"time = {e2 - e1} milliseconds")
5265
5366
Directly creating an :obj:`~_event.Event` is not supported due to ambiguity,
5467
and they should instead be created through a :obj:`~_stream.Stream` object.
@@ -96,6 +109,22 @@ def close(self):
96109
"""Destroy the event."""
97110
self._mnff.close()
98111

112+
def __isub__(self, other):
    """Decline in-place subtraction (``self -= other``).

    Returning ``NotImplemented`` makes Python fall back to the regular
    subtraction protocol, so ``a -= b`` is evaluated as ``a = a - b`` and
    rebinds ``a`` to the elapsed time instead of mutating the event.
    """
    return NotImplemented

def __rsub__(self, other):
    """Decline reflected subtraction (``other - self``).

    Python only consults this method when the left operand could not handle
    the subtraction itself; returning ``NotImplemented`` lets the
    interpreter raise its standard ``TypeError``.
    """
    return NotImplemented

def __sub__(self, other):
    """Return the elapsed time, in milliseconds, from ``other`` to ``self``.

    ``end - start`` measures the GPU time between the point where ``start``
    was recorded and the point where ``end`` was recorded.

    Parameters
    ----------
    other : Event
        The event marking the start of the measured interval.

    Returns
    -------
    The elapsed time in milliseconds reported by ``cuEventElapsedTime``,
    or ``NotImplemented`` when ``other`` is not an ``Event`` (so that
    Python raises its usual ``TypeError`` for unsupported operands).

    Raises
    ------
    RuntimeError
        If the driver cannot compute the elapsed time. The underlying
        ``CUDAError`` is chained as the cause.
    """
    if not isinstance(other, Event):
        # Follow the binary-operator protocol instead of failing with an
        # AttributeError on ``other.handle``.
        return NotImplemented
    try:
        return handle_return(driver.cuEventElapsedTime(other.handle, self.handle))
    except CUDAError as e:
        if self.is_timing_disabled or other.is_timing_disabled:
            raise RuntimeError(
                "Timing capability must be enabled in order to subtract two Events; timing is disabled by default."
            ) from e
        # Timing is enabled on both events, so blaming the timing option
        # would be misleading; the driver failed for another reason.
        raise RuntimeError(
            "Failed to compute the elapsed time between two Events; both Events must have been "
            "recorded and completed (for example via Event.sync()) before they can be subtracted."
        ) from e
127+
99128
@property
100129
def is_timing_disabled(self) -> bool:
101130
"""Return True if the event does not record timing data, otherwise False."""

cuda_core/docs/source/release/0.2.0-notes.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ New features
2727
- Expose :class:`ObjectCode` as a public API, which allows loading cubins from memory or disk. For loading other kinds of code types, please continue using :class:`Program`.
2828
- A C++ helper function ``get_cuda_native_handle()`` is provided in the new ``include/utility.cuh`` header to retrieve the underlying CUDA C objects (ex: ``CUstream``) from a Python object returned by the ``.handle`` attribute (ex: :attr:`Stream.handle`).
2929
- For objects such as :class:`Program` and :class:`Linker` that could dispatch to different backends, a new ``.backend`` attribute is provided to query this information.
30-
- An :class:`~_event.Event` may now be created without recording it to a :class:`Stream` using the :meth:`Device.create_event`` method.
30+
- Support CUDA event timing.
31+
- An :class:`~_event.Event` may now be created without recording it to a :class:`~_stream.Stream` using the :meth:`Device.create_event` method.
3132

3233
Limitations
3334
-----------

cuda_core/tests/test_event.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
# this software and related documentation outside the terms of the EULA
77
# is strictly prohibited.
88

9+
import time
10+
911
import pytest
1012

1113
import cuda.core.experimental
@@ -21,8 +23,23 @@ def test_event_init_disabled():
2123
def test_timing(init_cuda, enable_timing):
    """Check that Event subtraction reports elapsed time only when timing is enabled.

    Two events are recorded on the same stream around a host-side sleep.
    With timing enabled, their difference must be a float close to the sleep
    duration; otherwise the subtraction must raise a ``RuntimeError`` that
    explains timing is disabled and chains the driver error.
    """
    options = EventOptions(enable_timing=enable_timing)
    stream = Device().create_stream()
    delay_seconds = 0.5
    e1 = stream.record(options=options)
    time.sleep(delay_seconds)
    e2 = stream.record(options=options)
    e2.sync()

    # Timing is opt-in: both None and False leave it disabled.
    for event in (e1, e2):
        assert event.is_timing_disabled == (not enable_timing)

    if enable_timing:
        elapsed_time_ms = e2 - e1
        assert isinstance(elapsed_time_ms, float)
        # Use a generous, symmetric tolerance: this test exercises __sub__,
        # it is not a stress test of time.sleep() or the host timer resolution.
        delay_ms = delay_seconds * 1000
        tolerance_ms = 100
        assert delay_ms - tolerance_ms <= elapsed_time_ms < delay_ms + tolerance_ms
    else:
        with pytest.raises(RuntimeError, match="disabled by default") as exc_info:
            _ = e2 - e1
        # The checks must run after the ``with`` block and against the
        # exception itself; the driver failure is chained as its cause.
        # NOTE(review): assumes the CUDAError text names the driver error code - confirm.
        assert "CUDA_ERROR_INVALID_HANDLE" in str(exc_info.value.__cause__)
2643

2744

2845
def test_is_sync_busy_waited(init_cuda):

0 commit comments

Comments
 (0)