@@ -31,22 +31,24 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
31
31
// / with a given device and control access to said device from the user side.
32
32
// / UR API context are objects that are passed to functions, and not bound
33
33
// / to threads.
34
- // / The ur_context_handle_t_ object doesn't implement this behavior. It only
35
- // / holds the HIP context data. The RAII object \ref ScopedDevice implements
36
- // / the active context behavior.
37
34
// /
38
- // / <b> Primary vs UserDefined context </b>
35
+ // / Since the ur_context_handle_t can contain multiple devices, and a `hipCtx_t`
36
+ // / refers to only a single device, the `hipCtx_t` is more tightly coupled to a
37
+ // / ur_device_handle_t than a ur_context_handle_t. In order to remove some
38
+ // / ambiguities about the different semantics of ur_context_handle_t and
39
+ // / native `hipCtx_t`, we access the native `hipCtx_t` solely through the
40
+ // / ur_device_handle_t class, by using the RAII object \ref ScopedDevice, which
41
+ // / sets the active device (by setting the active native `hipCtx_t`).
39
42
// /
40
- // / HIP has two different types of context, the Primary context,
41
- // / which is usable by all threads on a given process for a given device, and
42
- // / the aforementioned custom contexts.
43
- // / The HIP documentation, and performance analysis, suggest using the Primary
44
- // / context whenever possible. The Primary context is also used by the HIP
45
- // / Runtime API. For UR applications to interop with HIP Runtime API, they have
46
- // / to use the primary context - and make that active in the thread. The
47
- // / `ur_context_handle_t_` object can be constructed with a `kind` parameter
48
- // / that allows to construct a Primary or `UserDefined` context, so that
49
- // / the UR object interface is always the same.
43
+ // / <b> Primary vs User-defined `hipCtx_t` </b>
44
+ // /
45
+ // / HIP has two different types of `hipCtx_t`, the Primary context, which is
46
+ // / usable by all threads on a given process for a given device, and the
47
+ // / aforementioned custom `hipCtx_t`s.
48
+ // / The HIP documentation, confirmed with performance analysis, suggests using
49
+ // / the Primary context whenever possible. The Primary context is also used by
50
+ // / the HIP Runtime API. For UR applications to interop with HIP Runtime API,
51
+ // / they have to use the primary context - and make that active in the thread.
50
52
// /
51
53
// / <b> Destructor callback </b>
52
54
// /
@@ -56,6 +58,15 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
56
58
// / See proposal for details.
57
59
// / https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
58
60
// /
61
+ // / <b> Memory Management for Devices in a Context </b>
62
+ // /
63
+ // / A ur_buffer_ is associated with a ur_context_handle_t_, which may refer to
64
+ // / multiple devices. Therefore the ur_buffer_ must handle a native allocation
65
+ // / for each device in the context. UR is responsible for automatically
66
+ // / handling event dependencies for kernels writing to or reading from the
67
+ // / same ur_buffer_ and migrating memory between native allocations for
68
+ // / devices in the same ur_context_handle_t_ if necessary.
69
+ // /
59
70
struct ur_context_handle_t_ {
60
71
61
72
struct deleter_data {
@@ -67,15 +78,23 @@ struct ur_context_handle_t_ {
67
78
68
79
using native_type = hipCtx_t;
69
80
70
- ur_device_handle_t DeviceId;
81
+ std::vector<ur_device_handle_t > Devices;
82
+ uint32_t NumDevices;
83
+
71
84
std::atomic_uint32_t RefCount;
72
85
73
- ur_context_handle_t_ (ur_device_handle_t DevId)
74
- : DeviceId{DevId}, RefCount{1 } {
75
- urDeviceRetain (DeviceId);
86
+ ur_context_handle_t_ (const ur_device_handle_t *Devs, uint32_t NumDevices)
87
+ : Devices{Devs, Devs + NumDevices}, NumDevices{NumDevices}, RefCount{1 } {
88
+ for (auto &Dev : Devices) {
89
+ urDeviceRetain (Dev);
90
+ }
76
91
};
77
92
78
- ~ur_context_handle_t_ () { urDeviceRelease (DeviceId); }
93
+ ~ur_context_handle_t_ () {
94
+ for (auto &Dev : Devices) {
95
+ urDeviceRelease (Dev);
96
+ }
97
+ }
79
98
80
99
void invokeExtendedDeleters () {
81
100
std::lock_guard<std::mutex> Guard (Mutex);
@@ -90,7 +109,9 @@ struct ur_context_handle_t_ {
90
109
ExtendedDeleters.emplace_back (deleter_data{Function, UserData});
91
110
}
92
111
93
- ur_device_handle_t getDevice () const noexcept { return DeviceId; }
112
+ std::vector<ur_device_handle_t > getDevices () const noexcept {
113
+ return Devices;
114
+ }
94
115
95
116
/// Atomically adds one to RefCount and returns the value after the increment.
uint32_t incrementReferenceCount() noexcept {
  const uint32_t Previous = RefCount.fetch_add(1);
  return Previous + 1;
}
96
117
0 commit comments