@@ -28,22 +28,24 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
28
28
// / with a given device and control access to said device from the user side.
29
29
// / UR API context are objects that are passed to functions, and not bound
30
30
// / to threads.
31
- // / The ur_context_handle_t_ object doesn't implement this behavior. It only
32
- // / holds the HIP context data. The RAII object \ref ScopedDevice implements
33
- // / the active context behavior.
34
31
// /
35
- // / <b> Primary vs UserDefined context </b>
32
+ // / Since the ur_context_handle_t can contain multiple devices, and a `hipCtx_t`
33
+ // / refers to only a single device, the `hipCtx_t` is more tightly coupled to a
34
+ // / ur_device_handle_t than a ur_context_handle_t. In order to remove some
35
+ // / ambiguities about the different semantics of ur_context_handle_t objects and
36
+ // / native `hipCtx_t`, we access the native `hipCtx_t` solely through the
37
+ // / ur_device_handle_t class, by using the RAII object \ref ScopedDevice, which
38
+ // / sets the active device (by setting the active native `hipCtx_t`).
36
39
// /
37
- // / HIP has two different types of context, the Primary context,
38
- // / which is usable by all threads on a given process for a given device, and
39
- // / the aforementioned custom contexts.
40
- // / The HIP documentation, and performance analysis, suggest using the Primary
41
- // / context whenever possible. The Primary context is also used by the HIP
42
- // / Runtime API. For UR applications to interop with HIP Runtime API, they have
43
- // / to use the primary context - and make that active in the thread. The
44
- // / `ur_context_handle_t_` object can be constructed with a `kind` parameter
45
- // / that allows to construct a Primary or `UserDefined` context, so that
46
- // / the UR object interface is always the same.
40
+ // / <b> Primary vs User-defined `hipCtx_t` </b>
41
+ // /
42
+ // / HIP has two different types of `hipCtx_t`, the Primary context, which is
43
+ // / usable by all threads on a given process for a given device, and the
44
+ // / aforementioned custom `hipCtx_t`s.
45
+ // / The HIP documentation, confirmed with performance analysis, suggests using
46
+ // / the Primary context whenever possible. The Primary context is also used by
47
+ // / the HIP Runtime API. For UR applications to interop with HIP Runtime API,
48
+ // / they have to use the primary context - and make that active in the thread.
47
49
// /
48
50
// / <b> Destructor callback </b>
49
51
// /
@@ -53,6 +55,15 @@ typedef void (*ur_context_extended_deleter_t)(void *UserData);
53
55
// / See proposal for details.
54
56
// / https://github.com/codeplaysoftware/standards-proposals/blob/master/extended-context-destruction/index.md
55
57
// /
58
+ // / <b> Memory Management for Devices in a Context </b>
59
+ // /
60
+ // / A ur_buffer_ is associated with a ur_context_handle_t_, which may refer to
61
+ // / multiple devices. Therefore the ur_buffer_ must handle a native allocation
62
+ // / for each device in the context. UR is responsible for automatically
63
+ // / handling event dependencies for kernels writing to or reading from the
64
+ // / same ur_buffer_ and migrating memory between native allocations for
65
+ // / devices in the same ur_context_handle_t_ if necessary.
66
+ // /
56
67
struct ur_context_handle_t_ {
57
68
58
69
struct deleter_data {
@@ -64,15 +75,23 @@ struct ur_context_handle_t_ {
64
75
65
76
using native_type = hipCtx_t;
66
77
67
- ur_device_handle_t DeviceId;
78
+ std::vector<ur_device_handle_t > Devices;
79
+ uint32_t NumDevices;
80
+
68
81
std::atomic_uint32_t RefCount;
69
82
70
- ur_context_handle_t_ (ur_device_handle_t DevId)
71
- : DeviceId{DevId}, RefCount{1 } {
72
- urDeviceRetain (DeviceId);
83
+ ur_context_handle_t_ (const ur_device_handle_t *Devs, uint32_t NumDevices)
84
+ : Devices{Devs, Devs + NumDevices}, NumDevices{NumDevices}, RefCount{1 } {
85
+ for (auto &Dev : Devices) {
86
+ urDeviceRetain (Dev);
87
+ }
73
88
};
74
89
75
- ~ur_context_handle_t_ () { urDeviceRelease (DeviceId); }
90
+ ~ur_context_handle_t_ () {
91
+ for (auto &Dev : Devices) {
92
+ urDeviceRelease (Dev);
93
+ }
94
+ }
76
95
77
96
void invokeExtendedDeleters () {
78
97
std::lock_guard<std::mutex> Guard (Mutex);
@@ -87,7 +106,9 @@ struct ur_context_handle_t_ {
87
106
ExtendedDeleters.emplace_back (deleter_data{Function, UserData});
88
107
}
89
108
90
- ur_device_handle_t getDevice () const noexcept { return DeviceId; }
109
+ std::vector<ur_device_handle_t > getDevices () const noexcept {
110
+ return Devices;
111
+ }
91
112
92
113
uint32_t incrementReferenceCount () noexcept { return ++RefCount; }
93
114
0 commit comments