@@ -137,38 +137,48 @@ pi_result check_error(CUresult result, const char *function, int line,
137
137
// / \cond NODOXY
138
138
#define PI_CHECK_ERROR (result ) check_error(result, __func__, __LINE__, __FILE__)
139
139
140
- // / Scoped context is used across all PI CUDA plugin implementation
141
- // / to activate the PI Context on the current thread.
142
- // / The implementation tries to avoid replacing the CUcontext if it cans.
140
+ // / ScopedContext is used across all PI CUDA plugin implementation to ensure
141
+ // / that the proper CUDA context is active for the given PI context.
142
+ //
143
+ // / This class will only replace the context if necessary, and will leave the
144
+ // / new context active on the current thread. If there was an active context
145
+ // / already it will simply be replaced.
146
+ //
147
+ // / Previously active contexts are not restored for two reasons:
148
+ // / * Performance: context switches are expensive so leaving the context active
149
+ // / means subsequent SYCL calls with the same context will be cheaper.
150
+ // / * Multi-threading cleanup: contexts are set active per thread and deleting a
151
+ // / context will only deactivate it for the current thread. This means other
152
+ // / threads may end up with deleted active contexts. In particular this can
153
+ // / happen with host_tasks as they run in a thread pool. When the context
154
+ // / associated with these tasks is deleted it will remain active in the
155
+ // / threads of the thread pool. So it would be invalid for any other task
156
+ // / running on these threads to try to restore the deleted context. With the
157
+ // / current implementation this is not an issue because the active deleted
158
+ // / context will just be replaced.
159
+ //
160
+ // / This approach does mean that CUDA interop tasks should NOT expect their
161
+ // / contexts to be restored by SYCL.
143
162
class ScopedContext {
144
- pi_context placedContext_;
145
- CUcontext original_;
146
-
147
163
public:
148
- ScopedContext (pi_context ctxt) : placedContext_{ctxt} {
149
-
150
- if (!placedContext_) {
164
+ ScopedContext (pi_context ctxt) {
165
+ if (!ctxt) {
151
166
throw PI_INVALID_CONTEXT;
152
167
}
153
168
154
- CUcontext desired = placedContext_->get ();
155
- PI_CHECK_ERROR (cuCtxGetCurrent (&original_));
156
- if (original_ != desired) {
157
- // Sets the desired context as the active one for the thread
169
+ CUcontext desired = ctxt->get ();
170
+ CUcontext original = nullptr ;
171
+
172
+ PI_CHECK_ERROR (cuCtxGetCurrent (&original));
173
+
174
+ // Make sure the desired context is active on the current thread, setting
175
+ // it if necessary
176
+ if (original != desired) {
158
177
PI_CHECK_ERROR (cuCtxSetCurrent (desired));
159
178
}
160
179
}
161
180
162
- ~ScopedContext () {
163
- // Leave the context active, this avoids costly context switches for
164
- // subsequent calls that use the same context.
165
- //
166
- // Calls using a different context will simply set their own context as
167
- // active, so it will context switch as necessary.
168
- //
169
- // This does mean that interop tasks shouldn't make any assumptions about
170
- // the state of the CUDA context after or in between calls to SYCL.
171
- }
181
+ ~ScopedContext () {}
172
182
};
173
183
174
184
// / \cond NODOXY
0 commit comments