Skip to content

Commit d168fa5

Browse files
committed
[SYCL][CUDA] Add contexts tests
1 parent 998bf32 commit d168fa5

File tree

2 files changed

+248
-0
lines changed

2 files changed

+248
-0
lines changed

sycl/unittests/pi/cuda/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ set(LLVM_REQUIRES_EH 1)
22
add_sycl_unittest(PiCudaTests OBJECT
33
test_base_objects.cpp
44
test_commands.cpp
5+
test_contexts.cpp
56
test_device.cpp
67
test_interop_get_native.cpp
78
test_kernels.cpp
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
//==---- test_contexts.cpp --- PI unit tests -------------------------------==//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "gtest/gtest.h"
10+
11+
#include <condition_variable>
12+
#include <thread>
13+
#include <mutex>
14+
15+
#include <cuda.h>
16+
17+
#include "TestGetPlugin.hpp"
18+
#include <CL/sycl.hpp>
19+
#include <CL/sycl/detail/pi.hpp>
20+
#include <detail/plugin.hpp>
21+
#include <pi_cuda.hpp>
22+
23+
using namespace cl::sycl;
24+
25+
struct CudaContextsTest : public ::testing::Test {
26+
27+
protected:
28+
detail::plugin *plugin = pi::initializeAndGet(backend::cuda);
29+
30+
pi_platform platform_;
31+
pi_device device_;
32+
33+
void SetUp() override {
34+
// skip the tests if the CUDA backend is not available
35+
if (!plugin) {
36+
GTEST_SKIP();
37+
}
38+
39+
pi_uint32 numPlatforms = 0;
40+
ASSERT_EQ(plugin->getBackend(), backend::cuda);
41+
42+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
43+
0, nullptr, &numPlatforms)),
44+
PI_SUCCESS)
45+
<< "piPlatformsGet failed.\n";
46+
47+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piPlatformsGet>(
48+
numPlatforms, &platform_, nullptr)),
49+
PI_SUCCESS)
50+
<< "piPlatformsGet failed.\n";
51+
52+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piDevicesGet>(
53+
platform_, PI_DEVICE_TYPE_GPU, 1, &device_, nullptr)),
54+
PI_SUCCESS);
55+
}
56+
57+
void TearDown() override {}
58+
59+
CudaContextsTest() = default;
60+
61+
~CudaContextsTest() = default;
62+
};
63+
64+
// Checks the lifetime of a PI context created on a thread that has no active
// CUDA context: after creating a queue the PI context's native context must be
// current, and after releasing the queue and the context no CUDA context may
// remain current on the thread.
TEST_F(CudaContextsTest, ContextLifetime) {
  // start with no active context
  ASSERT_EQ(cuCtxSetCurrent(nullptr), CUDA_SUCCESS);

  // create a context
  pi_context context = nullptr;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
                nullptr, 1, &device_, nullptr, nullptr, &context)),
            PI_SUCCESS);
  ASSERT_NE(context, nullptr);

  // create a queue from the context, this should use the ScopedContext
  pi_queue queue = nullptr;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
                context, device_, 0, &queue)),
            PI_SUCCESS);
  ASSERT_NE(queue, nullptr);

  // ensure the queue has the correct context
  ASSERT_EQ(context, queue->get_context());

  // check that the context is now the active CUDA context; the driver call's
  // own result is checked so a failing query is reported as such rather than
  // as a context mismatch
  CUcontext cudaCtxt = nullptr;
  ASSERT_EQ(cuCtxGetCurrent(&cudaCtxt), CUDA_SUCCESS);
  ASSERT_EQ(cudaCtxt, context->get());

  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
  plugin->call<detail::PiApiKind::piContextRelease>(context);

  // check that the context was cleaned up properly by the destructor
  ASSERT_EQ(cuCtxGetCurrent(&cudaCtxt), CUDA_SUCCESS);
  ASSERT_EQ(cudaCtxt, nullptr);
}
97+
98+
// Checks the lifetime of a PI context created on a thread that already has a
// user-created CUDA context active: the PI context becomes current while it is
// in use, and the user's context is current again after the PI context is
// released.
TEST_F(CudaContextsTest, ContextLifetimeExisting) {
  // start by setting up a CUDA context on the thread; the result is checked so
  // that `original` is never compared while it holds no valid handle
  CUcontext original = nullptr;
  ASSERT_EQ(cuCtxCreate(&original, CU_CTX_MAP_HOST, device_->get()),
            CUDA_SUCCESS);

  // ensure the CUDA context is active
  CUcontext current = nullptr;
  ASSERT_EQ(cuCtxGetCurrent(&current), CUDA_SUCCESS);
  ASSERT_EQ(original, current);

  // create a PI context
  pi_context context = nullptr;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
                nullptr, 1, &device_, nullptr, nullptr, &context)),
            PI_SUCCESS);
  ASSERT_NE(context, nullptr);

  // create a queue from the context, this should use the ScopedContext
  pi_queue queue = nullptr;
  ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
                context, device_, 0, &queue)),
            PI_SUCCESS);
  ASSERT_NE(queue, nullptr);

  // ensure the queue has the correct context
  ASSERT_EQ(context, queue->get_context());

  // check that the context is now the active CUDA context
  ASSERT_EQ(cuCtxGetCurrent(&current), CUDA_SUCCESS);
  ASSERT_EQ(current, context->get());

  plugin->call<detail::PiApiKind::piQueueRelease>(queue);
  plugin->call<detail::PiApiKind::piContextRelease>(context);

  // check that the context was cleaned up, the old context will be restored
  // automatically by cuCtxDestroy in piContextRelease, as it was pushed on the
  // stack by cuCtxCreate
  ASSERT_EQ(cuCtxGetCurrent(&current), CUDA_SUCCESS);
  ASSERT_EQ(current, original);

  // release original context
  ASSERT_EQ(cuCtxDestroy(original), CUDA_SUCCESS);
}
141+
142+
// In some cases (for host_task), the SYCL runtime may call PI API functions
143+
// from threads of the thread pool, this can cause issues because with the CUDA
144+
// plugin these functions will set an active CUDA context on these threads, but
145+
// never clean it up, as it will only get cleaned up in the main thread.
146+
//
147+
// So the following test aims to reproduce the scenario where there is a
148+
// dangling deleted context in a separate thread and seeing if the PI calls are
149+
// still able to work correctly in that thread.
150+
TEST_F(CudaContextsTest, ContextThread) {
151+
// start with no active context
152+
cuCtxSetCurrent(nullptr);
153+
154+
// create two PI contexts
155+
pi_context context1;
156+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
157+
nullptr, 1, &device_, nullptr, nullptr, &context1)),
158+
PI_SUCCESS);
159+
ASSERT_NE(context1, nullptr);
160+
161+
pi_context context2;
162+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piContextCreate>(
163+
nullptr, 1, &device_, nullptr, nullptr, &context2)),
164+
PI_SUCCESS);
165+
ASSERT_NE(context2, nullptr);
166+
167+
// setup synchronization variables between the main thread and the testing
168+
// thread
169+
std::mutex m;
170+
std::condition_variable cv;
171+
bool released = false;
172+
bool thread_done = false;
173+
174+
// create a testing thread that will create a queue with the first context,
175+
// release the queue, then wait for the main thread to release the first
176+
// context, and then create and release another queue with the second context
177+
// this time
178+
auto test_thread = std::thread([&] {
179+
CUcontext current = nullptr;
180+
181+
// create a queue with the first context
182+
pi_queue queue;
183+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
184+
context1, device_, 0, &queue)),
185+
PI_SUCCESS);
186+
ASSERT_NE(queue, nullptr);
187+
188+
// ensure the queue has the correct context
189+
ASSERT_EQ(context1, queue->get_context());
190+
191+
// check that the first context is now the active CUDA context
192+
cuCtxGetCurrent(&current);
193+
ASSERT_EQ(current, context1->get());
194+
195+
plugin->call<detail::PiApiKind::piQueueRelease>(queue);
196+
197+
// mark the first set of processing as done and notify the main thread
198+
std::unique_lock<std::mutex> lock(m);
199+
thread_done = true;
200+
lock.unlock();
201+
cv.notify_one();
202+
203+
// wait for the main thread to release the first context
204+
lock.lock();
205+
cv.wait(lock, [&] { return released; });
206+
207+
// check that the first context is still active, this is because deleting a
208+
// context only cleans up the current thread
209+
cuCtxGetCurrent(&current);
210+
ASSERT_EQ(current, context1->get());
211+
212+
// create a queue with the second context
213+
ASSERT_EQ((plugin->call_nocheck<detail::PiApiKind::piQueueCreate>(
214+
context2, device_, 0, &queue)),
215+
PI_SUCCESS);
216+
ASSERT_NE(queue, nullptr);
217+
218+
// ensure the queue has the correct context
219+
ASSERT_EQ(context2, queue->get_context());
220+
221+
// check that the second context is now the active CUDA context
222+
cuCtxGetCurrent(&current);
223+
ASSERT_EQ(current, context2->get());
224+
225+
plugin->call<detail::PiApiKind::piQueueRelease>(queue);
226+
});
227+
228+
// wait for the thread to be done with the first queue to release the first context
229+
std::unique_lock<std::mutex> lock(m);
230+
cv.wait(lock, [&] { return thread_done; });
231+
plugin->call<detail::PiApiKind::piContextRelease>(context1);
232+
233+
// notify the other thread that the context was released
234+
released = true;
235+
lock.unlock();
236+
cv.notify_one();
237+
238+
// wait for the thread to finish
239+
test_thread.join();
240+
241+
plugin->call<detail::PiApiKind::piContextRelease>(context2);
242+
243+
// check that there is no context set on the main thread
244+
CUcontext current = nullptr;
245+
cuCtxGetCurrent(&current);
246+
ASSERT_EQ(current, nullptr);
247+
}

0 commit comments

Comments
 (0)