Skip to content

Commit f82c92e

Browse files
committed
[SYCL][CUDA] Fix context creation property parsing
Align the property parsing of `piContextCreate` with the way OpenCL properties are provided, i.e., a property list contains keys/IDs and values, never just a key/ID. Furthermore enable property lists that just contain a list terminating `0`. Introduce a PI type for context properties to untie from OpenCL. Adapt the SYCL runtime and tests accordingly. Signed-off-by: Bjoern Knafla <[email protected]>
1 parent 4d7dbd8 commit f82c92e

File tree

6 files changed

+114
-26
lines changed

6 files changed

+114
-26
lines changed

sycl/include/CL/sycl/backend/cuda.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ namespace cuda {
1818

1919
// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem
2020
#define PI_CUDA_RAW_POINTER (0xFF01)
21-
// Context creation: Use the primary context instead of a custom one
21+
// Context creation: Use a primary CUDA context instead of a custom one by
22+
// providing a property value of PI_TRUE for the following
23+
// property ID.
2224
#define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02)
2325

2426
// PI Command Queue using Default stream

sycl/include/CL/sycl/detail/pi.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,8 @@ typedef enum {
216216
PI_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL
217217
} _pi_local_mem_type;
218218

219+
typedef intptr_t pi_context_properties;
220+
219221
// TODO: populate
220222
typedef enum {
221223
PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES,
@@ -630,7 +632,7 @@ pi_result piextGetDeviceFunctionPointer(
630632
// Context
631633
//
632634
pi_result piContextCreate(
633-
const cl_context_properties * properties, // TODO: untie from OpenCL
635+
const pi_context_properties *properties,
634636
pi_uint32 num_devices,
635637
const pi_device * devices,
636638
void (* pfn_notify)(

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1097,7 +1097,25 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
10971097
}
10981098

10991099
/* Context APIs */
1100-
pi_result cuda_piContextCreate(const cl_context_properties *properties,
1100+
1101+
/// Create a PI CUDA context.
1102+
///
1103+
/// By default creates a scoped context and keeps the last active CUDA context
1104+
/// on top of the CUDA context stack.
1105+
/// With the PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE
1106+
/// creates a primary CUDA context and activates it on the CUDA context stack.
1107+
///
1108+
/// @param[in] properties 0 terminated array of key/id-value combinations. Can
1109+
/// be nullptr. Only accepts property key/id PI_CONTEXT_PROPERTIES_CUDA_PRIMARY
1110+
/// with a pi_bool value.
1111+
/// @param[in] num_devices Number of devices to create the context for.
1112+
/// @param[in] devices Devices to create the context for.
1113+
/// @param[in] pfn_notify Callback, currently unused.
1114+
/// @param[in] user_data User data for callback.
1115+
/// @param[out] retcontext Set to created context on success.
1116+
///
1117+
/// @return PI_SUCCESS on success, otherwise an error return code.
1118+
pi_result cuda_piContextCreate(const pi_context_properties *properties,
11011119
pi_uint32 num_devices, const pi_device *devices,
11021120
void (*pfn_notify)(const char *errinfo,
11031121
const void *private_info,
@@ -1114,31 +1132,51 @@ pi_result cuda_piContextCreate(const cl_context_properties *properties,
11141132
assert(retcontext != nullptr);
11151133
pi_result errcode_ret = PI_SUCCESS;
11161134

1135+
// Parse properties.
1136+
bool property_cuda_primary = false;
1137+
while (properties && (0 != *properties)) {
1138+
// Consume property ID.
1139+
pi_context_properties id = *properties;
1140+
++properties;
1141+
// Consume property value.
1142+
pi_context_properties value = *properties;
1143+
++properties;
1144+
switch (id) {
1145+
case PI_CONTEXT_PROPERTIES_CUDA_PRIMARY:
1146+
assert(value == PI_FALSE || value == PI_TRUE);
1147+
property_cuda_primary = static_cast<bool>(value);
1148+
break;
1149+
default:
1150+
// Unknown property.
1151+
assert(!"Unknown piContextCreate property in property list");
1152+
return PI_INVALID_VALUE;
1153+
}
1154+
}
1155+
11171156
std::unique_ptr<_pi_context> piContextPtr{nullptr};
11181157
try {
1119-
if (properties && *properties != PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) {
1120-
throw pi_result(CL_INVALID_VALUE);
1121-
} else if (!properties) {
1158+
if (property_cuda_primary) {
1159+
// Use the CUDA primary context and assume that we want to use it
1160+
// immediately as we want to forge context switches.
1161+
CUcontext Ctxt;
1162+
errcode_ret = PI_CHECK_ERROR(
1163+
cuDevicePrimaryCtxRetain(&Ctxt, devices[0]->cuDevice_));
1164+
piContextPtr = std::unique_ptr<_pi_context>(
1165+
new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1166+
errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt));
1167+
} else {
1168+
// Create a scoped context.
11221169
CUcontext newContext, current;
11231170
PI_CHECK_ERROR(cuCtxGetCurrent(&current));
1124-
errcode_ret = PI_CHECK_ERROR(cuCtxCreate(&newContext, CU_CTX_MAP_HOST,
1125-
(*devices)->cuDevice_));
1171+
errcode_ret = PI_CHECK_ERROR(
1172+
cuCtxCreate(&newContext, CU_CTX_MAP_HOST, devices[0]->cuDevice_));
11261173
piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{
11271174
_pi_context::kind::user_defined, newContext, *devices});
1175+
// For scoped contexts keep the last active CUDA one on top of the stack
1176+
// as `cuCtxtCreate` replaces it implicitly otherwise.
11281177
if (current != nullptr) {
1129-
// If there was an existing context on the thread we recover it
11301178
PI_CHECK_ERROR(cuCtxSetCurrent(current));
11311179
}
1132-
} else if (properties
1133-
&& *properties == PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) {
1134-
CUcontext Ctxt;
1135-
errcode_ret = PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(
1136-
&Ctxt, (*devices)->cuDevice_));
1137-
piContextPtr = std::unique_ptr<_pi_context>(
1138-
new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1139-
errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt));
1140-
} else {
1141-
throw pi_result(CL_INVALID_VALUE);
11421180
}
11431181

11441182
*retcontext = piContextPtr.release();

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ pi_result OCL(piextGetDeviceFunctionPointer)(pi_device device,
452452
}
453453

454454
pi_result OCL(piContextCreate)(
455-
const cl_context_properties *properties, // TODO: untie from OpenCL
455+
const pi_context_properties *properties,
456456
pi_uint32 num_devices, const pi_device *devices,
457457
void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb,
458458
void *user_data1),

sycl/source/detail/context_impl.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,8 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
4444

4545
if (MPlatform->is_cuda()) {
4646
#if USE_PI_CUDA
47-
const cl_context_properties props[] = {
48-
PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
49-
0};
47+
const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
48+
UseCUDAPrimaryContext, 0};
5049

5150
getPlugin().call<PiApiKind::piContextCreate>(props, DeviceIds.size(),
5251
DeviceIds.data(), nullptr, nullptr, &MContext);

sycl/unittests/pi/cuda/test_base_objects.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreate) {
7373
ASSERT_EQ(cuErr, CUDA_SUCCESS);
7474
}
7575

76-
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) {
76+
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimaryTrue) {
7777
pi_uint32 numPlatforms = 0;
7878
pi_platform platform;
7979
pi_device device;
@@ -91,11 +91,12 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) {
9191
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piDevicesGet>(
9292
platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
9393
PI_SUCCESS);
94-
cl_context_properties properties = PI_CONTEXT_PROPERTIES_CUDA_PRIMARY;
94+
pi_context_properties properties[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
95+
PI_TRUE, 0};
9596

9697
pi_context ctxt;
9798
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piContextCreate>(
98-
&properties, 1, &device, nullptr, nullptr, &ctxt)),
99+
properties, 1, &device, nullptr, nullptr, &ctxt)),
99100
PI_SUCCESS);
100101
EXPECT_NE(ctxt, nullptr);
101102
EXPECT_EQ(ctxt->get_device(), device);
@@ -118,6 +119,52 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) {
118119
PI_SUCCESS);
119120
}
120121

122+
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimaryFalse) {
123+
pi_uint32 numPlatforms = 0;
124+
pi_platform platform;
125+
pi_device device;
126+
127+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piPlatformsGet>(
128+
0, nullptr, &numPlatforms)),
129+
PI_SUCCESS)
130+
<< "piPlatformsGet failed.\n";
131+
132+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piPlatformsGet>(
133+
numPlatforms, &platform, nullptr)),
134+
PI_SUCCESS)
135+
<< "piPlatformsGet failed.\n";
136+
137+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piDevicesGet>(
138+
platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
139+
PI_SUCCESS);
140+
pi_context_properties properties[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
141+
PI_FALSE, 0};
142+
143+
pi_context ctxt;
144+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piContextCreate>(
145+
properties, 1, &device, nullptr, nullptr, &ctxt)),
146+
PI_SUCCESS);
147+
EXPECT_NE(ctxt, nullptr);
148+
EXPECT_EQ(ctxt->get_device(), device);
149+
EXPECT_FALSE(ctxt->is_primary());
150+
151+
// Retrieve the cuCtxt to check information is correct
152+
CUcontext cudaContext = ctxt->get();
153+
unsigned int version = 0;
154+
CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version);
155+
ASSERT_EQ(cuErr, CUDA_SUCCESS);
156+
EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
157+
158+
// Current context in the stack?
159+
CUcontext current;
160+
cuErr = cuCtxGetCurrent(&current);
161+
ASSERT_EQ(cuErr, CUDA_SUCCESS);
162+
ASSERT_EQ(current, cudaContext);
163+
ASSERT_EQ(
164+
(Plugins[0].call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
165+
PI_SUCCESS);
166+
}
167+
121168
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreateChildThread) {
122169
pi_uint32 numPlatforms = 0;
123170
pi_platform platform;

0 commit comments

Comments
 (0)