Skip to content

Commit 5e7ea06

Browse files
bjoernknaflabader
authored andcommitted
[SYCL][CUDA] Fix context creation property parsing
Align the property parsing of `piContextCreate` with the way OpenCL properties are provided, i.e., a property list contains keys/IDs and values, never just a key/ID. Furthermore enable property lists that just contain a list terminating `0`. Introduce a PI type for context properties to untie from OpenCL. Adapt the SYCL runtime and tests accordingly. Signed-off-by: Bjoern Knafla <[email protected]>
1 parent b1aa222 commit 5e7ea06

File tree

6 files changed

+129
-41
lines changed

6 files changed

+129
-41
lines changed

sycl/include/CL/sycl/backend/cuda.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ namespace cuda {
1818

1919
// Mem Object info: Retrieve the raw CUDA pointer from a cl_mem
2020
#define PI_CUDA_RAW_POINTER (0xFF01)
21-
// Context creation: Use the primary context instead of a custom one
21+
// Context creation: Use a primary CUDA context instead of a custom one by
22+
// providing a property value of PI_TRUE for the following
23+
// property ID.
2224
#define PI_CONTEXT_PROPERTIES_CUDA_PRIMARY (0xFF02)
2325

2426
// PI Command Queue using Default stream

sycl/include/CL/sycl/detail/pi.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ typedef enum {
232232
PI_LOCAL_MEM_TYPE_GLOBAL = CL_GLOBAL
233233
} _pi_local_mem_type;
234234

235+
typedef intptr_t pi_context_properties;
236+
235237
// TODO: populate
236238
typedef enum {
237239
PI_CONTEXT_INFO_DEVICES = CL_CONTEXT_DEVICES,
@@ -627,12 +629,12 @@ pi_result piextGetDeviceFunctionPointer(pi_device device, pi_program program,
627629
//
628630
// Context
629631
//
630-
pi_result piContextCreate(
631-
const cl_context_properties *properties, // TODO: untie from OpenCL
632-
pi_uint32 num_devices, const pi_device *devices,
633-
void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb,
634-
void *user_data),
635-
void *user_data, pi_context *ret_context);
632+
pi_result piContextCreate(const pi_context_properties *properties,
633+
pi_uint32 num_devices, const pi_device *devices,
634+
void (*pfn_notify)(const char *errinfo,
635+
const void *private_info,
636+
size_t cb, void *user_data),
637+
void *user_data, pi_context *ret_context);
636638

637639
pi_result piContextGetInfo(pi_context context, pi_context_info param_name,
638640
size_t param_value_size, void *param_value,

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 60 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,12 +1124,30 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
11241124
}
11251125

11261126
/* Context APIs */
1127-
pi_result cuda_piContextCreate(const cl_context_properties *properties,
1128-
pi_uint32 num_devices, const pi_device *devices,
1129-
void (*pfn_notify)(const char *errinfo,
1130-
const void *private_info,
1131-
size_t cb, void *user_data),
1132-
void *user_data, pi_context *retcontext) {
1127+
1128+
/// Create a PI CUDA context.
1129+
///
1130+
/// By default creates a scoped context and keeps the last active CUDA context
1131+
/// on top of the CUDA context stack.
1132+
/// With the PI_CONTEXT_PROPERTIES_CUDA_PRIMARY key/id and a value of PI_TRUE
1133+
/// creates a primary CUDA context and activates it on the CUDA context stack.
1134+
///
1135+
/// @param[in] properties 0 terminated array of key/id-value combinations. Can
1136+
/// be nullptr. Only accepts property key/id PI_CONTEXT_PROPERTIES_CUDA_PRIMARY
1137+
/// with a pi_bool value.
1138+
/// @param[in] num_devices Number of devices to create the context for.
1139+
/// @param[in] devices Devices to create the context for.
1140+
/// @param[in] pfn_notify Callback, currently unused.
1141+
/// @param[in] user_data User data for callback.
1142+
/// @param[out] retcontext Set to created context on success.
1143+
///
1144+
/// @return PI_SUCCESS on success, otherwise an error return code.
1145+
pi_result cuda_piContextCreate(const pi_context_properties *properties,
1146+
pi_uint32 num_devices, const pi_device *devices,
1147+
void (*pfn_notify)(const char *errinfo,
1148+
const void *private_info,
1149+
size_t cb, void *user_data),
1150+
void *user_data, pi_context *retcontext) {
11331151

11341152
assert(devices != nullptr);
11351153
// TODO: How to implement context callback?
@@ -1141,31 +1159,51 @@ pi_result cuda_piContextCreate(const cl_context_properties *properties,
11411159
assert(retcontext != nullptr);
11421160
pi_result errcode_ret = PI_SUCCESS;
11431161

1162+
// Parse properties.
1163+
bool property_cuda_primary = false;
1164+
while (properties && (0 != *properties)) {
1165+
// Consume property ID.
1166+
pi_context_properties id = *properties;
1167+
++properties;
1168+
// Consume property value.
1169+
pi_context_properties value = *properties;
1170+
++properties;
1171+
switch (id) {
1172+
case PI_CONTEXT_PROPERTIES_CUDA_PRIMARY:
1173+
assert(value == PI_FALSE || value == PI_TRUE);
1174+
property_cuda_primary = static_cast<bool>(value);
1175+
break;
1176+
default:
1177+
// Unknown property.
1178+
assert(!"Unknown piContextCreate property in property list");
1179+
return PI_INVALID_VALUE;
1180+
}
1181+
}
1182+
11441183
std::unique_ptr<_pi_context> piContextPtr{nullptr};
11451184
try {
1146-
if (properties && *properties != PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) {
1147-
throw pi_result(CL_INVALID_VALUE);
1148-
} else if (!properties) {
1185+
if (property_cuda_primary) {
1186+
// Use the CUDA primary context and assume that we want to use it
1187+
// immediately as we want to forge context switches.
1188+
CUcontext Ctxt;
1189+
errcode_ret = PI_CHECK_ERROR(
1190+
cuDevicePrimaryCtxRetain(&Ctxt, devices[0]->cuDevice_));
1191+
piContextPtr = std::unique_ptr<_pi_context>(
1192+
new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1193+
errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt));
1194+
} else {
1195+
// Create a scoped context.
11491196
CUcontext newContext, current;
11501197
PI_CHECK_ERROR(cuCtxGetCurrent(&current));
1151-
errcode_ret = PI_CHECK_ERROR(cuCtxCreate(&newContext, CU_CTX_MAP_HOST,
1152-
(*devices)->cuDevice_));
1198+
errcode_ret = PI_CHECK_ERROR(
1199+
cuCtxCreate(&newContext, CU_CTX_MAP_HOST, devices[0]->cuDevice_));
11531200
piContextPtr = std::unique_ptr<_pi_context>(new _pi_context{
11541201
_pi_context::kind::user_defined, newContext, *devices});
1202+
// For scoped contexts keep the last active CUDA one on top of the stack
1203+
// as `cuCtxCreate` replaces it implicitly otherwise.
11551204
if (current != nullptr) {
1156-
// If there was an existing context on the thread we recover it
11571205
PI_CHECK_ERROR(cuCtxSetCurrent(current));
11581206
}
1159-
} else if (properties
1160-
&& *properties == PI_CONTEXT_PROPERTIES_CUDA_PRIMARY) {
1161-
CUcontext Ctxt;
1162-
errcode_ret = PI_CHECK_ERROR(cuDevicePrimaryCtxRetain(
1163-
&Ctxt, (*devices)->cuDevice_));
1164-
piContextPtr = std::unique_ptr<_pi_context>(
1165-
new _pi_context{_pi_context::kind::primary, Ctxt, *devices});
1166-
errcode_ret = PI_CHECK_ERROR(cuCtxPushCurrent(Ctxt));
1167-
} else {
1168-
throw pi_result(CL_INVALID_VALUE);
11691207
}
11701208

11711209
*retcontext = piContextPtr.release();

sycl/plugins/opencl/pi_opencl.cpp

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,12 +451,12 @@ pi_result OCL(piextGetDeviceFunctionPointer)(pi_device device,
451451
function_pointer_ret));
452452
}
453453

454-
pi_result OCL(piContextCreate)(
455-
const cl_context_properties *properties, // TODO: untie from OpenCL
456-
pi_uint32 num_devices, const pi_device *devices,
457-
void (*pfn_notify)(const char *errinfo, const void *private_info, size_t cb,
458-
void *user_data1),
459-
void *user_data, pi_context *retcontext) {
454+
pi_result OCL(piContextCreate)(const pi_context_properties *properties,
455+
pi_uint32 num_devices, const pi_device *devices,
456+
void (*pfn_notify)(const char *errinfo,
457+
const void *private_info,
458+
size_t cb, void *user_data1),
459+
void *user_data, pi_context *retcontext) {
460460
pi_result ret = PI_INVALID_OPERATION;
461461
*retcontext = cast<pi_context>(
462462
clCreateContext(properties, cast<cl_uint>(num_devices),

sycl/source/detail/context_impl.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,8 @@ context_impl::context_impl(const vector_class<cl::sycl::device> Devices,
4444

4545
if (MPlatform->is_cuda()) {
4646
#if USE_PI_CUDA
47-
const cl_context_properties props[] = {
48-
PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
49-
0};
47+
const pi_context_properties props[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
48+
UseCUDAPrimaryContext, 0};
5049

5150
getPlugin().call<PiApiKind::piContextCreate>(props, DeviceIds.size(),
5251
DeviceIds.data(), nullptr, nullptr, &MContext);

sycl/unittests/pi/cuda/test_base_objects.cpp

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreate) {
7373
ASSERT_EQ(cuErr, CUDA_SUCCESS);
7474
}
7575

76-
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) {
76+
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimaryTrue) {
7777
pi_uint32 numPlatforms = 0;
7878
pi_platform platform;
7979
pi_device device;
@@ -91,11 +91,12 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) {
9191
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piDevicesGet>(
9292
platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
9393
PI_SUCCESS);
94-
cl_context_properties properties = PI_CONTEXT_PROPERTIES_CUDA_PRIMARY;
94+
pi_context_properties properties[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
95+
PI_TRUE, 0};
9596

9697
pi_context ctxt;
9798
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piContextCreate>(
98-
&properties, 1, &device, nullptr, nullptr, &ctxt)),
99+
properties, 1, &device, nullptr, nullptr, &ctxt)),
99100
PI_SUCCESS);
100101
EXPECT_NE(ctxt, nullptr);
101102
EXPECT_EQ(ctxt->get_device(), device);
@@ -118,6 +119,52 @@ TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimary) {
118119
PI_SUCCESS);
119120
}
120121

122+
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreatePrimaryFalse) {
123+
pi_uint32 numPlatforms = 0;
124+
pi_platform platform;
125+
pi_device device;
126+
127+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piPlatformsGet>(
128+
0, nullptr, &numPlatforms)),
129+
PI_SUCCESS)
130+
<< "piPlatformsGet failed.\n";
131+
132+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piPlatformsGet>(
133+
numPlatforms, &platform, nullptr)),
134+
PI_SUCCESS)
135+
<< "piPlatformsGet failed.\n";
136+
137+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piDevicesGet>(
138+
platform, PI_DEVICE_TYPE_GPU, 1, &device, nullptr)),
139+
PI_SUCCESS);
140+
pi_context_properties properties[] = {PI_CONTEXT_PROPERTIES_CUDA_PRIMARY,
141+
PI_FALSE, 0};
142+
143+
pi_context ctxt;
144+
ASSERT_EQ((Plugins[0].call_nocheck<detail::PiApiKind::piContextCreate>(
145+
properties, 1, &device, nullptr, nullptr, &ctxt)),
146+
PI_SUCCESS);
147+
EXPECT_NE(ctxt, nullptr);
148+
EXPECT_EQ(ctxt->get_device(), device);
149+
EXPECT_FALSE(ctxt->is_primary());
150+
151+
// Retrieve the cuCtxt to check information is correct
152+
CUcontext cudaContext = ctxt->get();
153+
unsigned int version = 0;
154+
CUresult cuErr = cuCtxGetApiVersion(cudaContext, &version);
155+
ASSERT_EQ(cuErr, CUDA_SUCCESS);
156+
EXPECT_EQ(version, LATEST_KNOWN_CUDA_DRIVER_API_VERSION);
157+
158+
// Current context in the stack?
159+
CUcontext current;
160+
cuErr = cuCtxGetCurrent(&current);
161+
ASSERT_EQ(cuErr, CUDA_SUCCESS);
162+
ASSERT_EQ(current, cudaContext);
163+
ASSERT_EQ(
164+
(Plugins[0].call_nocheck<detail::PiApiKind::piContextRelease>(ctxt)),
165+
PI_SUCCESS);
166+
}
167+
121168
TEST_F(DISABLED_CudaBaseObjectsTest, piContextCreateChildThread) {
122169
pi_uint32 numPlatforms = 0;
123170
pi_platform platform;

0 commit comments

Comments
 (0)