Skip to content

Commit 6b7e228

Browse files
committed
[SYCL][CUDA] Use piclProgramCreateWithBinary with CUDA backend
This commit restructures the CUDA backend so that the program manager calls "piclProgramCreateWithBinary" for both OpenCL and CUDA backends instead of branching unnecessarily. Unit tests are also updated accordingly. Signed-off-by: Przemek Malon <[email protected]>
1 parent 9072d49 commit 6b7e228

File tree

4 files changed

+103
-85
lines changed

4 files changed

+103
-85
lines changed

sycl/plugins/cuda/pi_cuda.cpp

Lines changed: 55 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -389,19 +389,20 @@ pi_result enqueueEventWait(pi_queue queue, pi_event event) {
389389
}
390390

391391
_pi_program::_pi_program(pi_context ctxt)
392-
: module_{nullptr}, source_{}, sourceLength_{0}
393-
, refCount_{1}, context_{ctxt}
394-
{
392+
: module_{nullptr}, binary_{},
393+
binarySizeInBytes_{0}, refCount_{1}, context_{ctxt} {
395394
cuda_piContextRetain(context_);
396395
}
397396

398397
_pi_program::~_pi_program() {
399398
cuda_piContextRelease(context_);
400399
}
401400

402-
pi_result _pi_program::create_from_source(const char *source, size_t length) {
403-
source_ = source;
404-
sourceLength_ = length;
401+
pi_result _pi_program::set_binary(const char *source, size_t length) {
402+
assert((binary_ == nullptr && binarySizeInBytes_ == 0) &&
403+
"Re-setting program binary data which has already been set");
404+
binary_ = source;
405+
binarySizeInBytes_ = length;
405406
return PI_SUCCESS;
406407
}
407408

@@ -427,9 +428,9 @@ pi_result _pi_program::build_program(const char *build_options) {
427428
options[3] = CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
428429
optionVals[3] = (void *)(long)MAX_LOG_SIZE;
429430

430-
auto result = PI_CHECK_ERROR(cuModuleLoadDataEx(
431-
&module_, static_cast<const void *>(source_), numberOfOptions, options,
432-
optionVals));
431+
auto result = PI_CHECK_ERROR(
432+
cuModuleLoadDataEx(&module_, static_cast<const void *>(binary_),
433+
numberOfOptions, options, optionVals));
433434

434435
const auto success = (result == PI_SUCCESS);
435436

@@ -446,8 +447,8 @@ pi_result _pi_program::build_program(const char *build_options) {
446447
/// has_kernel method, so an alternative would be to move the has_kernel
447448
/// query to PI and use cuModuleGetFunction to check for a kernel.
448449
std::string getKernelNames(pi_program program) {
449-
std::string source(program->source_,
450-
program->source_ + program->sourceLength_);
450+
std::string source(program->binary_,
451+
program->binary_ + program->binarySizeInBytes_);
451452
std::regex entries_pattern(".entry\\s+([^\\([:s:]]*)");
452453
std::string names("");
453454
std::smatch match;
@@ -2172,41 +2173,15 @@ pi_result cuda_piMemRetain(pi_mem mem) {
21722173
return PI_SUCCESS;
21732174
}
21742175

2175-
/// Constructs a PI program from a list of PTX or CUBIN binaries.
2176-
/// Note: No calls to CUDA driver API in this function, only store binaries
2177-
/// for later.
2178-
///
2179-
/// \TODO Implement more than one input image
2180-
/// \TODO SYCL RT should use cuda_piclprogramCreateWithBinary instead
2176+
/// Not used as CUDA backend only creates programs from binary.
2177+
/// See \ref cuda_piclProgramCreateWithBinary.
21812178
///
21822179
pi_result cuda_piclProgramCreateWithSource(pi_context context, pi_uint32 count,
21832180
const char **strings,
21842181
const size_t *lengths,
21852182
pi_program *program) {
2186-
2187-
assert(context != nullptr);
2188-
assert(strings != nullptr);
2189-
assert(program != nullptr);
2190-
2191-
pi_result retErr = PI_SUCCESS;
2192-
2193-
if (count == 0) {
2194-
retErr = PI_INVALID_PROGRAM;
2195-
return retErr;
2196-
}
2197-
2198-
assert(count == 1);
2199-
2200-
std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
2201-
2202-
auto has_length = (lengths != nullptr);
2203-
size_t length = has_length ? lengths[0] : strlen(strings[0]) + 1;
2204-
2205-
retProgram->create_from_source(strings[0], length);
2206-
2207-
*program = retProgram.release();
2208-
2209-
return retErr;
2183+
cl::sycl::detail::pi::die("cuda_piclProgramCreateWithSource not implemented");
2184+
return {};
22102185
}
22112186

22122187
/// Loads the images from a PI program into a CUmodule that can be
@@ -2244,13 +2219,41 @@ pi_result cuda_piProgramCreate(pi_context context, const void *il,
22442219
return {};
22452220
}
22462221

2247-
/// \TODO Not implemented. See \ref cuda_piclProgramCreateWithSource
2222+
/// Loads images from a list of PTX or CUBIN binaries.
2223+
/// Note: No calls to CUDA driver API in this function, only store binaries
2224+
/// for later.
2225+
///
2226+
/// Note: Only supports one device
2227+
///
22482228
pi_result cuda_piclProgramCreateWithBinary(
22492229
pi_context context, pi_uint32 num_devices, const pi_device *device_list,
22502230
const size_t *lengths, const unsigned char **binaries,
2251-
pi_int32 *binary_status, pi_program *errcode_ret) {
2252-
cl::sycl::detail::pi::die("cuda_piclProgramCreateWithBinary not implemented");
2253-
return {};
2231+
pi_int32 *binary_status, pi_program *program) {
2232+
assert(context != nullptr);
2233+
assert(binaries != nullptr);
2234+
assert(program != nullptr);
2235+
assert(device_list != nullptr);
2236+
assert(num_devices == 1 && "CUDA contexts are for a single device");
2237+
assert((context->get_device()->get() == device_list[0]->get()) &&
2238+
"Mismatch between devices context and passed context when creating "
2239+
"program from binary");
2240+
2241+
pi_result retError = PI_SUCCESS;
2242+
2243+
std::unique_ptr<_pi_program> retProgram{new _pi_program{context}};
2244+
2245+
const bool has_length = (lengths != nullptr);
2246+
size_t length = has_length
2247+
? lengths[0]
2248+
: strlen(reinterpret_cast<const char *>(binaries[0])) + 1;
2249+
2250+
assert(length != 0);
2251+
2252+
retProgram->set_binary(reinterpret_cast<const char *>(binaries[0]), length);
2253+
2254+
*program = retProgram.release();
2255+
2256+
return retError;
22542257
}
22552258

22562259
pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name,
@@ -2272,13 +2275,13 @@ pi_result cuda_piProgramGetInfo(pi_program program, pi_program_info param_name,
22722275
&program->context_->deviceId_);
22732276
case PI_PROGRAM_INFO_SOURCE:
22742277
return getInfo(param_value_size, param_value, param_value_size_ret,
2275-
program->source_);
2278+
program->binary_);
22762279
case PI_PROGRAM_INFO_BINARY_SIZES:
22772280
return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
2278-
&program->sourceLength_);
2281+
&program->binarySizeInBytes_);
22792282
case PI_PROGRAM_INFO_BINARIES:
22802283
return getInfoArray(1, param_value_size, param_value, param_value_size_ret,
2281-
&program->source_);
2284+
&program->binary_);
22822285
case PI_PROGRAM_INFO_KERNEL_NAMES: {
22832286
return getInfo(param_value_size, param_value, param_value_size_ret,
22842287
getKernelNames(program).c_str());
@@ -2320,15 +2323,15 @@ pi_result cuda_piProgramLink(pi_context context, pi_uint32 num_devices,
23202323
for (size_t i = 0; i < num_input_programs; ++i) {
23212324
pi_program program = input_programs[i];
23222325
retError = PI_CHECK_ERROR(cuLinkAddData(
2323-
state, CU_JIT_INPUT_PTX, const_cast<char *>(program->source_),
2324-
program->sourceLength_, nullptr, 0, nullptr, nullptr));
2326+
state, CU_JIT_INPUT_PTX, const_cast<char *>(program->binary_),
2327+
program->binarySizeInBytes_, nullptr, 0, nullptr, nullptr));
23252328
}
23262329
void *cubin = nullptr;
23272330
size_t cubinSize = 0;
23282331
retError = PI_CHECK_ERROR(cuLinkComplete(state, &cubin, &cubinSize));
23292332

2330-
retError = retProgram->create_from_source(
2331-
static_cast<const char *>(cubin), cubinSize);
2333+
retError =
2334+
retProgram->set_binary(static_cast<const char *>(cubin), cubinSize);
23322335

23332336
if (retError != PI_SUCCESS) {
23342337
return retError;

sycl/plugins/cuda/pi_cuda.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -404,8 +404,8 @@ class _pi_event {
404404
struct _pi_program {
405405
using native_type = CUmodule;
406406
native_type module_;
407-
const char *source_;
408-
size_t sourceLength_;
407+
const char *binary_;
408+
size_t binarySizeInBytes_;
409409
std::atomic_uint32_t refCount_;
410410
_pi_context *context_;
411411

@@ -418,7 +418,7 @@ struct _pi_program {
418418
_pi_program(pi_context ctxt);
419419
~_pi_program();
420420

421-
pi_result create_from_source(const char *source, size_t length);
421+
pi_result set_binary(const char *binary, size_t binarySizeInBytes);
422422

423423
pi_result build_program(const char* build_options);
424424

sycl/source/detail/program_manager/program_manager.cpp

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,14 @@ static RT::PiProgram createBinaryProgram(const ContextImplPtr Context,
8585
#endif
8686

8787
RT::PiProgram Program;
88-
89-
// TODO: Implement `piProgramCreateWithBinary` to not require extra logic for
90-
// the CUDA backend.
91-
const auto Backend = Context->getPlugin().getBackend();
92-
if (Backend == backend::cuda) {
93-
// TODO: Reemplace CreateWithSource with CreateWithBinary in CUDA backend
94-
const char *SignedData = reinterpret_cast<const char *>(Data);
95-
Plugin.call<PiApiKind::piclProgramCreateWithSource>(
96-
Context->getHandleRef(), 1 /*one binary*/, &SignedData, &DataLen,
97-
&Program);
98-
} else {
99-
RT::PiDevice Device = getFirstDevice(Context);
100-
pi_int32 BinaryStatus = CL_SUCCESS;
101-
Plugin.call<PiApiKind::piclProgramCreateWithBinary>(
102-
Context->getHandleRef(), 1 /*one binary*/, &Device, &DataLen, &Data,
103-
&BinaryStatus, &Program);
88+
RT::PiDevice Device = getFirstDevice(Context);
89+
pi_int32 BinaryStatus = CL_SUCCESS;
90+
Plugin.call<PiApiKind::piclProgramCreateWithBinary>(
91+
Context->getHandleRef(), 1 /*one binary*/, &Device, &DataLen, &Data,
92+
&BinaryStatus, &Program);
93+
94+
if (BinaryStatus != CL_SUCCESS) {
95+
throw runtime_error("Creating program with binary failed.", BinaryStatus);
10496
}
10597

10698
return Program;

sycl/unittests/pi/cuda/test_kernels.cpp

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,11 @@ const char *threeParamsTwoLocal = "\n\
128128
TEST_F(CudaKernelsTest, PICreateProgramAndKernel) {
129129

130130
pi_program prog;
131+
pi_int32 binary_status = PI_SUCCESS;
131132
ASSERT_EQ(
132-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
133-
context_, 1, (const char **)&ptxSource, nullptr, &prog)),
133+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
134+
context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource,
135+
&binary_status, &prog)),
134136
PI_SUCCESS);
135137

136138
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
@@ -147,10 +149,16 @@ TEST_F(CudaKernelsTest, PICreateProgramAndKernel) {
147149
TEST_F(CudaKernelsTest, PIKernelArgumentSimple) {
148150

149151
pi_program prog;
152+
/// NOTE: `binary_status` currently unsused in the CUDA backend but in case we
153+
/// use it at some point in the future, pass it anyway and check the result.
154+
/// Same goes for all the other tests in this file.
155+
pi_int32 binary_status = PI_SUCCESS;
150156
ASSERT_EQ(
151-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
152-
context_, 1, (const char **)&ptxSource, nullptr, &prog)),
157+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
158+
context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource,
159+
&binary_status, &prog)),
153160
PI_SUCCESS);
161+
ASSERT_EQ(binary_status, PI_SUCCESS);
154162

155163
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
156164
prog, 1, &device_, "", nullptr, nullptr)),
@@ -174,10 +182,13 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSimple) {
174182
TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) {
175183

176184
pi_program prog;
185+
pi_int32 binary_status = PI_SUCCESS;
177186
ASSERT_EQ(
178-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
179-
context_, 1, (const char **)&ptxSource, nullptr, &prog)),
187+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
188+
context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource,
189+
&binary_status, &prog)),
180190
PI_SUCCESS);
191+
ASSERT_EQ(binary_status, PI_SUCCESS);
181192

182193
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
183194
prog, 1, &device_, "", nullptr, nullptr)),
@@ -210,10 +221,13 @@ TEST_F(CudaKernelsTest, PIKernelArgumentSetTwice) {
210221
TEST_F(CudaKernelsTest, PIKernelSetMemObj) {
211222

212223
pi_program prog;
224+
pi_int32 binary_status = PI_SUCCESS;
213225
ASSERT_EQ(
214-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
215-
context_, 1, (const char **)&ptxSource, nullptr, &prog)),
226+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
227+
context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource,
228+
&binary_status, &prog)),
216229
PI_SUCCESS);
230+
ASSERT_EQ(binary_status, PI_SUCCESS);
217231

218232
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
219233
prog, 1, &device_, "", nullptr, nullptr)),
@@ -242,10 +256,13 @@ TEST_F(CudaKernelsTest, PIKernelSetMemObj) {
242256
TEST_F(CudaKernelsTest, PIkerneldispatch) {
243257

244258
pi_program prog;
259+
pi_int32 binary_status = PI_SUCCESS;
245260
ASSERT_EQ(
246-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
247-
context_, 1, (const char **)&ptxSource, nullptr, &prog)),
261+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
262+
context_, 1, &device_, nullptr, (const unsigned char **)&ptxSource,
263+
&binary_status, &prog)),
248264
PI_SUCCESS);
265+
ASSERT_EQ(binary_status, PI_SUCCESS);
249266

250267
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
251268
prog, 1, &device_, "", nullptr, nullptr)),
@@ -282,10 +299,13 @@ TEST_F(CudaKernelsTest, PIkerneldispatch) {
282299
TEST_F(CudaKernelsTest, PIkerneldispatchTwo) {
283300

284301
pi_program prog;
302+
pi_int32 binary_status = PI_SUCCESS;
285303
ASSERT_EQ(
286-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
287-
context_, 1, (const char **)&twoParams, nullptr, &prog)),
304+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
305+
context_, 1, &device_, nullptr, (const unsigned char **)&twoParams,
306+
&binary_status, &prog)),
288307
PI_SUCCESS);
308+
ASSERT_EQ(binary_status, PI_SUCCESS);
289309

290310
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
291311
prog, 1, &device_, "", nullptr, nullptr)),
@@ -333,10 +353,13 @@ TEST_F(CudaKernelsTest, PIkerneldispatchTwo) {
333353
TEST_F(CudaKernelsTest, PIKernelArgumentSetTwiceOneLocal) {
334354

335355
pi_program prog;
356+
pi_int32 binary_status = PI_SUCCESS;
336357
ASSERT_EQ(
337-
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithSource>(
338-
context_, 1, (const char **)&threeParamsTwoLocal, nullptr, &prog)),
358+
(plugin.call_nocheck<detail::PiApiKind::piclProgramCreateWithBinary>(
359+
context_, 1, &device_, nullptr,
360+
(const unsigned char **)&threeParamsTwoLocal, &binary_status, &prog)),
339361
PI_SUCCESS);
362+
ASSERT_EQ(binary_status, PI_SUCCESS);
340363

341364
ASSERT_EQ((plugin.call_nocheck<detail::PiApiKind::piProgramBuild>(
342365
prog, 1, &device_, "", nullptr, nullptr)),

0 commit comments

Comments
 (0)