Skip to content

Commit 7ea615a

Browse files
committed
[CUDA] Add support for binary type query
CUDA does not make a distinction between binary types (it treats PTX and binaries using the same entry points). However, for UR, by definition:
* urProgramCompile should set the binary type to UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT.
* urProgramBuild / urProgramLink should set it to UR_PROGRAM_BINARY_TYPE_EXECUTABLE.
* urProgramCreateWithBinary should set the binary type to UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT.
1 parent 8945db4 commit 7ea615a

File tree

2 files changed

+56
-35
lines changed

2 files changed

+56
-35
lines changed

source/adapters/cuda/program.cpp

Lines changed: 50 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,42 @@ ur_result_t getKernelNames(ur_program_handle_t) {
165165
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
166166
}
167167

168+
/// Creates a program from a single PTX or CUBIN image of `size` bytes and hands it back through phProgram.
169+
/// Note: No calls to CUDA driver API in this function; the binary is only stored
170+
/// on the new program object (via setBinary) for later.
171+
///
172+
/// Note: Only supports one device; hDevice must match the context's device or the UR_ASSERT below fails with UR_RESULT_ERROR_INVALID_CONTEXT.
173+
/// A zero `size` fails with UR_RESULT_ERROR_INVALID_SIZE. BinaryType is not set here; that is left to the caller.
174+
ur_result_t createProgram(ur_context_handle_t hContext,
175+
ur_device_handle_t hDevice, size_t size,
176+
const uint8_t *pBinary,
177+
const ur_program_properties_t *pProperties,
178+
ur_program_handle_t *phProgram) {
179+
UR_ASSERT(hContext->getDevice()->get() == hDevice->get(),
180+
UR_RESULT_ERROR_INVALID_CONTEXT);
181+
UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE);
182+
183+
std::unique_ptr<ur_program_handle_t_> RetProgram{ // owned here until release() below, so the early returns do not leak it
184+
new ur_program_handle_t_{hContext}};
185+
186+
if (pProperties) { // properties are optional; validate count/pMetadatas consistency before using them
187+
if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) {
188+
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
189+
} else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr) {
190+
return UR_RESULT_ERROR_INVALID_SIZE;
191+
}
192+
UR_CHECK_ERROR(
193+
RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count));
194+
}
195+
196+
auto pBinary_string = reinterpret_cast<const char *>(pBinary); // setBinary is called with a const char * view of the same bytes
197+
198+
UR_CHECK_ERROR(RetProgram->setBinary(pBinary_string, size));
199+
*phProgram = RetProgram.release(); // ownership of the program passes to the caller
200+
201+
return UR_RESULT_SUCCESS;
202+
}
203+
168204
/// CUDA will handle the PTX/CUBIN binaries internally through CUmodule object.
169205
/// So, urProgramCreateWithIL and urProgramCreateWithBinary are equivalent in
170206
/// terms of CUDA adapter. See \ref urProgramCreateWithBinary.
@@ -175,8 +211,8 @@ urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL,
175211
ur_device_handle_t hDevice = hContext->getDevice();
176212
auto pBinary = reinterpret_cast<const uint8_t *>(pIL);
177213

178-
return urProgramCreateWithBinary(hContext, hDevice, length, pBinary,
179-
pProperties, phProgram);
214+
return createProgram(hContext, hDevice, length, pBinary, pProperties,
215+
phProgram);
180216
}
181217

182218
/// CUDA will handle the PTX/CUBIN binaries internally through a call to
@@ -185,7 +221,9 @@ urProgramCreateWithIL(ur_context_handle_t hContext, const void *pIL,
185221
UR_APIEXPORT ur_result_t UR_APICALL
186222
urProgramCompile(ur_context_handle_t hContext, ur_program_handle_t hProgram,
187223
const char *pOptions) {
188-
return urProgramBuild(hContext, hProgram, pOptions);
224+
UR_CHECK_ERROR(urProgramBuild(hContext, hProgram, pOptions));
225+
hProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
226+
return UR_RESULT_SUCCESS;
189227
}
190228

191229
/// Loads the images from a UR program into a CUmodule that can be
@@ -202,6 +240,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuild(ur_context_handle_t hContext,
202240
ScopedContext Active(hProgram->getContext());
203241

204242
hProgram->buildProgram(pOptions);
243+
hProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_EXECUTABLE;
205244

206245
} catch (ur_result_t Err) {
207246
Result = Err;
@@ -241,6 +280,7 @@ urProgramLink(ur_context_handle_t hContext, uint32_t count,
241280
RetProgram->setBinary(static_cast<const char *>(CuBin), CuBinSize);
242281

243282
Result = RetProgram->buildProgram(pOptions);
283+
RetProgram->BinaryType = UR_PROGRAM_BINARY_TYPE_EXECUTABLE;
244284
} catch (...) {
245285
// Upon error attempt cleanup
246286
UR_CHECK_ERROR(cuLinkDestroy(State));
@@ -287,6 +327,9 @@ urProgramGetBuildInfo(ur_program_handle_t hProgram, ur_device_handle_t hDevice,
287327
return ReturnValue(hProgram->BuildOptions.c_str());
288328
case UR_PROGRAM_BUILD_INFO_LOG:
289329
return ReturnValue(hProgram->InfoLog, hProgram->MaxLogSize);
330+
case UR_PROGRAM_BUILD_INFO_BINARY_TYPE: {
331+
return ReturnValue(hProgram->BinaryType);
332+
}
290333
default:
291334
break;
292335
}
@@ -384,44 +427,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetNativeHandle(
384427
return UR_RESULT_SUCCESS;
385428
}
386429

387-
/// Loads images from a list of PTX or CUBIN binaries.
388-
/// Note: No calls to CUDA driver API in this function, only store binaries
389-
/// for later.
390-
///
391-
/// Note: Only supports one device
392-
///
393430
UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithBinary(
394431
ur_context_handle_t hContext, ur_device_handle_t hDevice, size_t size,
395432
const uint8_t *pBinary, const ur_program_properties_t *pProperties,
396433
ur_program_handle_t *phProgram) {
397-
UR_ASSERT(hContext->getDevice()->get() == hDevice->get(),
398-
UR_RESULT_ERROR_INVALID_CONTEXT);
399-
UR_ASSERT(size, UR_RESULT_ERROR_INVALID_SIZE);
400434

401-
ur_result_t Result = UR_RESULT_SUCCESS;
435+
UR_CHECK_ERROR(
436+
createProgram(hContext, hDevice, size, pBinary, pProperties, phProgram));
437+
(*phProgram)->BinaryType = UR_PROGRAM_BINARY_TYPE_COMPILED_OBJECT;
402438

403-
std::unique_ptr<ur_program_handle_t_> RetProgram{
404-
new ur_program_handle_t_{hContext}};
405-
406-
if (pProperties) {
407-
if (pProperties->count > 0 && pProperties->pMetadatas == nullptr) {
408-
return UR_RESULT_ERROR_INVALID_NULL_POINTER;
409-
} else if (pProperties->count == 0 && pProperties->pMetadatas != nullptr) {
410-
return UR_RESULT_ERROR_INVALID_SIZE;
411-
}
412-
Result =
413-
RetProgram->setMetadata(pProperties->pMetadatas, pProperties->count);
414-
}
415-
UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
416-
417-
auto pBinary_string = reinterpret_cast<const char *>(pBinary);
418-
419-
Result = RetProgram->setBinary(pBinary_string, size);
420-
UR_ASSERT(Result == UR_RESULT_SUCCESS, Result);
421-
422-
*phProgram = RetProgram.release();
423-
424-
return Result;
439+
return UR_RESULT_SUCCESS;
425440
}
426441

427442
// This entry point is only used for native specialization constants (SPIR-V),

source/adapters/cuda/program.hpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ struct ur_program_handle_t_ {
2525
std::atomic_uint32_t RefCount;
2626
ur_context_handle_t Context;
2727

28+
/* The ur_program_binary_type_t property is defined individually for every
29+
* device in a program. However, since the CUDA adapter only has 1 device per
30+
* context / program, there is no need to keep track of its value for each
31+
* device. */
32+
ur_program_binary_type_t BinaryType = UR_PROGRAM_BINARY_TYPE_NONE;
33+
2834
// Metadata
2935
std::unordered_map<std::string, std::tuple<uint32_t, uint32_t, uint32_t>>
3036
KernelReqdWorkGroupSizeMD;

0 commit comments

Comments
 (0)