Skip to content

Commit 04ffc90

Browse files
committed
[CUDA][HIP] Fix exceptions throwing from adapter
`UR_CHECK_ERROR` throws an exception if an error is found, so it should only be used within a try/catch block; otherwise, the exception may leak out of the adapter.
1 parent d9d24ec commit 04ffc90

File tree

2 files changed

+33
-41
lines changed

2 files changed

+33
-41
lines changed

source/adapters/cuda/command_buffer.cpp

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -369,14 +369,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
369369
UR_RESULT_ERROR_INVALID_VALUE);
370370
}
371371

372-
CUgraphNode GraphNode;
372+
try {
373+
CUgraphNode GraphNode;
373374

374-
std::vector<CUgraphNode> DepsList;
375-
UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList,
376-
pSyncPointWaitList, DepsList));
375+
std::vector<CUgraphNode> DepsList;
376+
UR_CHECK_ERROR(getNodesFromSyncPoints(
377+
hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList));
377378

378-
if (*pGlobalWorkSize == 0) {
379-
try {
379+
if (*pGlobalWorkSize == 0) {
380380
// Create an empty node if the kernel workload size is zero
381381
UR_CHECK_ERROR(cuGraphAddEmptyNode(&GraphNode, hCommandBuffer->CudaGraph,
382382
DepsList.data(), DepsList.size()));
@@ -386,25 +386,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
386386
if (pSyncPoint) {
387387
*pSyncPoint = SyncPoint;
388388
}
389-
} catch (ur_result_t Err) {
390-
return Err;
389+
return UR_RESULT_SUCCESS;
391390
}
392-
return UR_RESULT_SUCCESS;
393-
}
394391

395-
// Set the number of threads per block to the number of threads per warp
396-
// by default unless user has provided a better number
397-
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
398-
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
392+
// Set the number of threads per block to the number of threads per warp
393+
// by default unless user has provided a better number
394+
size_t ThreadsPerBlock[3] = {32u, 1u, 1u};
395+
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
399396

400-
uint32_t LocalSize = hKernel->getLocalSize();
401-
CUfunction CuFunc = hKernel->get();
402-
UR_CHECK_ERROR(
403-
setKernelParams(hCommandBuffer->Context, hCommandBuffer->Device, workDim,
404-
pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize,
405-
hKernel, CuFunc, ThreadsPerBlock, BlocksPerGrid));
397+
uint32_t LocalSize = hKernel->getLocalSize();
398+
CUfunction CuFunc = hKernel->get();
399+
UR_CHECK_ERROR(setKernelParams(
400+
hCommandBuffer->Context, hCommandBuffer->Device, workDim,
401+
pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, hKernel, CuFunc,
402+
ThreadsPerBlock, BlocksPerGrid));
406403

407-
try {
408404
// Set node param structure with the kernel related data
409405
auto &ArgIndices = hKernel->getArgIndices();
410406
CUDA_KERNEL_NODE_PARAMS NodeParams = {};

source/adapters/hip/command_buffer.cpp

Lines changed: 16 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -339,14 +339,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
339339
UR_RESULT_ERROR_INVALID_VALUE);
340340
}
341341

342-
hipGraphNode_t GraphNode;
343-
std::vector<hipGraphNode_t> DepsList;
342+
try {
343+
hipGraphNode_t GraphNode;
344+
std::vector<hipGraphNode_t> DepsList;
344345

345-
UR_CHECK_ERROR(getNodesFromSyncPoints(hCommandBuffer, numSyncPointsInWaitList,
346-
pSyncPointWaitList, DepsList));
346+
UR_CHECK_ERROR(getNodesFromSyncPoints(
347+
hCommandBuffer, numSyncPointsInWaitList, pSyncPointWaitList, DepsList));
347348

348-
if (*pGlobalWorkSize == 0) {
349-
try {
349+
if (*pGlobalWorkSize == 0) {
350350
// Create an empty node if the kernel workload size is zero
351351
UR_CHECK_ERROR(hipGraphAddEmptyNode(&GraphNode, hCommandBuffer->HIPGraph,
352352
DepsList.data(), DepsList.size()));
@@ -356,24 +356,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
356356
if (pSyncPoint) {
357357
*pSyncPoint = SyncPoint;
358358
}
359-
} catch (ur_result_t Err) {
360-
return Err;
359+
return UR_RESULT_SUCCESS;
361360
}
362-
return UR_RESULT_SUCCESS;
363-
}
364361

365-
// Set the number of threads per block to the number of threads per warp
366-
// by default unless user has provided a better number
367-
size_t ThreadsPerBlock[3] = {64u, 1u, 1u};
368-
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
362+
// Set the number of threads per block to the number of threads per warp
363+
// by default unless user has provided a better number
364+
size_t ThreadsPerBlock[3] = {64u, 1u, 1u};
365+
size_t BlocksPerGrid[3] = {1u, 1u, 1u};
369366

370-
uint32_t LocalSize = hKernel->getLocalSize();
371-
hipFunction_t HIPFunc = hKernel->get();
372-
UR_CHECK_ERROR(setKernelParams(
373-
hCommandBuffer->Device, workDim, pGlobalWorkOffset, pGlobalWorkSize,
374-
pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));
367+
uint32_t LocalSize = hKernel->getLocalSize();
368+
hipFunction_t HIPFunc = hKernel->get();
369+
UR_CHECK_ERROR(setKernelParams(
370+
hCommandBuffer->Device, workDim, pGlobalWorkOffset, pGlobalWorkSize,
371+
pLocalWorkSize, hKernel, HIPFunc, ThreadsPerBlock, BlocksPerGrid));
375372

376-
try {
377373
// Set node param structure with the kernel related data
378374
auto &ArgIndices = hKernel->getArgIndices();
379375
hipKernelNodeParams NodeParams;

0 commit comments

Comments
 (0)