Skip to content

Commit fd0b108

Browse files
authored
[SYCL][HIP] Fix infinite loop when parallel_for range exceeds INT_MAX (#5115)
This is the equivalent for HIP of the changes in #5095. It also fixes #4255 for the HIP plugin.
1 parent e0e5336 commit fd0b108

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

sycl/plugins/hip/pi_hip.cpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,7 @@ int getAttribute(pi_device device, hipDeviceAttribute_t attribute) {
299299
}
300300
/// \endcond
301301

302-
void simpleGuessLocalWorkSize(int *threadsPerBlock,
302+
void simpleGuessLocalWorkSize(size_t *threadsPerBlock,
303303
const size_t *global_work_size,
304304
const size_t maxThreadsPerBlock[3],
305305
pi_kernel kernel) {
@@ -314,8 +314,7 @@ void simpleGuessLocalWorkSize(int *threadsPerBlock,
314314

315315
//(void)minGrid; // Not used, avoid warnings
316316

317-
threadsPerBlock[0] = std::min(static_cast<int>(maxThreadsPerBlock[0]),
318-
static_cast<int>(global_work_size[0]));
317+
threadsPerBlock[0] = std::min(maxThreadsPerBlock[0], global_work_size[0]);
319318

320319
// Find a local work group size that is a divisor of the global
321320
// work group size to produce uniform work groups.
@@ -2501,7 +2500,7 @@ pi_result hip_piEnqueueKernelLaunch(
25012500

25022501
// Set the number of threads per block to the number of threads per warp
25032502
// by default unless user has provided a better number
2504-
int threadsPerBlock[3] = {32, 1, 1};
2503+
size_t threadsPerBlock[3] = {32u, 1u, 1u};
25052504
size_t maxWorkGroupSize = 0u;
25062505
size_t maxThreadsPerBlock[3] = {};
25072506
bool providedLocalWorkGroupSize = (local_work_size != nullptr);
@@ -2531,7 +2530,7 @@ pi_result hip_piEnqueueKernelLaunch(
25312530
return PI_INVALID_WORK_GROUP_SIZE;
25322531
if (0u != (global_work_size[dim] % local_work_size[dim]))
25332532
return PI_INVALID_WORK_GROUP_SIZE;
2534-
threadsPerBlock[dim] = static_cast<int>(local_work_size[dim]);
2533+
threadsPerBlock[dim] = local_work_size[dim];
25352534
return PI_SUCCESS;
25362535
};
25372536

@@ -2551,12 +2550,11 @@ pi_result hip_piEnqueueKernelLaunch(
25512550
return PI_INVALID_WORK_GROUP_SIZE;
25522551
}
25532552

2554-
int blocksPerGrid[3] = {1, 1, 1};
2553+
size_t blocksPerGrid[3] = {1u, 1u, 1u};
25552554

25562555
for (size_t i = 0; i < work_dim; i++) {
25572556
blocksPerGrid[i] =
2558-
static_cast<int>(global_work_size[i] + threadsPerBlock[i] - 1) /
2559-
threadsPerBlock[i];
2557+
(global_work_size[i] + threadsPerBlock[i] - 1) / threadsPerBlock[i];
25602558
}
25612559

25622560
pi_result retError = PI_SUCCESS;

0 commit comments

Comments
 (0)