Skip to content

Commit 94f5c73

Browse files
committed
[SYCL][HIP] Fix infinite loop when parallel_for range exceeds INT_MAX
This is the HIP equivalent of the changes in intel#5095. It also fixes intel#4255 for the HIP plugin.
1 parent 0b8df3b commit 94f5c73

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

sycl/plugins/hip/pi_hip.cpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ int getAttribute(pi_device device, hipDeviceAttribute_t attribute) {
299299
}
300300
/// \endcond
301301

302-
void simpleGuessLocalWorkSize(int *threadsPerBlock,
302+
void simpleGuessLocalWorkSize(size_t *threadsPerBlock,
303303
const size_t *global_work_size,
304304
const size_t maxThreadsPerBlock[3],
305305
pi_kernel kernel) {
@@ -314,8 +314,7 @@ void simpleGuessLocalWorkSize(int *threadsPerBlock,
314314

315315
//(void)minGrid; // Not used, avoid warnings
316316

317-
threadsPerBlock[0] = std::min(static_cast<int>(maxThreadsPerBlock[0]),
318-
static_cast<int>(global_work_size[0]));
317+
threadsPerBlock[0] = std::min(maxThreadsPerBlock[0], global_work_size[0]);
319318

320319
// Find a local work group size that is a divisor of the global
321320
// work group size to produce uniform work groups.
@@ -2492,7 +2491,7 @@ pi_result hip_piEnqueueKernelLaunch(
24922491

24932492
// Set the number of threads per block to the number of threads per warp
24942493
// by default unless user has provided a better number
2495-
int threadsPerBlock[3] = {32, 1, 1};
2494+
size_t threadsPerBlock[3] = {32u, 1u, 1u};
24962495
size_t maxWorkGroupSize = 0u;
24972496
size_t maxThreadsPerBlock[3] = {};
24982497
bool providedLocalWorkGroupSize = (local_work_size != nullptr);
@@ -2522,7 +2521,7 @@ pi_result hip_piEnqueueKernelLaunch(
25222521
return PI_INVALID_WORK_GROUP_SIZE;
25232522
if (0u != (global_work_size[dim] % local_work_size[dim]))
25242523
return PI_INVALID_WORK_GROUP_SIZE;
2525-
threadsPerBlock[dim] = static_cast<int>(local_work_size[dim]);
2524+
threadsPerBlock[dim] = local_work_size[dim];
25262525
return PI_SUCCESS;
25272526
};
25282527

@@ -2542,12 +2541,11 @@ pi_result hip_piEnqueueKernelLaunch(
25422541
return PI_INVALID_WORK_GROUP_SIZE;
25432542
}
25442543

2545-
int blocksPerGrid[3] = {1, 1, 1};
2544+
size_t blocksPerGrid[3] = {1u, 1u, 1u};
25462545

25472546
for (size_t i = 0; i < work_dim; i++) {
25482547
blocksPerGrid[i] =
2549-
static_cast<int>(global_work_size[i] + threadsPerBlock[i] - 1) /
2550-
threadsPerBlock[i];
2548+
(global_work_size[i] + threadsPerBlock[i] - 1) / threadsPerBlock[i];
25512549
}
25522550

25532551
pi_result retError = PI_SUCCESS;

0 commit comments

Comments
 (0)