Skip to content

Commit 2ae49f5

Browse files
authored
[SYCL] Fix work-group size selection in reductions (#2693)
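The 'else' branch that adds one more work-group for the remaining work-items was attached to the outer 'if (Rem != 0)' instead of the inner 'if (RemAlt == 0 && NWorkGroupsAlt <= MaxWGSize)'. This patch moves it to the inner 'if'. Signed-off-by: Vyacheslav N Klochkov <[email protected]>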
1 parent 39a555e commit 2ae49f5

File tree

2 files changed

+10
-4
lines changed

2 files changed

+10
-4
lines changed

sycl/source/detail/reduction.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -26,17 +26,23 @@ __SYCL_EXPORT size_t reduComputeWGSize(size_t NWorkItems, size_t MaxWGSize,
2626
NWorkGroups = NWorkItems / WGSize;
2727
size_t Rem = NWorkItems % WGSize;
2828
if (Rem != 0) {
29-
// Let's say MaxWGSize = 128 and NWorkItems is (128+32).
29+
// Let's suppose MaxWGSize = 128 and NWorkItems = (128+32).
3030
// It seems better to have 5 groups 32 work-items each than 2 groups with
3131
// 128 work-items in the 1st group and 32 work-items in the 2nd group.
3232
size_t NWorkGroupsAlt = NWorkItems / Rem;
3333
size_t RemAlt = NWorkItems % Rem;
3434
if (RemAlt == 0 && NWorkGroupsAlt <= MaxWGSize) {
35+
// Choose smaller uniform work-groups.
36+
// The condition 'NWorkGroupsAlt <= MaxWGSize' was checked to ensure
37+
// that choosing smaller groups will not cause the need for additional
38+
// invocations of the kernel.
3539
NWorkGroups = NWorkGroupsAlt;
3640
WGSize = Rem;
41+
} else {
42+
// Add 1 more group to process the remaining elements and proceed
43+
// with bigger non-uniform work-groups.
44+
NWorkGroups++;
3745
}
38-
} else {
39-
NWorkGroups++;
4046
}
4147
}
4248
return WGSize;

sycl/test/reduction/reduction_nd_lambda.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ void test(T Identity, BinaryOperation BOp, size_t WGSize, size_t NWItems) {
5050

5151
int main() {
5252
test<class AddTestName, int>(
53-
0, [](auto x, auto y) { return (x + y); }, 8, 32);
53+
0, [](auto x, auto y) { return (x + y); }, 1, 1024);
5454
test<class MulTestName, int>(
5555
0, [](auto x, auto y) { return (x * y); }, 8, 32);
5656

0 commit comments

Comments
 (0)