Skip to content

Commit 6e3f244

Browse files
authored
[SYCL][CUDA] Support GroupBroadcast with 32-bit id (#2759)
Use of the broadcast algorithm with the sub_group class clamps the sub-group local id into a uint32_t. libspirv was missing an entry point for this case. Signed-off-by: John Pennycook <[email protected]>
1 parent 0aac708 commit 6e3f244

File tree

4 files changed

+12
-12
lines changed

4 files changed

+12
-12
lines changed

libclc/ptx-nvidiacl/libspirv/group/collectives.cl

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,10 @@ long __clc__3d_to_linear_local_id(ulong3 id) {
385385
uint scope, TYPE x, ulong3 local_id) { \
386386
ulong linear_local_id = __clc__3d_to_linear_local_id(local_id); \
387387
return __spirv_GroupBroadcast(scope, x, linear_local_id); \
388+
} \
389+
_CLC_DEF _CLC_OVERLOAD _CLC_CONVERGENT TYPE __spirv_GroupBroadcast( \
390+
uint scope, TYPE x, uint local_id) { \
391+
return __spirv_GroupBroadcast(scope, x, (ulong)local_id); \
388392
}
389393
__CLC_GROUP_BROADCAST(char);
390394
__CLC_GROUP_BROADCAST(uchar);
@@ -411,6 +415,10 @@ _CLC_DECL _CLC_CONVERGENT half
411415
_Z17__spirv_GroupBroadcastjDF16_Dv3_m(uint scope, half x, ulong3 local_id) {
412416
return __spirv_GroupBroadcast(scope, x, local_id);
413417
}
418+
_CLC_DECL _CLC_CONVERGENT half
419+
_Z22__spirv_GroupBroadcastjDF16_j(uint scope, half x, uint local_id) {
420+
return __spirv_GroupBroadcast(scope, x, (ulong)local_id);
421+
}
414422

415423
#undef __CLC_GROUP_BROADCAST
416424

sycl/test/on-device/sub_group/broadcast.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
// XFAIL: cuda
2-
// CUDA compilation and runtime do not yet support sub-groups.
3-
41
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
52
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
63
// RUN: %CPU_RUN_PLACEHOLDER %t.out
@@ -19,7 +16,7 @@
1916

2017
int main() {
2118
queue Queue;
22-
if (!core_sg_supported(Queue.get_device())) {
19+
if (Queue.get_device().is_host()) {
2320
std::cout << "Skipping test\n";
2421
return 0;
2522
}

sycl/test/on-device/sub_group/broadcast_fp16.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
// XFAIL: cuda
2-
// CUDA compilation and runtime do not yet support sub-groups.
3-
41
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
52
// RUN: %GPU_RUN_PLACEHOLDER %t.out
63

@@ -16,10 +13,11 @@
1613

1714
int main() {
1815
queue Queue;
19-
if (!core_sg_supported(Queue.get_device())) {
16+
if (Queue.get_device().is_host()) {
2017
std::cout << "Skipping test\n";
2118
return 0;
2219
}
2320
check<cl::sycl::half>(Queue);
21+
std::cout << "Test passed." << std::endl;
2422
return 0;
2523
}

sycl/test/on-device/sub_group/broadcast_fp64.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
// XFAIL: cuda
2-
// CUDA compilation and runtime do not yet support sub-groups.
3-
41
// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out
52
// RUN: env SYCL_DEVICE_TYPE=HOST %t.out
63
// RUN: %CPU_RUN_PLACEHOLDER %t.out
@@ -19,7 +16,7 @@
1916

2017
int main() {
2118
queue Queue;
22-
if (!core_sg_supported(Queue.get_device())) {
19+
if (Queue.get_device().is_host()) {
2320
std::cout << "Skipping test\n";
2421
return 0;
2522
}

0 commit comments

Comments
 (0)