Skip to content

Commit 6770421

Browse files
authored
[SYCL][CUDA][libclc] Add asynchronous barrier (#5303)
Adds an extension proposal and an implementation of the asynchronous barrier (for now, the implementation supports only the CUDA backend on sm 80 and newer). Tests for this change are in intel/llvm-test-suite#737.
1 parent 7cb28c3 commit 6770421

File tree

10 files changed

+655
-2
lines changed

10 files changed

+655
-2
lines changed

libclc/ptx-nvidiacl/libspirv/SOURCES

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ relational/isfinite.cl
7575
relational/isinf.cl
7676
relational/isnan.cl
7777
synchronization/barrier.cl
78+
synchronization/aw_barrier.cl
7879
async/async_work_group_strided_copy.cl
7980
async/wait_group_events.cl
8081
workitem/get_global_id.cl
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include <spirv/spirv.h>
10+
#include <spirv/spirv_types.h>
11+
12+
extern int __clc_nvvm_reflect_arch();
13+
14+
// Architecture guard called by the barrier entry points in this file.
// Returns normally when __clc_nvvm_reflect_arch() reports a value of at
// least 800 (sm_80, going by this function's name); otherwise executes
// __builtin_trap().
void __clc_trap_if_sm_lower_than_80() {
  if (__clc_nvvm_reflect_arch() >= 800) {
    return;
  }
  __builtin_trap();
  __builtin_unreachable();
}
20+
21+
// Runs the sm_80 architecture check, then forwards `state` and
// `expected_count` to the __nvvm_mbarrier_init builtin.
//
// state:          pointer to the barrier state word handed to the builtin.
// expected_count: count value handed to the builtin unchanged.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierInitialize(long *state,
                                                    int expected_count) {
  __clc_trap_if_sm_lower_than_80();
  __nvvm_mbarrier_init(state, expected_count);
}
26+
27+
// Runs the sm_80 architecture check, then forwards `state` to the
// __nvvm_mbarrier_inval builtin.
//
// state: pointer to the barrier state word handed to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierInvalidate(long *state) {
  __clc_trap_if_sm_lower_than_80();
  __nvvm_mbarrier_inval(state);
}
31+
32+
// Runs the sm_80 architecture check, then calls the __nvvm_mbarrier_arrive
// builtin on `state`.
//
// Returns the value produced by the builtin; callers in this file pass it
// on as the `arrival` argument of __clc_BarrierWait.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArrive(long *state) {
  __clc_trap_if_sm_lower_than_80();
  long arrival_token = __nvvm_mbarrier_arrive(state);
  return arrival_token;
}
36+
37+
// Runs the sm_80 architecture check, then calls the
// __nvvm_mbarrier_arrive_drop builtin on `state`.
//
// Returns the value produced by the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArriveAndDrop(long *state) {
  __clc_trap_if_sm_lower_than_80();
  long arrival_token = __nvvm_mbarrier_arrive_drop(state);
  return arrival_token;
}
41+
42+
// Runs the sm_80 architecture check, then calls the
// __nvvm_mbarrier_arrive_noComplete builtin with `state` and `count`.
//
// state: pointer to the barrier state word handed to the builtin.
// count: count value handed to the builtin unchanged.
//
// Returns the value produced by the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArriveNoComplete(long *state,
                                                          int count) {
  __clc_trap_if_sm_lower_than_80();
  long arrival_token = __nvvm_mbarrier_arrive_noComplete(state, count);
  return arrival_token;
}
47+
48+
// Runs the sm_80 architecture check, then calls the
// __nvvm_mbarrier_arrive_drop_noComplete builtin with `state` and `count`.
//
// state: pointer to the barrier state word handed to the builtin.
// count: count value handed to the builtin unchanged.
//
// Returns the value produced by the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArriveAndDropNoComplete(long *state,
                                                                 int count) {
  __clc_trap_if_sm_lower_than_80();
  long arrival_token = __nvvm_mbarrier_arrive_drop_noComplete(state, count);
  return arrival_token;
}
53+
54+
// Runs the sm_80 architecture check, then forwards `state` to the
// __nvvm_cp_async_mbarrier_arrive builtin. Produces no value.
//
// state: pointer to the barrier state word handed to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierCopyAsyncArrive(long *state) {
  __clc_trap_if_sm_lower_than_80();
  __nvvm_cp_async_mbarrier_arrive(state);
}
58+
59+
// Runs the sm_80 architecture check, then forwards `state` to the
// __nvvm_cp_async_mbarrier_arrive_noinc builtin. Produces no value.
//
// state: pointer to the barrier state word handed to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierCopyAsyncArriveNoInc(long *state) {
  __clc_trap_if_sm_lower_than_80();
  __nvvm_cp_async_mbarrier_arrive_noinc(state);
}
63+
64+
// Runs the sm_80 architecture check, then polls the
// __nvvm_mbarrier_test_wait builtin with `state` and `arrival` until it
// reports a non-zero (true) result. There is no back-off between polls.
//
// state:   pointer to the barrier state word handed to the builtin.
// arrival: value handed to the builtin unchanged (in this file it is the
//          result of __clc_BarrierArrive).
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void __clc_BarrierWait(long *state,
                                                              long arrival) {
  __clc_trap_if_sm_lower_than_80();
  for (;;) {
    if (__nvvm_mbarrier_test_wait(state, arrival)) {
      return;
    }
  }
}
70+
71+
// Runs the sm_80 architecture check, then performs a single
// __nvvm_mbarrier_test_wait query with `state` and `arrival`.
//
// Returns the builtin's result converted to bool; unlike __clc_BarrierWait
// it does not loop.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT bool
__clc_BarrierTestWait(long *state, long arrival) {
  __clc_trap_if_sm_lower_than_80();
  bool wait_done = __nvvm_mbarrier_test_wait(state, arrival);
  return wait_done;
}
76+
77+
// Calls __clc_BarrierArrive on `state`, then passes the returned value to
// __clc_BarrierWait on the same `state`. The architecture check is done
// inside those two callees.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
__clc_BarrierArriveAndWait(long *state) {
  long arrival_token = __clc_BarrierArrive(state);
  __clc_BarrierWait(state, arrival_token);
}

0 commit comments

Comments
 (0)