Skip to content

[SYCL][CUDA][libclc] Add asynchronous barrier #5303

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
May 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8f5e03e
WIP: started trying to impl with atomics
FMarno Dec 9, 2021
4eed959
Merge branch 'sycl' into finlay/async_barrier_proposal
t4c1 Jan 6, 2022
4721cf6
proposal and untested async barrier implementation
t4c1 Jan 7, 2022
38f1d76
added advanced functionality (still untested)
t4c1 Jan 10, 2022
d34a206
fixed max
t4c1 Jan 10, 2022
41c66ec
clarified the cycle of arrivals and waits
t4c1 Jan 10, 2022
bc4f04a
bugfixes
t4c1 Jan 13, 2022
8dff0f7
removed pending_count, which is deprecated in CUDA
t4c1 Jan 13, 2022
f68d80b
format
t4c1 Jan 13, 2022
62ada41
addressed first review comments and clarified how it works without in…
t4c1 Jan 20, 2022
c662d70
format
t4c1 Jan 20, 2022
1e2d99b
clarified that the extension is only for CUDA and fixed some minor is…
t4c1 Jan 25, 2022
a804562
change the name of libclc functions to __clc
t4c1 Jan 26, 2022
9f2f636
Apply suggestions from code review
t4c1 Feb 9, 2022
f33ddf0
addressed review comments
t4c1 Feb 9, 2022
f63b973
added examples
t4c1 Feb 10, 2022
bb08a1b
Merge branch 'sycl' into async_barrier
t4c1 Mar 2, 2022
fa086bd
addressed review comments
t4c1 Mar 7, 2022
bccc461
format
t4c1 Mar 7, 2022
99596cd
moved to experimental namespace
t4c1 Mar 8, 2022
42afcf0
format
t4c1 Mar 8, 2022
62d731e
added explanation of limitations and fixed namespace in spec
t4c1 Mar 8, 2022
d420f88
changed limitations to a note
t4c1 Mar 9, 2022
1adecca
changed to a single note
t4c1 Mar 9, 2022
f110d25
Merge branch 'sycl' into async_barrier
t4c1 Mar 14, 2022
77cafed
Merge branch 'sycl' into async_barrier
t4c1 Mar 28, 2022
5c8e030
format
t4c1 Mar 28, 2022
32f75aa
fix merge
t4c1 Mar 28, 2022
3ccd520
another fix for bad merge
t4c1 Mar 28, 2022
6092e61
format
t4c1 Mar 28, 2022
52e540e
addressed review comments
t4c1 Mar 30, 2022
8e4f969
format
t4c1 Mar 30, 2022
9478857
fixed namespace in examples
t4c1 May 10, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libclc/ptx-nvidiacl/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ relational/isfinite.cl
relational/isinf.cl
relational/isnan.cl
synchronization/barrier.cl
synchronization/aw_barrier.cl
async/async_work_group_strided_copy.cl
async/wait_group_events.cl
workitem/get_global_id.cl
Expand Down
80 changes: 80 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/synchronization/aw_barrier.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <spirv/spirv.h>
#include <spirv/spirv_types.h>

// Declared here, defined outside this file. Its result is compared against
// 800 by the architecture guard in this file.
// NOTE(review): presumably reports the target SM architecture scaled by 10
// (e.g. 800 for sm_80) — confirm against its definition.
extern int __clc_nvvm_reflect_arch();

// Architecture guard called by the barrier wrappers in this file: returns
// normally when __clc_nvvm_reflect_arch() reports at least 800, and otherwise
// aborts execution through __builtin_trap().
// NOTE(review): 800 presumably encodes sm_80, as this function's name
// suggests — confirm against the definition of __clc_nvvm_reflect_arch.
void __clc_trap_if_sm_lower_than_80() {
  if (__clc_nvvm_reflect_arch() >= 800) {
    return;
  }

  __builtin_trap();
  // Mark the code after the trap as never reached.
  __builtin_unreachable();
}

// Runs the architecture guard, then hands `state` and `expected_count` to the
// __nvvm_mbarrier_init builtin. Returns nothing.
//
// state          - pointer passed through unchanged to the builtin.
// expected_count - count passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierInitialize(long *state,
                                                    int expected_count) {
  __clc_trap_if_sm_lower_than_80();

  __nvvm_mbarrier_init(state, expected_count);
}

// Runs the architecture guard, then hands `state` to the
// __nvvm_mbarrier_inval builtin. Returns nothing.
//
// state - pointer passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierInvalidate(long *state) {
  __clc_trap_if_sm_lower_than_80();

  __nvvm_mbarrier_inval(state);
}

// Runs the architecture guard, then calls the __nvvm_mbarrier_arrive builtin
// on `state` and returns whatever the builtin returned.
//
// state - pointer passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArrive(long *state) {
  __clc_trap_if_sm_lower_than_80();

  long arrival_token = __nvvm_mbarrier_arrive(state);
  return arrival_token;
}

// Runs the architecture guard, then calls the __nvvm_mbarrier_arrive_drop
// builtin on `state` and returns whatever the builtin returned.
//
// state - pointer passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArriveAndDrop(long *state) {
  __clc_trap_if_sm_lower_than_80();

  long arrival_token = __nvvm_mbarrier_arrive_drop(state);
  return arrival_token;
}

// Runs the architecture guard, then calls the
// __nvvm_mbarrier_arrive_noComplete builtin with `state` and `count` and
// returns whatever the builtin returned.
//
// state - pointer passed through unchanged to the builtin.
// count - count passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArriveNoComplete(long *state,
                                                          int count) {
  __clc_trap_if_sm_lower_than_80();

  long arrival_token = __nvvm_mbarrier_arrive_noComplete(state, count);
  return arrival_token;
}

// Runs the architecture guard, then calls the
// __nvvm_mbarrier_arrive_drop_noComplete builtin with `state` and `count` and
// returns whatever the builtin returned.
//
// state - pointer passed through unchanged to the builtin.
// count - count passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF long __clc_BarrierArriveAndDropNoComplete(long *state,
                                                                 int count) {
  __clc_trap_if_sm_lower_than_80();

  long arrival_token = __nvvm_mbarrier_arrive_drop_noComplete(state, count);
  return arrival_token;
}

// Runs the architecture guard, then hands `state` to the
// __nvvm_cp_async_mbarrier_arrive builtin. Returns nothing.
//
// state - pointer passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierCopyAsyncArrive(long *state) {
  __clc_trap_if_sm_lower_than_80();
  __nvvm_cp_async_mbarrier_arrive(state);
}

// Runs the architecture guard, then hands `state` to the
// __nvvm_cp_async_mbarrier_arrive_noinc builtin. Returns nothing.
//
// state - pointer passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF void __clc_BarrierCopyAsyncArriveNoInc(long *state) {
  __clc_trap_if_sm_lower_than_80();
  __nvvm_cp_async_mbarrier_arrive_noinc(state);
}

// Runs the architecture guard, then repeatedly calls the
// __nvvm_mbarrier_test_wait builtin with `state` and `arrival` until it
// returns a true value. Returns nothing.
//
// state   - pointer passed through unchanged to the builtin.
// arrival - value passed through unchanged to the builtin on every poll.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void __clc_BarrierWait(long *state,
                                                              long arrival) {
  __clc_trap_if_sm_lower_than_80();

  // Busy-wait: there is no work to do between polls.
  bool completed;
  do {
    completed = __nvvm_mbarrier_test_wait(state, arrival);
  } while (!completed);
}

// Runs the architecture guard, then performs a single call to the
// __nvvm_mbarrier_test_wait builtin with `state` and `arrival` and returns
// its result without looping.
//
// state   - pointer passed through unchanged to the builtin.
// arrival - value passed through unchanged to the builtin.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT bool
__clc_BarrierTestWait(long *state, long arrival) {
  __clc_trap_if_sm_lower_than_80();

  bool completed = __nvvm_mbarrier_test_wait(state, arrival);
  return completed;
}

// Calls __clc_BarrierArrive on `state`, then passes the value it returned to
// __clc_BarrierWait on the same `state`. The architecture guard is not called
// here directly; both callees run it themselves.
//
// state - pointer handed to both calls.
_CLC_OVERLOAD _CLC_DEF _CLC_CONVERGENT void
__clc_BarrierArriveAndWait(long *state) {
  long arrival_token = __clc_BarrierArrive(state);
  __clc_BarrierWait(state, arrival_token);
}
Loading