Skip to content

Commit b785f04

Browse files
KorovinVladigcbot
authored andcommitted
Global barrier support
.
1 parent 8870832 commit b785f04

File tree

15 files changed

+239
-56
lines changed

15 files changed

+239
-56
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#ifndef CM_CL_BARRIER_H
10+
#define CM_CL_BARRIER_H
11+
12+
#include "detail/builtins.h"
13+
#include "exec.h"
14+
#include "vector.h"
15+
16+
#include <opencl_def.h>
17+
18+
namespace cm {
19+
namespace exec {
20+
21+
namespace detail {
22+
enum fence : uint8_t {
23+
global_coherent_fence = 1,
24+
l3_flush_instructions = 2,
25+
l3_flush_texture_data = 4,
26+
l3_flush_constant_data = 8,
27+
l3_flush_rw_data = 16,
28+
local_barrier = 32,
29+
l1_flush_ro_data = 64,
30+
sw_barrier = 128,
31+
};
32+
33+
enum semantics : int {
34+
relaxed = 0,
35+
acquire = 1 << 1,
36+
release = 1 << 2,
37+
acquire_release = 1 << 3
38+
};
39+
} // namespace detail
40+
41+
enum scope : int {
42+
cross_device = 0,
43+
device = 1,
44+
workgroup = 2,
45+
subgroup = 3,
46+
invocation = 4
47+
};
48+
49+
inline void fence_global(int semantics) {
50+
uint8_t mode = detail::fence::global_coherent_fence;
51+
bool invalidate_L1 = semantics & (detail::semantics::acquire |
52+
detail::semantics::acquire_release);
53+
if (invalidate_L1)
54+
mode |= detail::fence::l1_flush_ro_data;
55+
if (semantics != detail::semantics::relaxed)
56+
cm::detail::__cm_cl_fence(mode);
57+
}
58+
59+
inline void fence_local(int semantics) {
60+
const uint8_t mode = detail::fence::global_coherent_fence |
61+
detail::fence::local_barrier | detail::fence::sw_barrier;
62+
63+
if (semantics != detail::semantics::relaxed)
64+
cm::detail::__cm_cl_fence(mode);
65+
}
66+
67+
inline void fence(int scope, int semantics) {
68+
if (scope == scope::workgroup)
69+
fence_local(semantics);
70+
else
71+
fence_global(semantics);
72+
}
73+
74+
inline void barrier_arrive(int scope) {
75+
if (scope == scope::workgroup)
76+
cm::detail::__cm_cl_sbarrier(1);
77+
}
78+
79+
inline void barrier_wait(int scope) {
80+
if (scope == scope::workgroup)
81+
cm::detail::__cm_cl_sbarrier(0);
82+
}
83+
84+
inline void local_barrier() { cm::detail::__cm_cl_barrier(); }
85+
86+
inline void global_barrier() {
87+
fence_global(detail::semantics::acquire_release);
88+
local_barrier();
89+
90+
__global uint8_t *sync_buffer = cm::detail::sync_buffer();
91+
92+
bool is_first_item =
93+
(get_local_id(0) | get_local_id(1) | get_local_id(2)) == 0;
94+
95+
uint32_t group_id = get_group_linear_id();
96+
97+
// Signal that a group hit the global barrier.
98+
if (is_first_item) {
99+
sync_buffer[group_id] = 1;
100+
fence_global(detail::semantics::release); // write fence
101+
}
102+
103+
uint32_t num_groups = get_group_linear_count();
104+
105+
// The last group controls that the others hit
106+
// the global barrier.
107+
if (group_id == (num_groups - 1)) {
108+
uint32_t local_size = get_local_linear_size();
109+
uint8_t Value;
110+
do {
111+
fence_global(detail::semantics::acquire); // read fence
112+
Value = 1;
113+
for (uint32_t local_id = get_local_linear_id(); local_id < num_groups;
114+
local_id += local_size)
115+
Value = Value & sync_buffer[local_id];
116+
} while (Value == 0);
117+
118+
fence_global(detail::semantics::acquire_release);
119+
local_barrier();
120+
121+
// Global barrier is complete.
122+
for (uint32_t local_id = get_local_linear_id(); local_id < num_groups;
123+
local_id += local_size)
124+
sync_buffer[local_id] = 0;
125+
fence_global(detail::semantics::release); // write fence
126+
}
127+
128+
// The first items wait for the last group.
129+
if (is_first_item)
130+
while (sync_buffer[group_id] != 0)
131+
fence_global(detail::semantics::acquire); // read fence
132+
133+
// Other items wait for the first ones.
134+
fence_global(detail::semantics::acquire_release);
135+
local_barrier();
136+
}
137+
138+
} // namespace exec
139+
} // namespace cm
140+
141+
#endif // CM_CL_BARRIER_H

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/detail/builtins.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2021-2023 Intel Corporation
3+
Copyright (C) 2021-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -53,6 +53,7 @@ _Noreturn void __cm_cl_trap();
5353
// FIXME: For legacy issues 64-bit pointer is always returned.
5454
uint64_t __cm_cl_assert_buffer();
5555
uint64_t __cm_cl_printf_buffer();
56+
uint64_t __cm_cl_sync_buffer();
5657

5758
int __cm_cl_printf_format_index(__constant const char *str);
5859
// DPC++ tend to place constant strings in global address space.
@@ -297,6 +298,11 @@ inline __global void *printf_buffer() {
297298
return reinterpret_cast<__global void *>(ptr);
298299
}
299300

301+
inline __global uint8_t *sync_buffer() {
302+
auto ptr = static_cast<uintptr_t>(__cm_cl_sync_buffer());
303+
return reinterpret_cast<__global uint8_t *>(ptr);
304+
}
305+
300306
inline int printf_format_index(__constant const char *str) {
301307
return __cm_cl_printf_format_index(str);
302308
}

IGC/VectorCompiler/CMCL/lib/Headers/cm-cl/exec.h

Lines changed: 15 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2021 Intel Corporation
3+
Copyright (C) 2021-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -18,26 +18,6 @@ namespace cm {
1818
namespace exec {
1919

2020
enum dimension : int { x = 0, y = 1, z = 2 };
21-
enum scope : int {
22-
cross_device = 0,
23-
device = 1,
24-
workgroup = 2,
25-
subgroup = 3,
26-
invocation = 4
27-
};
28-
29-
namespace detail {
30-
enum fence : uint8_t {
31-
global_coherent_fence = 1,
32-
l3_flush_instructions = 2,
33-
l3_flush_texture_data = 4,
34-
l3_flush_constant_data = 8,
35-
l3_flush_rw_data = 16,
36-
local_barrier = 32,
37-
l1_flush_ro_data = 64,
38-
sw_barrier = 128,
39-
};
40-
} // namespace detail
4121

4222
inline uint32_t get_local_id(int dim) {
4323
if (dim > dimension::z || dim < dimension::x)
@@ -51,6 +31,15 @@ inline uint32_t get_local_size(int dim) {
5131
return cm::detail::get_local_size()[dim];
5232
}
5333

34+
inline uint32_t get_local_linear_id() {
35+
return get_local_id(2) * get_local_size(1) * get_local_size(0) +
36+
get_local_id(1) * get_local_size(0) + get_local_id(0);
37+
}
38+
39+
inline uint32_t get_local_linear_size() {
40+
return get_local_size(2) * get_local_size(1) * get_local_size(0);
41+
}
42+
5443
inline uint32_t get_group_count(int dim) {
5544
if (dim > dimension::z || dim < dimension::x)
5645
return 0;
@@ -70,28 +59,13 @@ inline uint32_t get_group_id(int dim) {
7059
}
7160
}
7261

73-
inline void barrier(int scope) {
74-
if (scope == scope::workgroup)
75-
cm::detail::__cm_cl_barrier();
62+
inline uint32_t get_group_linear_id() {
63+
return get_group_id(2) * get_group_count(1) * get_group_count(0) +
64+
get_group_id(1) * get_group_count(0) + get_group_id(0);
7665
}
7766

78-
inline void barrier_arrive(int scope) {
79-
if (scope == scope::workgroup)
80-
cm::detail::__cm_cl_sbarrier(1);
81-
}
82-
83-
inline void barrier_wait(int scope) {
84-
if (scope == scope::workgroup)
85-
cm::detail::__cm_cl_sbarrier(0);
86-
}
87-
88-
inline void fence(int scope, int semantics) {
89-
const uint8_t mode = detail::fence::global_coherent_fence |
90-
detail::fence::local_barrier |
91-
detail::fence::sw_barrier;
92-
93-
if (semantics != 0)
94-
cm::detail::__cm_cl_fence(mode);
67+
inline uint32_t get_group_linear_count() {
68+
return get_group_count(2) * get_group_count(1) * get_group_count(0);
9569
}
9670

9771
} // namespace exec

IGC/VectorCompiler/CMCL/lib/Support/TranslationDescription.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"copyright": [
33
"============================ begin_copyright_notice ============================",
44
"",
5-
"Copyright (C) 2021-2023 Intel Corporation",
5+
"Copyright (C) 2021-2024 Intel Corporation",
66
"",
77
"SPDX-License-Identifier: MIT",
88
"",
@@ -489,6 +489,15 @@
489489
"Operands": []
490490
}
491491
},
492+
"SyncBuffer": {
493+
"Name": "sync_buffer",
494+
"Operands": [],
495+
"TranslateInto": {
496+
"VC-Intrinsic": "sync_buffer",
497+
"ReturnType": { "GetBuiltinReturnType": [] },
498+
"Operands": []
499+
}
500+
},
492501
"Barrier": {
493502
"Name": "barrier",
494503
"Operands": [],

IGC/VectorCompiler/igcdeps/include/vc/igcdeps/cmc.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2019-2023 Intel Corporation
3+
Copyright (C) 2019-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -106,6 +106,8 @@ class CMKernel {
106106
unsigned Size, unsigned ArgOffset);
107107
void createPrintfBufferArgAnnotation(unsigned Index, unsigned BTI,
108108
unsigned Size, unsigned ArgOffset);
109+
void createSyncBufferArgAnnotation(unsigned Index, unsigned BTI,
110+
unsigned Size, unsigned ArgOffset);
109111

110112
void createImplArgsBufferAnnotation(unsigned Size, unsigned ArgOffset);
111113

IGC/VectorCompiler/igcdeps/src/cmc.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2019-2023 Intel Corporation
3+
Copyright (C) 2019-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -482,6 +482,15 @@ void CMKernel::createPrintfBufferArgAnnotation(unsigned Index, unsigned BTI,
482482
zebin::PreDefinedAttrGetter::ArgType::printf_buffer, ArgOffset, Size);
483483
}
484484

485+
void CMKernel::createSyncBufferArgAnnotation(unsigned Index, unsigned BTI,
486+
unsigned Size,
487+
unsigned ArgOffset) {
488+
// EnableZEBinary: ZEBinary related code
489+
zebin::ZEInfoBuilder::addPayloadArgumentImplicit(
490+
m_kernelInfo.m_zePayloadArgs,
491+
zebin::PreDefinedAttrGetter::ArgType::sync_buffer, ArgOffset, Size);
492+
}
493+
485494
void CMKernel::createImplArgsBufferAnnotation(unsigned Size,
486495
unsigned ArgOffset) {
487496
auto constInput = std::make_unique<iOpenCL::ConstantInputAnnotation>();
@@ -707,6 +716,10 @@ static void setArgumentsInfo(const GenXOCLRuntimeInfo::KernelInfo &Info,
707716
Kernel.createPrintfBufferArgAnnotation(Arg.getIndex(), Arg.getBTI(),
708717
Arg.getSizeInBytes(), ArgOffset);
709718
break;
719+
case ArgKind::SyncBuffer:
720+
Kernel.createSyncBufferArgAnnotation(Arg.getIndex(), Arg.getBTI(),
721+
Arg.getSizeInBytes(), ArgOffset);
722+
break;
710723
case ArgKind::PrivateBase: {
711724
auto PrivMemSize = Info.getStatelessPrivMemSize();
712725
Kernel.createPrivateBaseAnnotation(Arg.getIndex(), Arg.getSizeInBytes(),

IGC/VectorCompiler/include/vc/GenXCodeGen/GenXOCLRuntimeInfo.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2020-2023 Intel Corporation
3+
Copyright (C) 2020-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66
@@ -55,6 +55,7 @@ class GenXOCLRuntimeInfo : public ModulePass {
5555
Image3D,
5656
AssertBuffer,
5757
PrintBuffer,
58+
SyncBuffer,
5859
PrivateBase,
5960
ByValSVM,
6061
BindlessBuffer,

IGC/VectorCompiler/include/vc/InternalIntrinsics/Intrinsic_definitions.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# ========================== begin_copyright_notice ============================
22
#
3-
# Copyright (C) 2022-2023 Intel Corporation
3+
# Copyright (C) 2022-2024 Intel Corporation
44
#
55
# SPDX-License-Identifier: MIT
66
#
@@ -786,6 +786,16 @@
786786
### Thread ID intrinsics
787787
### --------------------
788788

789+
## ``llvm.vc.internal.sync.buffer`` : read stateless pointer to sync buffer
790+
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
791+
## ``llvm.vc.internal.sync.buffer`` : read implicit arg sync buffer ptr
792+
##
793+
## * Return value: i64 address of sync buffer
794+
##
795+
"sync_buffer" : { "result" : "long",
796+
"arguments": [],
797+
"attributes": "ReadMem", },
798+
789799
## ``llvm.vc.internal.logical.thread.id`` : logical global thread ID
790800
## ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
791801
##

IGC/VectorCompiler/include/vc/Utils/GenX/ImplicitArgsBuffer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/*========================== begin_copyright_notice ============================
22
3-
Copyright (C) 2021-2023 Intel Corporation
3+
Copyright (C) 2021-2024 Intel Corporation
44
55
SPDX-License-Identifier: MIT
66

0 commit comments

Comments
 (0)