Commit 65bdffb

[SYCL][libclc][NATIVECPU] Implement generic atomic load for generic target (#13249)
This PR implements the overload for the generic address space for `__spirv_AtomicLoad` in the `generic` target. libclc already implements generic-address-space overloads for `__spirv_AtomicLoad` (and several other builtins) in the [ptx](https://github.com/intel/llvm/blob/76167854fb3edba3a575302ccde14392c671529b/libclc/ptx-nvidiacl/libspirv/atomic/atomic_load.cl#L78) and [amdgcn](https://github.com/intel/llvm/blob/76167854fb3edba3a575302ccde14392c671529b/libclc/amdgcn-amdhsa/libspirv/atomic/atomic_load.cl#L26) targets, but not in `generic`. I've created this PR to gather initial feedback on the approach; I'd like to add implementations for the other builtins in follow-up PRs.
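For context, a minimal sketch of a caller that needs this overload. The function name `load_any` is illustrative, and the `Device` enumerator is an assumption about libclc's SPIR-V headers; only `__spirv_AtomicLoad`, `enum Scope`, `enum MemorySemanticsMask`, and `SequentiallyConsistent` appear in the code below.

// Hypothetical device-side caller (not part of this commit). A pointer
// without an address-space qualifier is a generic pointer, so this call
// resolves to the generic-address-space overload of __spirv_AtomicLoad
// that this commit adds.
int load_any(volatile const int *p) {
  return __spirv_AtomicLoad(p, Device, SequentiallyConsistent);
}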
1 parent 38c5524 commit 65bdffb

5 files changed: +162 -18 lines

libclc/generic/libspirv/atomic/atomic_load.cl

Lines changed: 19 additions & 18 deletions
@@ -13,26 +13,27 @@
 #define FDECL(TYPE, PREFIX, AS, BYTE_SIZE, MEM_ORDER) \
   TYPE __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_##MEM_ORDER(volatile AS const TYPE *);
 
-#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \
-  FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \
-  FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \
-  FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \
-  _CLC_DEF TYPE \
-  _Z18__spirv_AtomicLoadPU3##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
-      volatile AS const TYPE *p, enum Scope scope, \
-      enum MemorySemanticsMask semantics) { \
-    if (semantics & Acquire) { \
-      return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \
-    } \
-    if (semantics & SequentiallyConsistent) { \
-      return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \
-    } \
-    return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \
+#define IMPL(TYPE, TYPE_MANGLED, AS, AS_MANGLED, PREFIX, BYTE_SIZE) \
+  FDECL(TYPE, PREFIX, AS, BYTE_SIZE, unordered) \
+  FDECL(TYPE, PREFIX, AS, BYTE_SIZE, acquire) \
+  FDECL(TYPE, PREFIX, AS, BYTE_SIZE, seq_cst) \
+  _CLC_DEF TYPE \
+  _Z18__spirv_AtomicLoadP##AS_MANGLED##K##TYPE_MANGLED##N5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE( \
+      volatile AS const TYPE *p, enum Scope scope, \
+      enum MemorySemanticsMask semantics) { \
+    if (semantics & Acquire) { \
+      return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_acquire(p); \
+    } \
+    if (semantics & SequentiallyConsistent) { \
+      return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_seq_cst(p); \
+    } \
+    return __clc__atomic_##PREFIX##load_##AS##_##BYTE_SIZE##_unordered(p); \
 }
 
-#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
-  IMPL(TYPE, TYPE_MANGLED, global, AS1, PREFIX, BYTE_SIZE) \
-  IMPL(TYPE, TYPE_MANGLED, local, AS3, PREFIX, BYTE_SIZE)
+#define IMPL_AS(TYPE, TYPE_MANGLED, PREFIX, BYTE_SIZE) \
+  IMPL(TYPE, TYPE_MANGLED, global, U3AS1, PREFIX, BYTE_SIZE) \
+  IMPL(TYPE, TYPE_MANGLED, local, U3AS3, PREFIX, BYTE_SIZE) \
+  IMPL(TYPE, TYPE_MANGLED, , , PREFIX, BYTE_SIZE)
 
 IMPL_AS(int, i, , 4)
 IMPL_AS(unsigned int, j, u, 4)
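For readers untangling the token pasting: a rough sketch of what the new generic-address-space instantiation expands to for `IMPL_AS(int, i, , 4)`. With `AS` and `AS_MANGLED` empty, the helper names gain a double underscore and the mangled name drops the `U3ASn` address-space qualifier. This is derived from the macro, not copied from the commit; whitespace and the `_CLC_DEF` attribute expansion are elided.

// Approximate expansion of IMPL(int, i, , , , 4) -- sketch only.
int __clc__atomic_load__4_unordered(volatile const int *);
int __clc__atomic_load__4_acquire(volatile const int *);
int __clc__atomic_load__4_seq_cst(volatile const int *);

int _Z18__spirv_AtomicLoadPKiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(
    volatile const int *p, enum Scope scope,
    enum MemorySemanticsMask semantics) {
  if (semantics & Acquire) {
    return __clc__atomic_load__4_acquire(p);
  }
  if (semantics & SequentiallyConsistent) {
    return __clc__atomic_load__4_seq_cst(p);
  }
  return __clc__atomic_load__4_unordered(p);
}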

libclc/generic/libspirv/atomic/loadstore_helpers_acquire.ll

Lines changed: 23 additions & 0 deletions
@@ -20,6 +20,12 @@ entry:
   unreachable
 }
 
+define i32 @__clc__atomic_load__4_acquire(i32 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define i64 @__clc__atomic_load_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -32,6 +38,12 @@ entry:
   unreachable
 }
 
+define i64 @__clc__atomic_load__8_acquire(i64 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define i32 @__clc__atomic_uload_global_4_acquire(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i32, i32 addrspace(1)* %ptr acquire, align 4
@@ -44,6 +56,12 @@ entry:
   unreachable
 }
 
+define i32 @__clc__atomic_uload__4_acquire(i32 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(0)* %ptr acquire, align 4
+  ret i32 %0
+}
+
 define i64 @__clc__atomic_uload_global_8_acquire(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -56,3 +74,8 @@ entry:
   unreachable
 }
 
+define i64 @__clc__atomic_uload__8_acquire(i64 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
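The double underscore in these helper names is not a typo: it falls out of the FDECL token pasting in atomic_load.cl when the AS token is empty, and `addrspace(0)` is the generic address space in this target's model. A short sketch of the correspondence; the declarations are derived from the macro, not taken from the commit.

// What FDECL produces for the generic AS (sketch). The IR above defines the
// matching symbols, with generic pointers spelled "i32 addrspace(0)*".
int __clc__atomic_load__4_acquire(volatile const int *);   // FDECL(int, , , 4, acquire)
unsigned int
__clc__atomic_uload__4_acquire(volatile const unsigned int *); // FDECL(unsigned int, u, , 4, acquire)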

libclc/generic/libspirv/atomic/loadstore_helpers_release.ll

Lines changed: 24 additions & 0 deletions
@@ -20,6 +20,12 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_store__4_release(i32 addrspace(0)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_store_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -32,6 +38,12 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_store__8_release(i64 addrspace(0)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_ustore_global_4_release(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -44,6 +56,12 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_ustore__4_release(i32 addrspace(0)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_ustore_global_8_release(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -56,3 +74,9 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_ustore__8_release(i64 addrspace(0)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}

libclc/generic/libspirv/atomic/loadstore_helpers_seq_cst.ll

Lines changed: 48 additions & 0 deletions
@@ -20,6 +20,12 @@ entry:
   unreachable
 }
 
+define i32 @__clc__atomic_load__4_seq_cst(i32 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define i64 @__clc__atomic_load_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -32,6 +38,12 @@ entry:
   unreachable
 }
 
+define i64 @__clc__atomic_load__8_seq_cst(i64 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define i32 @__clc__atomic_uload_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i32, i32 addrspace(1)* %ptr seq_cst, align 4
@@ -44,6 +56,12 @@ entry:
   unreachable
 }
 
+define i32 @__clc__atomic_uload__4_seq_cst(i32 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define i64 @__clc__atomic_uload_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -56,6 +74,12 @@ entry:
   unreachable
 }
 
+define i64 @__clc__atomic_uload__8_seq_cst(i64 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_store_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -68,6 +92,12 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_store__4_seq_cst(i32 addrspace(0)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_store_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -80,6 +110,12 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_store__8_seq_cst(i64 addrspace(0)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_ustore_global_4_seq_cst(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -92,6 +128,12 @@ entry:
   unreachable
 }
 
+define void @__clc__atomic_ustore__4_seq_cst(i32 addrspace(0)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}
+
 define void @__clc__atomic_ustore_global_8_seq_cst(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
   tail call void @llvm.trap()
@@ -103,3 +145,9 @@ entry:
   tail call void @llvm.trap()
   unreachable
 }
+
+define void @__clc__atomic_ustore__8_seq_cst(i64 addrspace(0)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  tail call void @llvm.trap()
+  unreachable
+}

libclc/generic/libspirv/atomic/loadstore_helpers_unordered.ll

Lines changed: 48 additions & 0 deletions
@@ -20,6 +20,12 @@ entry:
   ret i32 %0
 }
 
+define i32 @__clc__atomic_load__4_unordered(i32 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(0)* %ptr unordered, align 4
+  ret i32 %0
+}
+
 define i64 @__clc__atomic_load_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8
@@ -32,6 +38,12 @@ entry:
   ret i64 %0
 }
 
+define i64 @__clc__atomic_load__8_unordered(i64 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(0)* %ptr unordered, align 8
+  ret i64 %0
+}
+
 define i32 @__clc__atomic_uload_global_4_unordered(i32 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i32, i32 addrspace(1)* %ptr unordered, align 4
@@ -44,6 +56,12 @@ entry:
   ret i32 %0
 }
 
+define i32 @__clc__atomic_uload__4_unordered(i32 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i32, i32 addrspace(0)* %ptr unordered, align 4
+  ret i32 %0
+}
+
 define i64 @__clc__atomic_uload_global_8_unordered(i64 addrspace(1)* nocapture %ptr) nounwind alwaysinline {
 entry:
   %0 = load atomic volatile i64, i64 addrspace(1)* %ptr unordered, align 8
@@ -56,6 +74,12 @@ entry:
   ret i64 %0
 }
 
+define i64 @__clc__atomic_uload__8_unordered(i64 addrspace(0)* nocapture %ptr) nounwind alwaysinline {
+entry:
+  %0 = load atomic volatile i64, i64 addrspace(0)* %ptr unordered, align 8
+  ret i64 %0
+}
+
 define void @__clc__atomic_store_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
   store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4
@@ -68,6 +92,12 @@ entry:
   ret void
 }
 
+define void @__clc__atomic_store__4_unordered(i32 addrspace(0)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(0)* %ptr unordered, align 4
+  ret void
+}
+
 define void @__clc__atomic_store_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
   store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8
@@ -80,6 +110,12 @@ entry:
   ret void
 }
 
+define void @__clc__atomic_store__8_unordered(i64 addrspace(0)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(0)* %ptr unordered, align 8
+  ret void
+}
+
 define void @__clc__atomic_ustore_global_4_unordered(i32 addrspace(1)* nocapture %ptr, i32 %value) nounwind alwaysinline {
 entry:
   store atomic volatile i32 %value, i32 addrspace(1)* %ptr unordered, align 4
@@ -92,6 +128,12 @@ entry:
   ret void
 }
 
+define void @__clc__atomic_ustore__4_unordered(i32 addrspace(0)* nocapture %ptr, i32 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i32 %value, i32 addrspace(0)* %ptr unordered, align 4
+  ret void
+}
+
 define void @__clc__atomic_ustore_global_8_unordered(i64 addrspace(1)* nocapture %ptr, i64 %value) nounwind alwaysinline {
 entry:
   store atomic volatile i64 %value, i64 addrspace(1)* %ptr unordered, align 8
@@ -104,3 +146,9 @@ entry:
   ret void
 }
 
+define void @__clc__atomic_ustore__8_unordered(i64 addrspace(0)* nocapture %ptr, i64 %value) nounwind alwaysinline {
+entry:
+  store atomic volatile i64 %value, i64 addrspace(0)* %ptr unordered, align 8
+  ret void
+}
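Note that among the memory orders, only the unordered generic helpers perform real loads and stores here; most of the acquire/release/seq_cst generic variants remain llvm.trap stubs, matching the pre-existing global and local stubs in these files. The `uload`/`ustore` definitions above back the unsigned instantiation `IMPL_AS(unsigned int, j, u, 4)` from atomic_load.cl; deriving from the macro (a sketch, not text from the commit), the unsigned generic overload it produces is:

// Mangled with 'j' (unsigned int) and no U3ASn address-space qualifier -- sketch.
unsigned int
_Z18__spirv_AtomicLoadPKjN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(
    volatile const unsigned int *p, enum Scope scope,
    enum MemorySemanticsMask semantics);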
