1
- From 8ba15d3b243584a005b3016fb4cd02e78a636a10 Mon Sep 17 00:00:00 2001
1
+ From 1a155b4a98bef1ca74cb215db5c16a5259e9492a Mon Sep 17 00:00:00 2001
2
2
From: haonanya <
[email protected] >
3
3
Date: Wed, 28 Jul 2021 11:43:20 +0800
4
4
Subject: [PATCH] Add support for cl_ext_float_atomics in SPIRVWriter
5
5
6
6
Signed-off-by: haonanya <
[email protected] >
7
7
---
8
- lib/SPIRV/OCLToSPIRV.cpp | 27 +++++++ -
9
- lib/SPIRV/OCLUtil.cpp | 19 +++- --
10
- test/AtomicBuiltinsFloat.ll | 79 +++++++++++++++++++++++
8
+ lib/SPIRV/OCLToSPIRV.cpp | 27 ++++++-
9
+ lib/SPIRV/OCLUtil.cpp | 15 ++--
10
+ test/AtomicBuiltinsFloat.ll | 94 ++ +++++++++++++++++++++++
11
11
test/negative/InvalidAtomicBuiltins.cl | 20 +-----
12
- test/transcoding/AtomicFAddEXTForOCL.ll | 84 +++++++++++++++++++++++++
13
- test/transcoding/AtomicFMaxEXTForOCL.ll | 84 +++++++++++++++++++++++++
14
- test/transcoding/AtomicFMinEXTForOCL.ll | 83 ++++++++++++++++++++++++
15
- 7 files changed, 368 insertions(+), 28 deletions(-)
12
+ test/transcoding/AtomicFAddEXTForOCL.ll | 84 ++++++++++++++++++++++
13
+ test/transcoding/AtomicFMaxEXTForOCL.ll | 84 ++++++++++++++++++++++
14
+ test/transcoding/AtomicFMinEXTForOCL.ll | 83 ++++++++++++++++++++++
15
+ 7 files changed, 381 insertions(+), 26 deletions(-)
16
16
create mode 100644 test/AtomicBuiltinsFloat.ll
17
17
create mode 100644 test/transcoding/AtomicFAddEXTForOCL.ll
18
18
create mode 100644 test/transcoding/AtomicFMaxEXTForOCL.ll
@@ -71,30 +71,28 @@ index d9ed4a7a..cadc2247 100644
71
71
&Attrs);
72
72
}
73
73
diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp
74
- index 2de3f152..2150a991 100644
74
+ index 2de3f152..94e248c1 100644
75
75
--- a/lib/SPIRV/OCLUtil.cpp
76
76
+++ b/lib/SPIRV/OCLUtil.cpp
77
77
@@ -662,29 +662,32 @@ size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC) {
78
78
return 1;
79
79
}
80
80
81
- + // atomic_fetch_[add, sub, min, max] and atomic_fetch_[add, sub, min,
82
- + // max]_explicit functions are defined on OpenCL headers, they are not
83
- + // translated to function call
81
+ + // atomic_fetch_[add, min, max] and atomic_fetch_[add, min, max]_explicit
82
+ + // functions declared in clang headers should be translated to corresponding
83
+ + // FP-typed Atomic Instructions
84
84
bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
85
85
if (!DemangledName.startswith(kOCLBuiltinName::AtomicPrefix) &&
86
86
!DemangledName.startswith(kOCLBuiltinName::AtomPrefix))
87
87
return false;
88
88
89
89
return llvm::StringSwitch<bool>(DemangledName)
90
90
- .EndsWith("add", true)
91
- - .EndsWith("sub", true)
91
+ .EndsWith("sub", true)
92
92
+ .EndsWith("atomic_add", true)
93
- + .EndsWith("atomic_sub", true)
94
93
+ .EndsWith("atomic_min", true)
95
94
+ .EndsWith("atomic_max", true)
96
95
+ .EndsWith("atom_add", true)
97
- + .EndsWith("atom_sub", true)
98
96
+ .EndsWith("atom_min", true)
99
97
+ .EndsWith("atom_max", true)
100
98
.EndsWith("inc", true)
@@ -106,7 +104,7 @@ index 2de3f152..2150a991 100644
106
104
.EndsWith("or", true)
107
105
.EndsWith("xor", true)
108
106
- .EndsWith("add_explicit", true)
109
- - .EndsWith("sub_explicit", true)
107
+ .EndsWith("sub_explicit", true)
110
108
.EndsWith("or_explicit", true)
111
109
.EndsWith("xor_explicit", true)
112
110
.EndsWith("and_explicit", true)
@@ -117,11 +115,13 @@ index 2de3f152..2150a991 100644
117
115
118
116
diff --git a/test/AtomicBuiltinsFloat.ll b/test/AtomicBuiltinsFloat.ll
119
117
new file mode 100644
120
- index 00000000..d75bd012
118
+ index 00000000..c85dd5b6
121
119
--- /dev/null
122
120
+++ b/test/AtomicBuiltinsFloat.ll
123
- @@ -0,0 +1,79 @@
121
+ @@ -0,0 +1,94 @@
124
122
+ ; Check that translator generate atomic instructions for atomic builtins
123
+ + ; FP-typed atomic_fetch_sub and atomic_fetch_sub_explicit should be translated
124
+ + ; to FunctionCall
125
125
+ ; RUN: llvm-as %s -o %t.bc
126
126
+ ; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s
127
127
+ ; RUN: llvm-spirv %t.bc -o %t.spv
@@ -132,12 +132,13 @@ index 00000000..d75bd012
132
132
+ ; CHECK-COUNT-3: AtomicStore
133
133
+ ; CHECK-COUNT-3: AtomicLoad
134
134
+ ; CHECK-COUNT-3: AtomicExchange
135
+ + ; CHECK-COUNT-3: FunctionCall
135
136
+
136
137
+ target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
137
138
+ target triple = "spir-unknown-unknown"
138
139
+
139
140
+ ; Function Attrs: convergent norecurse nounwind
140
- + define spir_kernel void @test_atomic_kernel(float addrspace(3)* %ff, float addrspace(3)* nocapture readnone %a ) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
141
+ + define spir_kernel void @test_atomic_kernel(float addrspace(3)* %ff) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
141
142
+ entry:
142
143
+ %0 = addrspacecast float addrspace(3)* %ff to float addrspace(4)*
143
144
+ tail call spir_func void @_Z11atomic_initPU3AS4VU7_Atomicff(float addrspace(4)* %0, float 1.000000e+00) #2
@@ -150,6 +151,9 @@ index 00000000..d75bd012
150
151
+ %call3 = tail call spir_func float @_Z15atomic_exchangePU3AS4VU7_Atomicff(float addrspace(4)* %0, float 1.000000e+00) #2
151
152
+ %call4 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order(float addrspace(4)* %0, float 1.000000e+00, i32 0) #2
152
153
+ %call5 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)* %0, float 1.000000e+00, i32 0, i32 1) #2
154
+ + %call6 = tail call spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)* %ff, float 1.000000e+00) #2
155
+ + %call7 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)* %ff, float 1.000000e+00, i32 0) #2
156
+ + %call8 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)* %ff, float 1.000000e+00, i32 0, i32 1) #2
153
157
+ ret void
154
158
+ }
155
159
+
@@ -183,6 +187,15 @@ index 00000000..d75bd012
183
187
+ ; Function Attrs: convergent
184
188
+ declare spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)*, float, i32, i32) local_unnamed_addr #1
185
189
+
190
+ + ; Function Attrs: convergent
191
+ + declare spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)*, float) local_unnamed_addr #1
192
+ +
193
+ + ; Function Attrs: convergent
194
+ + declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)*, float, i32) local_unnamed_addr #1
195
+ +
196
+ + ; Function Attrs: convergent
197
+ + declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)*, float, i32, i32) local_unnamed_addr #1
198
+ +
186
199
+ attributes #0 = { convergent norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
187
200
+ attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
188
201
+ attributes #2 = { convergent nounwind }
@@ -194,12 +207,12 @@ index 00000000..d75bd012
194
207
+
195
208
+ !0 = !{i32 1, !"wchar_size", i32 4}
196
209
+ !1 = !{i32 2, i32 0}
197
- + !2 = !{!"clang version 11.1.0 (https://github.com/llvm/llvm-project.git 15c8e1468997ba90943155d35475b2aaeea65f19 )"}
198
- + !3 = !{i32 3, i32 3 }
199
- + !4 = !{!"none", !"none" }
200
- + !5 = !{!"atomic_float*", !"float*" }
201
- + !6 = !{!"_Atomic(float)*", !"float*" }
202
- + !7 = !{!"volatile", !"" }
210
+ + !2 = !{!"clang version 11.1.0 (4989da43e3648ed25a272773165367f195d3b53c )"}
211
+ + !3 = !{i32 3}
212
+ + !4 = !{!"none"}
213
+ + !5 = !{!"atomic_float*"}
214
+ + !6 = !{!"_Atomic(float)*"}
215
+ + !7 = !{!"volatile"}
203
216
diff --git a/test/negative/InvalidAtomicBuiltins.cl b/test/negative/InvalidAtomicBuiltins.cl
204
217
index b8ec5b89..18d11bf5 100644
205
218
--- a/test/negative/InvalidAtomicBuiltins.cl
0 commit comments