1
- From 794c7d32e521060190bbfca7c52f0bb4fa49f2f2 Mon Sep 17 00:00:00 2001
1
+ From 89e3be862e9a389ba858faa1270f0f8856b37bcf Mon Sep 17 00:00:00 2001
2
2
From: haonanya <
[email protected] >
3
3
Date: Mon, 19 Jul 2021 10:14:20 +0800
4
4
Subject: [PATCH] Add support for cl_ext_float_atomics in SPIRVWriter
5
5
6
6
Signed-off-by: haonanya <
[email protected] >
7
- Signed-off-by: Haonan Yang <
[email protected] >
8
7
---
9
8
lib/SPIRV/OCL20ToSPIRV.cpp | 26 +++++-
10
- lib/SPIRV/OCLUtil.cpp | 19 +++--
9
+ lib/SPIRV/OCLUtil.cpp | 15 ++--
11
10
lib/SPIRV/SPIRVToOCL.h | 3 +
12
11
lib/SPIRV/SPIRVToOCL12.cpp | 21 +++++
13
12
lib/SPIRV/SPIRVToOCL20.cpp | 28 ++++++-
14
13
lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 -
15
14
lib/SPIRV/libSPIRV/SPIRVOpCode.h | 8 +-
16
- test/AtomicBuiltinsFloat.ll | 79 ++++++++++++++++++
15
+ test/AtomicBuiltinsFloat.ll | 94 +++++++++++++++++++++
17
16
test/AtomicFAddEXTForOCL.ll | 88 ++++++++++++++++++++
18
17
test/AtomicFAddExt.ll | 111 ++++++++-----------------
19
18
test/AtomicFMaxEXT.ll | 113 +++++++-------------------
20
19
test/AtomicFMaxEXTForOCL.ll | 88 ++++++++++++++++++++
21
20
test/AtomicFMinEXT.ll | 113 +++++++-------------------
22
21
test/AtomicFMinEXTForOCL.ll | 85 +++++++++++++++++++
23
22
test/InvalidAtomicBuiltins.cl | 16 ----
24
- 15 files changed, 526 insertions(+), 273 deletions(-)
23
+ 15 files changed, 539 insertions(+), 271 deletions(-)
25
24
create mode 100644 test/AtomicBuiltinsFloat.ll
26
25
create mode 100644 test/AtomicFAddEXTForOCL.ll
27
26
create mode 100644 test/AtomicFMaxEXTForOCL.ll
28
27
create mode 100644 test/AtomicFMinEXTForOCL.ll
29
28
30
29
diff --git a/lib/SPIRV/OCL20ToSPIRV.cpp b/lib/SPIRV/OCL20ToSPIRV.cpp
31
- index e30aa5be..98d4289e 100644
30
+ index e30aa5be..79323de2 100644
32
31
--- a/lib/SPIRV/OCL20ToSPIRV.cpp
33
32
+++ b/lib/SPIRV/OCL20ToSPIRV.cpp
34
33
@@ -407,7 +407,6 @@ void OCL20ToSPIRV::visitCallInst(CallInst &CI) {
@@ -53,7 +52,7 @@ index e30aa5be..98d4289e 100644
53
52
Args.end() - Offset);
54
53
}
55
54
- return getSPIRVFuncName(OCLSPIRVBuiltinMap::map(Info.UniqName));
56
- + llvm::Type* AtomicBuiltinsReturnType =
55
+ + llvm::Type* AtomicBuiltinsReturnType =
57
56
+ CI->getCalledFunction()->getReturnType();
58
57
+ auto IsFPType = [](llvm::Type *ReturnType) {
59
58
+ return ReturnType->isHalfTy() || ReturnType->isFloatTy() ||
@@ -79,30 +78,28 @@ index e30aa5be..98d4289e 100644
79
78
&Attrs);
80
79
}
81
80
diff --git a/lib/SPIRV/OCLUtil.cpp b/lib/SPIRV/OCLUtil.cpp
82
- index 992f173f..539c196c 100644
81
+ index 992f173f..8ae882c6 100644
83
82
--- a/lib/SPIRV/OCLUtil.cpp
84
83
+++ b/lib/SPIRV/OCLUtil.cpp
85
84
@@ -120,29 +120,32 @@ size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC) {
86
85
return 1;
87
86
}
88
87
89
- + // atomic_fetch_[add, sub, min, max] and atomic_fetch_[add, sub, min,
90
- + // max]_explicit functions are defined on OpenCL headers, they are not
91
- + // translated to function call
88
+ + // atomic_fetch_[add, min, max] and atomic_fetch_[add, min, max]_explicit
89
+ + // functions declared in clang headers should be translated to corresponding
90
+ + // FP-typed Atomic Instructions
92
91
bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
93
92
if (!DemangledName.startswith(kOCLBuiltinName::AtomicPrefix) &&
94
93
!DemangledName.startswith(kOCLBuiltinName::AtomPrefix))
95
94
return false;
96
95
97
96
return llvm::StringSwitch<bool>(DemangledName)
98
97
- .EndsWith("add", true)
99
- - .EndsWith("sub", true)
98
+ .EndsWith("sub", true)
100
99
+ .EndsWith("atomic_add", true)
101
- + .EndsWith("atomic_sub", true)
102
100
+ .EndsWith("atomic_min", true)
103
101
+ .EndsWith("atomic_max", true)
104
102
+ .EndsWith("atom_add", true)
105
- + .EndsWith("atom_sub", true)
106
103
+ .EndsWith("atom_min", true)
107
104
+ .EndsWith("atom_max", true)
108
105
.EndsWith("inc", true)
@@ -114,7 +111,7 @@ index 992f173f..539c196c 100644
114
111
.EndsWith("or", true)
115
112
.EndsWith("xor", true)
116
113
- .EndsWith("add_explicit", true)
117
- - .EndsWith("sub_explicit", true)
114
+ .EndsWith("sub_explicit", true)
118
115
.EndsWith("or_explicit", true)
119
116
.EndsWith("xor_explicit", true)
120
117
.EndsWith("and_explicit", true)
@@ -184,7 +181,7 @@ index 1a62c6b8..dc0ba9cc 100644
184
181
}
185
182
186
183
diff --git a/lib/SPIRV/SPIRVToOCL20.cpp b/lib/SPIRV/SPIRVToOCL20.cpp
187
- index 8c437858..0033b853 100644
184
+ index 8c437858..07612ce6 100644
188
185
--- a/lib/SPIRV/SPIRVToOCL20.cpp
189
186
+++ b/lib/SPIRV/SPIRVToOCL20.cpp
190
187
@@ -82,6 +82,9 @@ public:
@@ -232,8 +229,8 @@ index 8c437858..0033b853 100644
232
229
}
233
230
auto Ptr = findFirstPtr(Args);
234
231
- auto Name = OCLSPIRVBuiltinMap::rmap(OC);
235
- + std::string Name;
236
- + // Map fp atomic instructions to regular OpenCL built-ins.
232
+ + std::string Name;
233
+ + // Map fp atomic instructions to regular OpenCL built-ins.
237
234
+ if (isFPAtomicOpCode(OC))
238
235
+ Name = mapFPAtomicName(OC);
239
236
+ else
@@ -278,11 +275,13 @@ index feec70f6..8e595e83 100644
278
275
return ((unsigned)OpCode >= OpIAdd && (unsigned)OpCode <= OpFMod) ||
279
276
diff --git a/test/AtomicBuiltinsFloat.ll b/test/AtomicBuiltinsFloat.ll
280
277
new file mode 100644
281
- index 00000000..18ee1c86
278
+ index 00000000..b688cb2a
282
279
--- /dev/null
283
280
+++ b/test/AtomicBuiltinsFloat.ll
284
- @@ -0,0 +1,79 @@
281
+ @@ -0,0 +1,94 @@
285
282
+ ; Check that translator generate atomic instructions for atomic builtins
283
+ + ; FP-typed atomic_fetch_sub and atomic_fetch_sub_explicit should be translated
284
+ + ; to FunctionCall
286
285
+ ; RUN: llvm-as %s -o %t.bc
287
286
+ ; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s
288
287
+ ; RUN: llvm-spirv %t.bc -o %t.spv
@@ -293,6 +292,7 @@ index 00000000..18ee1c86
293
292
+ ; CHECK-COUNT-3: AtomicStore
294
293
+ ; CHECK-COUNT-3: AtomicLoad
295
294
+ ; CHECK-COUNT-3: AtomicExchange
295
+ + ; CHECK-COUNT-3: FunctionCall
296
296
+
297
297
+ target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
298
298
+ target triple = "spir-unknown-unknown"
@@ -311,6 +311,9 @@ index 00000000..18ee1c86
311
311
+ %call3 = tail call spir_func float @_Z15atomic_exchangePU3AS4VU7_Atomicff(float addrspace(4)* %0, float 1.000000e+00) #2
312
312
+ %call4 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order(float addrspace(4)* %0, float 1.000000e+00, i32 0) #2
313
313
+ %call5 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)* %0, float 1.000000e+00, i32 0, i32 1) #2
314
+ + %call6 = tail call spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)* %ff, float 1.000000e+00) #2
315
+ + %call7 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)* %ff, float 1.000000e+00, i32 0) #2
316
+ + %call8 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)* %ff, float 1.000000e+00, i32 0, i32 1) #2
314
317
+ ret void
315
318
+ }
316
319
+
@@ -344,6 +347,15 @@ index 00000000..18ee1c86
344
347
+ ; Function Attrs: convergent
345
348
+ declare spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)*, float, i32, i32) local_unnamed_addr #1
346
349
+
350
+ + ; Function Attrs: convergent
351
+ + declare spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)*, float) local_unnamed_addr #1
352
+ +
353
+ + ; Function Attrs: convergent
354
+ + declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)*, float, i32) local_unnamed_addr #1
355
+ +
356
+ + ; Function Attrs: convergent
357
+ + declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)*, float, i32, i32) local_unnamed_addr #1
358
+ +
347
359
+ attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
348
360
+ attributes #1 = { convergent "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
349
361
+ attributes #2 = { convergent nounwind }
@@ -355,7 +367,7 @@ index 00000000..18ee1c86
355
367
+
356
368
+ !0 = !{i32 1, !"wchar_size", i32 4}
357
369
+ !1 = !{i32 2, i32 0}
358
- + !2 = !{!"clang version 10.0.1 (8560093eba963fba2edd47ca85404cdaff22f174 )"}
370
+ + !2 = !{!"clang version 10.0.1 (0d3ce3267dd78b77ab8f302347cc547afaf43ede )"}
359
371
+ !3 = !{i32 3}
360
372
+ !4 = !{!"none"}
361
373
+ !5 = !{!"atomic_float*"}
0 commit comments