Skip to content

Commit 4ef5242

Browse files
authored
[Backport][110] Translate atomic_fetch_sub to OpAtomicFAddEXT with negative value operand (#1498)
Backport for #1492 Signed-off-by: haonanya <[email protected]>
1 parent a31ffae commit 4ef5242

File tree

4 files changed

+90
-30
lines changed

4 files changed

+90
-30
lines changed

lib/SPIRV/OCLToSPIRV.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -881,14 +881,24 @@ void OCLToSPIRVBase::transAtomicBuiltin(CallInst *CI,
881881
if (!IsFPType(AtomicBuiltinsReturnType))
882882
return SPIRVFunctionName;
883883
// Translate FP-typed atomic builtins. Currently we only need to
884-
// translate atomic_fetch_[add, max, min] and atomic_fetch_[add, max,
885-
// min]_explicit to related float instructions
884+
// translate atomic_fetch_[add, sub, max, min] and atomic_fetch_[add,
885+
// sub, max, min]_explicit to related float instructions.
886+
// Translate atomic_fetch_sub to OpAtomicFAddEXT with negative value
887+
// operand
886888
auto SPIRFunctionNameForFloatAtomics =
887889
llvm::StringSwitch<std::string>(SPIRVFunctionName)
888890
.Case("__spirv_AtomicIAdd", "__spirv_AtomicFAddEXT")
891+
.Case("__spirv_AtomicISub", "__spirv_AtomicFAddEXT")
889892
.Case("__spirv_AtomicSMax", "__spirv_AtomicFMaxEXT")
890893
.Case("__spirv_AtomicSMin", "__spirv_AtomicFMinEXT")
891894
.Default("others");
895+
if (SPIRVFunctionName == "__spirv_AtomicISub") {
896+
IRBuilder<> IRB(CI);
897+
// Set float operand to its negation
898+
CI->setOperand(1, IRB.CreateFNeg(CI->getArgOperand(1)));
899+
// Update Args, which is used to generate the new call
900+
Args.back() = CI->getArgOperand(1);
901+
}
892902
return SPIRFunctionNameForFloatAtomics == "others"
893903
? SPIRVFunctionName
894904
: SPIRFunctionNameForFloatAtomics;

lib/SPIRV/OCLUtil.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -662,20 +662,21 @@ size_t getSPIRVAtomicBuiltinNumMemoryOrderArgs(Op OC) {
662662
return 1;
663663
}
664664

665-
// atomic_fetch_[add, min, max] and atomic_fetch_[add, min, max]_explicit
666-
// functions declared in clang headers should be translated to corresponding
667-
// FP-typed Atomic Instructions
665+
// atomic_fetch_[add, sub, min, max] and atomic_fetch_[add, sub, min,
666+
// max]_explicit functions declared in clang headers should be translated
667+
// to corresponding FP-typed Atomic Instructions
668668
bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
669669
if (!DemangledName.startswith(kOCLBuiltinName::AtomicPrefix) &&
670670
!DemangledName.startswith(kOCLBuiltinName::AtomPrefix))
671671
return false;
672672

673673
return llvm::StringSwitch<bool>(DemangledName)
674-
.EndsWith("sub", true)
675674
.EndsWith("atomic_add", true)
675+
.EndsWith("atomic_sub", true)
676676
.EndsWith("atomic_min", true)
677677
.EndsWith("atomic_max", true)
678678
.EndsWith("atom_add", true)
679+
.EndsWith("atom_sub", true)
679680
.EndsWith("atom_min", true)
680681
.EndsWith("atom_max", true)
681682
.EndsWith("inc", true)
@@ -684,7 +685,6 @@ bool isComputeAtomicOCLBuiltin(StringRef DemangledName) {
684685
.EndsWith("and", true)
685686
.EndsWith("or", true)
686687
.EndsWith("xor", true)
687-
.EndsWith("sub_explicit", true)
688688
.EndsWith("or_explicit", true)
689689
.EndsWith("xor_explicit", true)
690690
.EndsWith("and_explicit", true)

test/AtomicBuiltinsFloat.ll

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,4 @@
11
; Check that the translator generates atomic instructions for atomic builtins
2-
; FP-typed atomic_fetch_sub and atomic_fetch_sub_explicit should be translated
3-
; to FunctionCall
42
; RUN: llvm-as %s -o %t.bc
53
; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s
64
; RUN: llvm-spirv %t.bc -o %t.spv
@@ -11,13 +9,12 @@
119
; CHECK-COUNT-3: AtomicStore
1210
; CHECK-COUNT-3: AtomicLoad
1311
; CHECK-COUNT-3: AtomicExchange
14-
; CHECK-COUNT-3: FunctionCall
1512

1613
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
1714
target triple = "spir-unknown-unknown"
1815

1916
; Function Attrs: convergent norecurse nounwind
20-
define dso_local spir_kernel void @test_atomic_kernel(float addrspace(3)* %ff) local_unnamed_addr #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {
17+
define dso_local spir_kernel void @test_atomic_kernel(float addrspace(3)* %ff) local_unnamed_addr #0 !kernel_arg_addr_space !2 !kernel_arg_access_qual !3 !kernel_arg_type !4 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 {
2118
entry:
2219
%0 = addrspacecast float addrspace(3)* %ff to float addrspace(4)*
2320
tail call spir_func void @_Z11atomic_initPU3AS4VU7_Atomicff(float addrspace(4)* %0, float 1.000000e+00) #2
@@ -30,9 +27,6 @@ entry:
3027
%call3 = tail call spir_func float @_Z15atomic_exchangePU3AS4VU7_Atomicff(float addrspace(4)* %0, float 1.000000e+00) #2
3128
%call4 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order(float addrspace(4)* %0, float 1.000000e+00, i32 0) #2
3229
%call5 = tail call spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)* %0, float 1.000000e+00, i32 0, i32 1) #2
33-
%call6 = tail call spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)* %ff, float 1.000000e+00) #2
34-
%call7 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)* %ff, float 1.000000e+00, i32 0) #2
35-
%call8 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)* %ff, float 1.000000e+00, i32 0, i32 1) #2
3630
ret void
3731
}
3832

@@ -66,29 +60,18 @@ declare spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_
6660
; Function Attrs: convergent
6761
declare spir_func float @_Z24atomic_exchange_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)*, float, i32, i32) local_unnamed_addr #1
6862

69-
; Function Attrs: convergent
70-
declare spir_func float @_Z16atomic_fetch_subPU3AS3VU7_Atomicff(float addrspace(3)*, float) local_unnamed_addr #1
71-
72-
; Function Attrs: convergent
73-
declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order(float addrspace(3)*, float, i32) local_unnamed_addr #1
74-
75-
; Function Attrs: convergent
76-
declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS3VU7_Atomicff12memory_order12memory_scope(float addrspace(3)*, float, i32, i32) local_unnamed_addr #1
77-
7863
attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" }
7964
attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
8065
attributes #2 = { convergent nounwind }
8166

8267
!llvm.module.flags = !{!0}
8368
!opencl.ocl.version = !{!1}
8469
!opencl.spir.version = !{!1}
85-
!llvm.ident = !{!2}
8670

8771
!0 = !{i32 1, !"wchar_size", i32 4}
8872
!1 = !{i32 2, i32 0}
89-
!2 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 28c4f97a1dc8608cdd4db452b73d7d4afc89acc9)"}
90-
!3 = !{i32 3}
91-
!4 = !{!"none"}
92-
!5 = !{!"atomic_float*"}
93-
!6 = !{!"_Atomic(float)*"}
94-
!7 = !{!"volatile"}
73+
!2 = !{i32 3}
74+
!3 = !{!"none"}
75+
!4 = !{!"atomic_float*"}
76+
!5 = !{!"_Atomic(float)*"}
77+
!6 = !{!"volatile"}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
;; Check that atomic_fetch_sub is translated to OpAtomicFAddEXT with negative
2+
;; value operand
3+
; RUN: llvm-as %s -o %t.bc
4+
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_EXT_shader_atomic_float_add -o %t.spv
5+
; RUN: spirv-val %t.spv
6+
; RUN: llvm-spirv -to-text %t.spv -o %t.spt
7+
; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
8+
9+
; RUN: llvm-spirv --spirv-target-env=CL2.0 -r %t.spv -o %t.rev.bc
10+
; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-CL20
11+
12+
; RUN: llvm-spirv --spirv-target-env=SPV-IR -r %t.spv -o %t.rev.bc
13+
; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefixes=CHECK-LLVM-SPV
14+
15+
target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
16+
target triple = "spir-unknown-unknown"
17+
18+
; CHECK-SPIRV: Capability AtomicFloat32AddEXT
19+
; CHECK-SPIRV: Capability AtomicFloat64AddEXT
20+
; CHECK-SPIRV: Extension "SPV_EXT_shader_atomic_float_add"
21+
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_32:[0-9]+]] 32
22+
; CHECK-SPIRV: TypeFloat [[TYPE_FLOAT_64:[0-9]+]] 64
23+
;; Check float operand of atomic_fetch_sub is handled correctly
24+
; CHECK-SPIRV: Constant [[TYPE_FLOAT_32]] [[NEGATIVE_229:[0-9]+]] 3278176256
25+
; CHECK-SPIRV: Constant [[TYPE_FLOAT_64]] [[NEGATIVE_334:[0-9]+]] 0 3228884992
26+
27+
28+
; Function Attrs: convergent norecurse nounwind
29+
define dso_local spir_func void @test_atomic_float(float addrspace(1)* %a) local_unnamed_addr #0 {
30+
entry:
31+
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_32]] 13 7 10 11 [[NEGATIVE_229]]
32+
; CHECK-LLVM-CL20: call spir_func float @_Z25atomic_fetch_add_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)* %a.as, float -2.290000e+02, i32 0, i32 1) #0
33+
; CHECK-LLVM-SPV: call spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fiif(float addrspace(1)* %a, i32 2, i32 0, float -2.290000e+02) #0
34+
%call2 = tail call spir_func float @_Z25atomic_fetch_sub_explicitPU3AS1VU7_Atomicff12memory_order12memory_scope(float addrspace(1)* %a, float 2.290000e+02, i32 0, i32 1) #2
35+
ret void
36+
}
37+
38+
; Function Attrs: convergent
39+
declare spir_func float @_Z25atomic_fetch_sub_explicitPU3AS1VU7_Atomicff12memory_order12memory_scope(float addrspace(1)* , float , i32 , i32 ) local_unnamed_addr #1
40+
; CHECK-LLVM-SPV: declare spir_func float @_Z21__spirv_AtomicFAddEXTPU3AS1fiif(float addrspace(1)*, i32, i32, float) #0
41+
42+
; Function Attrs: convergent norecurse nounwind
43+
define dso_local spir_func void @test_atomic_double(double addrspace(1)* %a) local_unnamed_addr #0 {
44+
entry:
45+
; CHECK-SPIRV: 7 AtomicFAddEXT [[TYPE_FLOAT_64]] 21 18 10 11 [[NEGATIVE_334]]
46+
; CHECK-LLVM-CL20: call spir_func double @_Z25atomic_fetch_add_explicitPU3AS4VU7_Atomicdd12memory_order12memory_scope(double addrspace(4)* %a.as, double -3.340000e+02, i32 0, i32 1) #0
47+
; CHECK-LLVM-SPV: call spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1diid(double addrspace(1)* %a, i32 2, i32 0, double -3.340000e+02) #0
48+
%call = tail call spir_func double @_Z25atomic_fetch_sub_explicitPU3AS1VU7_Atomicdd12memory_order12memory_scope(double addrspace(1)* %a, double 3.340000e+02, i32 0, i32 1) #2
49+
ret void
50+
}
51+
; Function Attrs: convergent
52+
declare spir_func double @_Z25atomic_fetch_sub_explicitPU3AS1VU7_Atomicdd12memory_order12memory_scope(double addrspace(1)* , double , i32 , i32 ) local_unnamed_addr #1
53+
; CHECK-LLVM-SPV: declare spir_func double @_Z21__spirv_AtomicFAddEXTPU3AS1diid(double addrspace(1)*, i32, i32, double) #0
54+
55+
; CHECK-LLVM-CL20: declare spir_func float @_Z25atomic_fetch_add_explicitPU3AS4VU7_Atomicff12memory_order12memory_scope(float addrspace(4)*, float, i32, i32) #0
56+
; CHECK-LLVM-CL20: declare spir_func double @_Z25atomic_fetch_add_explicitPU3AS4VU7_Atomicdd12memory_order12memory_scope(double addrspace(4)*, double, i32, i32) #0
57+
58+
attributes #0 = { convergent norecurse nounwind "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
59+
attributes #1 = { convergent "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
60+
attributes #2 = { convergent nounwind }
61+
62+
!llvm.module.flags = !{!0}
63+
!opencl.ocl.version = !{!1}
64+
!opencl.spir.version = !{!1}
65+
66+
!0 = !{i32 1, !"wchar_size", i32 4}
67+
!1 = !{i32 2, i32 0}

0 commit comments

Comments
 (0)