Skip to content

Commit 56a0a7f

Browse files
authored
[flang][cuda] Adding support for more atomic calls (#124671)
This PR follows up on #123840 and extends atomic operation support in CUDA Fortran (CUF) with atomicsub, atomicand, atomicor, atomicinc, atomicdec, atomicmax and atomicmin.
1 parent df122fc commit 56a0a7f

File tree

4 files changed

+245
-4
lines changed

4 files changed

+245
-4
lines changed

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,13 @@ struct IntrinsicLibrary {
186186
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
187187
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
188188
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
189+
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
190+
mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
191+
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
192+
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
193+
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
194+
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
195+
mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
189196
fir::ExtendedValue
190197
genCommandArgumentCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
191198
mlir::Value genAsind(mlir::Type, llvm::ArrayRef<mlir::Value>);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,22 @@ static constexpr IntrinsicHandler handlers[]{
151151
{"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
152152
{"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
153153
{"atomicaddl", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
154+
{"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false},
155+
{"atomicdeci", &I::genAtomicDec, {{{"a", asAddr}, {"v", asValue}}}, false},
156+
{"atomicinci", &I::genAtomicInc, {{{"a", asAddr}, {"v", asValue}}}, false},
157+
{"atomicmaxd", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
158+
{"atomicmaxf", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
159+
{"atomicmaxi", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
160+
{"atomicmaxl", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
161+
{"atomicmind", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
162+
{"atomicminf", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
163+
{"atomicmini", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
164+
{"atomicminl", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
165+
{"atomicori", &I::genAtomicOr, {{{"a", asAddr}, {"v", asValue}}}, false},
166+
{"atomicsubd", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
167+
{"atomicsubf", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
168+
{"atomicsubi", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
169+
{"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
154170
{"bessel_jn",
155171
&I::genBesselJn,
156172
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
@@ -2600,6 +2616,75 @@ mlir::Value IntrinsicLibrary::genAtomicAdd(mlir::Type resultType,
26002616
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
26012617
}
26022618

2619+
// ATOMICSUB
// Handler bound to the "atomicsubd", "atomicsubf", "atomicsubi" and
// "atomicsubl" entries of the handlers table, which pass "a" asAddr and "v"
// asValue. Chooses the integer or floating-point subtract kind from the type
// of the value operand and forwards both operands to genAtomBinOp, whose
// result is returned unchanged. `resultType` is not consulted in this body.
mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType,
2620+
llvm::ArrayRef<mlir::Value> args) {
2621+
assert(args.size() == 2);
2622+
2623+
// Any value type that is not an mlir::IntegerType selects fsub; no separate
// floating-point check is made here.
mlir::LLVM::AtomicBinOp binOp =
2624+
mlir::isa<mlir::IntegerType>(args[1].getType())
2625+
? mlir::LLVM::AtomicBinOp::sub
2626+
: mlir::LLVM::AtomicBinOp::fsub;
2627+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2628+
}
2629+
2630+
// ATOMICAND
// Handler bound to the "atomicandi" entry of the handlers table ("a" asAddr,
// "v" asValue). Forwards the two operands to genAtomBinOp with the `_and`
// kind and returns its result. `resultType` is not consulted in this body.
mlir::Value IntrinsicLibrary::genAtomicAnd(mlir::Type resultType,
2631+
llvm::ArrayRef<mlir::Value> args) {
2632+
assert(args.size() == 2);
2633+
// Only an integer value operand is accepted (checked in assert builds).
assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
2634+
2635+
mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::_and;
2636+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2637+
}
2638+
2639+
// ATOMICOR
// Handler bound to the "atomicori" entry of the handlers table ("a" asAddr,
// "v" asValue). Forwards the two operands to genAtomBinOp with the `_or`
// kind and returns its result. `resultType` is not consulted in this body.
mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType,
2640+
llvm::ArrayRef<mlir::Value> args) {
2641+
assert(args.size() == 2);
2642+
// Only an integer value operand is accepted (checked in assert builds).
assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
2643+
2644+
mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::_or;
2645+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2646+
}
2647+
2648+
// ATOMICDEC
// Handler bound to the "atomicdeci" entry of the handlers table ("a" asAddr,
// "v" asValue). Forwards the two operands to genAtomBinOp with the
// `udec_wrap` kind and returns its result. `resultType` is not consulted in
// this body.
mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType,
2649+
llvm::ArrayRef<mlir::Value> args) {
2650+
assert(args.size() == 2);
2651+
// Only an integer value operand is accepted (checked in assert builds).
assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
2652+
2653+
mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::udec_wrap;
2654+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2655+
}
2656+
2657+
// ATOMICINC
// Handler bound to the "atomicinci" entry of the handlers table ("a" asAddr,
// "v" asValue). Forwards the two operands to genAtomBinOp with the
// `uinc_wrap` kind and returns its result. `resultType` is not consulted in
// this body.
mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType,
2658+
llvm::ArrayRef<mlir::Value> args) {
2659+
assert(args.size() == 2);
2660+
// Only an integer value operand is accepted (checked in assert builds).
assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
2661+
2662+
mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::uinc_wrap;
2663+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2664+
}
2665+
2666+
// ATOMICMAX
// Handler bound to the "atomicmaxd", "atomicmaxf", "atomicmaxi" and
// "atomicmaxl" entries of the handlers table ("a" asAddr, "v" asValue).
// Chooses the `max` or `fmax` kind from the type of the value operand and
// forwards both operands to genAtomBinOp, whose result is returned unchanged.
// `resultType` is not consulted in this body.
mlir::Value IntrinsicLibrary::genAtomicMax(mlir::Type resultType,
2667+
llvm::ArrayRef<mlir::Value> args) {
2668+
assert(args.size() == 2);
2669+
2670+
// Any value type that is not an mlir::IntegerType selects fmax; no separate
// floating-point check is made here.
mlir::LLVM::AtomicBinOp binOp =
2671+
mlir::isa<mlir::IntegerType>(args[1].getType())
2672+
? mlir::LLVM::AtomicBinOp::max
2673+
: mlir::LLVM::AtomicBinOp::fmax;
2674+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2675+
}
2676+
2677+
// ATOMICMIN
// Handler bound to the "atomicmind", "atomicminf", "atomicmini" and
// "atomicminl" entries of the handlers table ("a" asAddr, "v" asValue).
// Chooses the `min` or `fmin` kind from the type of the value operand and
// forwards both operands to genAtomBinOp, whose result is returned unchanged.
// `resultType` is not consulted in this body.
mlir::Value IntrinsicLibrary::genAtomicMin(mlir::Type resultType,
2678+
llvm::ArrayRef<mlir::Value> args) {
2679+
assert(args.size() == 2);
2680+
2681+
// Any value type that is not an mlir::IntegerType selects fmin; no separate
// floating-point check is made here.
mlir::LLVM::AtomicBinOp binOp =
2682+
mlir::isa<mlir::IntegerType>(args[1].getType())
2683+
? mlir::LLVM::AtomicBinOp::min
2684+
: mlir::LLVM::AtomicBinOp::fmin;
2685+
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2686+
}
2687+
26032688
// ASSOCIATED
26042689
fir::ExtendedValue
26052690
IntrinsicLibrary::genAssociated(mlir::Type resultType,

flang/module/cudadevice.f90

Lines changed: 113 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,17 +106,126 @@ attributes(device) pure real function atomicaddf(address, val)
106106
real, intent(inout) :: address
107107
real, value :: val
108108
end function
109-
attributes(device) pure real*8 function atomicaddd(address, val)
109+
attributes(device) pure real(8) function atomicaddd(address, val)
110110
!dir$ ignore_tkr (d) address, (d) val
111-
real*8, intent(inout) :: address
112-
real*8, value :: val
111+
real(8), intent(inout) :: address
112+
real(8), value :: val
113113
end function
114114
attributes(device) pure integer(8) function atomicaddl(address, val)
115115
!dir$ ignore_tkr (d) address, (d) val
116116
integer(8), intent(inout) :: address
117117
integer(8), value :: val
118118
end function
119119
end interface
120-
public :: atomicadd
120+
public :: atomicadd
121+
122+
! Generic atomicsub, exported from the module. It has one device function
! specific per operand type: default integer (atomicsubi), default real
! (atomicsubf), real(8) (atomicsubd) and integer(8) (atomicsubl). Each takes
! the target as intent(inout) `address` and the operand by value as `val`,
! and returns a result of the same declared type.
! NOTE(review): atomicsubl uses ignore_tkr (dk) on val while the other
! specifics here use (d) - confirm the difference is intended.
interface atomicsub
123+
attributes(device) pure integer function atomicsubi(address, val)
124+
!dir$ ignore_tkr (d) address, (d) val
125+
integer, intent(inout) :: address
126+
integer, value :: val
127+
end function
128+
attributes(device) pure real function atomicsubf(address, val)
129+
!dir$ ignore_tkr (d) address, (d) val
130+
real, intent(inout) :: address
131+
real, value :: val
132+
end function
133+
attributes(device) pure real(8) function atomicsubd(address, val)
134+
!dir$ ignore_tkr (d) address, (d) val
135+
real(8), intent(inout) :: address
136+
real(8), value :: val
137+
end function
138+
attributes(device) pure integer(8) function atomicsubl(address, val)
139+
!dir$ ignore_tkr (d) address, (dk) val
140+
integer(8), intent(inout) :: address
141+
integer(8), value :: val
142+
end function
143+
end interface
144+
public :: atomicsub
145+
146+
! Generic atomicmax, exported from the module. It has one device function
! specific per operand type: default integer (atomicmaxi), default real
! (atomicmaxf), real(8) (atomicmaxd) and integer(8) (atomicmaxl). Each takes
! the target as intent(inout) `address` and the operand by value as `val`,
! and returns a result of the same declared type.
! NOTE(review): atomicmaxl uses ignore_tkr (dk) on val while the other
! specifics here use (d) - confirm the difference is intended.
interface atomicmax
147+
attributes(device) pure integer function atomicmaxi(address, val)
148+
!dir$ ignore_tkr (d) address, (d) val
149+
integer, intent(inout) :: address
150+
integer, value :: val
151+
end function
152+
attributes(device) pure real function atomicmaxf(address, val)
153+
!dir$ ignore_tkr (d) address, (d) val
154+
real, intent(inout) :: address
155+
real, value :: val
156+
end function
157+
attributes(device) pure real(8) function atomicmaxd(address, val)
158+
!dir$ ignore_tkr (d) address, (d) val
159+
real(8), intent(inout) :: address
160+
real(8), value :: val
161+
end function
162+
attributes(device) pure integer(8) function atomicmaxl(address, val)
163+
!dir$ ignore_tkr (d) address, (dk) val
164+
integer(8), intent(inout) :: address
165+
integer(8), value :: val
166+
end function
167+
end interface
168+
public :: atomicmax
169+
170+
! Generic atomicmin, exported from the module. It has one device function
! specific per operand type: default integer (atomicmini), default real
! (atomicminf), real(8) (atomicmind) and integer(8) (atomicminl). Each takes
! the target as intent(inout) `address` and the operand by value as `val`,
! and returns a result of the same declared type.
! NOTE(review): atomicminl uses ignore_tkr (dk) on val while the other
! specifics here use (d) - confirm the difference is intended.
interface atomicmin
171+
attributes(device) pure integer function atomicmini(address, val)
172+
!dir$ ignore_tkr (d) address, (d) val
173+
integer, intent(inout) :: address
174+
integer, value :: val
175+
end function
176+
attributes(device) pure real function atomicminf(address, val)
177+
!dir$ ignore_tkr (d) address, (d) val
178+
real, intent(inout) :: address
179+
real, value :: val
180+
end function
181+
attributes(device) pure real(8) function atomicmind(address, val)
182+
!dir$ ignore_tkr (d) address, (d) val
183+
real(8), intent(inout) :: address
184+
real(8), value :: val
185+
end function
186+
attributes(device) pure integer(8) function atomicminl(address, val)
187+
!dir$ ignore_tkr (d) address, (dk) val
188+
integer(8), intent(inout) :: address
189+
integer(8), value :: val
190+
end function
191+
end interface
192+
public :: atomicmin
193+
194+
! Generic atomicand, exported from the module. Its only specific is the
! default-integer device function atomicandi, which takes the target as
! intent(inout) `address` and the operand by value as `val`, and returns a
! default integer.
interface atomicand
195+
attributes(device) pure integer function atomicandi(address, val)
196+
!dir$ ignore_tkr (d) address, (d) val
197+
integer, intent(inout) :: address
198+
integer, value :: val
199+
end function
200+
end interface
201+
public :: atomicand
202+
203+
! Generic atomicor, exported from the module. Its only specific is the
! default-integer device function atomicori, which takes the target as
! intent(inout) `address` and the operand by value as `val`, and returns a
! default integer.
interface atomicor
204+
attributes(device) pure integer function atomicori(address, val)
205+
!dir$ ignore_tkr (d) address, (d) val
206+
integer, intent(inout) :: address
207+
integer, value :: val
208+
end function
209+
end interface
210+
public :: atomicor
211+
212+
! Generic atomicinc, exported from the module. Its only specific is the
! default-integer device function atomicinci, which takes the target as
! intent(inout) `address` and the operand by value as `val`, and returns a
! default integer.
interface atomicinc
213+
attributes(device) pure integer function atomicinci(address, val)
214+
!dir$ ignore_tkr (d) address, (d) val
215+
integer, intent(inout) :: address
216+
integer, value :: val
217+
end function
218+
end interface
219+
public :: atomicinc
220+
221+
! Generic atomicdec, exported from the module. Its only specific is the
! default-integer device function atomicdeci, which takes the target as
! intent(inout) `address` and the operand by value as `val`, and returns a
! default integer.
interface atomicdec
222+
attributes(device) pure integer function atomicdeci(address, val)
223+
!dir$ ignore_tkr (d) address, (d) val
224+
integer, intent(inout) :: address
225+
integer, value :: val
226+
end function
227+
end interface
228+
public :: atomicdec
229+
121230

122231
end module

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,26 @@ attributes(global) subroutine devsub()
2323
al = atomicadd(al, 1_8)
2424
af = atomicadd(af, 1.0_4)
2525
ad = atomicadd(ad, 1.0_8)
26+
27+
ai = atomicsub(ai, 1_4)
28+
al = atomicsub(al, 1_8)
29+
af = atomicsub(af, 1.0_4)
30+
ad = atomicsub(ad, 1.0_8)
31+
32+
ai = atomicmax(ai, 1_4)
33+
al = atomicmax(al, 1_8)
34+
af = atomicmax(af, 1.0_4)
35+
ad = atomicmax(ad, 1.0_8)
36+
37+
ai = atomicmin(ai, 1_4)
38+
al = atomicmin(al, 1_8)
39+
af = atomicmin(af, 1.0_4)
40+
ad = atomicmin(ad, 1.0_8)
41+
42+
ai = atomicand(ai, 1_4)
43+
ai = atomicor(ai, 1_4)
44+
ai = atomicinc(ai, 1_4)
45+
ai = atomicdec(ai, 1_4)
2646
end
2747

2848
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -39,6 +59,26 @@ end
3959
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
4060
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
4161

62+
! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
63+
! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
64+
! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
65+
! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
66+
67+
! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
68+
! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
69+
! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
70+
! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
71+
72+
! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
73+
! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
74+
! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
75+
! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
76+
77+
! CHECK: %{{.*}} = llvm.atomicrmw _and %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
78+
! CHECK: %{{.*}} = llvm.atomicrmw _or %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
79+
! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
80+
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
81+
4282
! CHECK: func.func private @llvm.nvvm.barrier0()
4383
! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>}
4484
! CHECK: func.func private @llvm.nvvm.membar.gl()

0 commit comments

Comments
 (0)