Skip to content

Commit 44a05d0

Browse files
committed
Add atomicadd as a cudadevice intrinsic and lower it to the LLVM dialect
1 parent fec503d commit 44a05d0

File tree

5 files changed

+76
-0
lines changed

5 files changed

+76
-0
lines changed

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "mlir/Dialect/Complex/IR/Complex.h"
2121
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
2222
#include "mlir/Dialect/Math/IR/Math.h"
23+
#include <mlir/IR/Value.h>
2324
#include <optional>
2425

2526
namespace fir {
@@ -185,6 +186,7 @@ struct IntrinsicLibrary {
185186
mlir::Value genAnint(mlir::Type, llvm::ArrayRef<mlir::Value>);
186187
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
187188
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
189+
mlir::Value genAtomAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
188190
fir::ExtendedValue
189191
genCommandArgumentCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
190192
mlir::Value genAsind(mlir::Type, llvm::ArrayRef<mlir::Value>);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@
5151
#include "llvm/Support/MathExtras.h"
5252
#include "llvm/Support/raw_ostream.h"
5353
#include <cfenv> // temporary -- only used in genIeeeGetOrSetModesOrStatus
54+
#include <mlir/Dialect/LLVMIR/LLVMTypes.h>
55+
#include <mlir/IR/BuiltinAttributes.h>
5456
#include <mlir/IR/Value.h>
5557
#include <optional>
5658

@@ -147,6 +149,10 @@ static constexpr IntrinsicHandler handlers[]{
147149
{"atan2pi", &I::genAtanpi},
148150
{"atand", &I::genAtand},
149151
{"atanpi", &I::genAtanpi},
152+
{"atomicaddd", &I::genAtomAdd, {{{"addr", asAddr}, {"v", asValue}}}, false},
153+
{"atomicaddf", &I::genAtomAdd, {{{"addr", asAddr}, {"v", asValue}}}, false},
154+
{"atomicaddi", &I::genAtomAdd, {{{"addr", asAddr}, {"v", asValue}}}, false},
155+
{"atomicaddl", &I::genAtomAdd, {{{"addr", asAddr}, {"v", asValue}}}, false},
150156
{"bessel_jn",
151157
&I::genBesselJn,
152158
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
@@ -2574,6 +2580,26 @@ mlir::Value IntrinsicLibrary::genAtanpi(mlir::Type resultType,
25742580
return builder.create<mlir::arith::MulFOp>(loc, atan, factor);
25752581
}
25762582

2583+
/// Build an `llvm.atomicrmw` operation.
///
/// \param builder FIR builder used to create the conversion and the op.
/// \param loc     source location attached to the generated operations.
/// \param binOp   read-modify-write kind to perform (e.g. add, fadd).
/// \param arg0    address operand; it is converted to an opaque LLVM pointer
///                type before being handed to the atomic op.
/// \param arg1    value operand of the read-modify-write.
/// \return the value produced by the created `AtomicRMWOp`.
///
/// The memory ordering is always sequentially consistent (`seq_cst`).
static mlir::Value genAtomBinOp(fir::FirOpBuilder &builder, mlir::Location &loc,
                                mlir::LLVM::AtomicBinOp binOp, mlir::Value arg0,
                                mlir::Value arg1) {
  // The LLVM dialect op expects an !llvm.ptr operand, so convert the incoming
  // address to that type first.
  auto opaquePtrTy = mlir::LLVM::LLVMPointerType::get(builder.getContext());
  mlir::Value address = builder.createConvert(loc, opaquePtrTy, arg0);

  constexpr auto ordering = mlir::LLVM::AtomicOrdering::seq_cst;
  return builder.create<mlir::LLVM::AtomicRMWOp>(loc, binOp, address, arg1,
                                                 ordering);
}
2591+
2592+
// ATOMICADD
/// Lower the `atomicadd*` handlers to an atomic read-modify-write.
///
/// \param resultType unused here; the result comes straight from the atomic op.
/// \param args       exactly two values: `args[0]` is the address and
///                   `args[1]` is the value to add.
/// \return the value produced by the generated atomic op.
///
/// An integer-typed `args[1]` selects the integer `add` kind; every other
/// type selects the floating-point `fadd` kind.
mlir::Value IntrinsicLibrary::genAtomAdd(mlir::Type resultType,
                                         llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() == 2);

  mlir::Value address = args[0];
  mlir::Value addend = args[1];

  // Start from the floating-point flavour and switch to the integer one when
  // the addend has an integer type.
  mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::fadd;
  if (mlir::isa<mlir::IntegerType>(addend.getType()))
    binOp = mlir::LLVM::AtomicBinOp::add;

  return genAtomBinOp(builder, loc, binOp, address, addend);
}
2602+
25772603
// ASSOCIATED
25782604
fir::ExtendedValue
25792605
IntrinsicLibrary::genAssociated(mlir::Type resultType,

flang/module/cudadevice.f90

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,5 +92,31 @@ attributes(device) subroutine threadfence_system()
9292
end function
9393
end interface
9494
public :: __fadd_ru
95+
96+
! Atomic Operations
9597

98+
! Generic interface atomicadd.
! It groups four device functions, one per operand type:
!   atomicaddi - default integer
!   atomicaddf - default real
!   atomicaddd - real*8
!   atomicaddl - integer(8)
! Each specific takes an intent(inout) address and a by-value val of the same
! type, and returns that type. The specific names match the "atomicadd*"
! entries registered in the intrinsic handler table of IntrinsicCall.cpp.
! NOTE(review): the ignore_tkr directives presumably relax rank/device (and,
! for atomicaddl's val, kind) checking on the listed dummies - confirm against
! the flang directive documentation.
interface atomicadd
99+
! Default integer variant.
attributes(device) pure integer function atomicaddi(address, val)
100+
!dir$ ignore_tkr (rd) address, (d) val
101+
integer, intent(inout) :: address
102+
integer, value :: val
103+
end function
104+
! Default real variant.
attributes(device) pure real function atomicaddf(address, val)
105+
!dir$ ignore_tkr (rd) address, (d) val
106+
real, intent(inout) :: address
107+
real, value :: val
108+
end function
109+
! real*8 variant.
attributes(device) pure real*8 function atomicaddd(address, val)
110+
!dir$ ignore_tkr (rd) address, (d) val
111+
real*8, intent(inout) :: address
112+
real*8, value :: val
113+
end function
114+
! integer(8) variant; note val uses (dk) where the other variants use (d).
attributes(device) pure integer(8) function atomicaddl(address, val)
115+
!dir$ ignore_tkr (rd) address, (dk) val
116+
integer(8), intent(inout) :: address
117+
integer(8), value :: val
118+
end function
119+
end interface
120+
public :: atomicadd
121+
96122
end module

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55
attributes(global) subroutine devsub()
66
implicit none
77
integer :: ret
8+
real(4) :: af
9+
real(8) :: ad
10+
integer(4) :: ai
11+
integer(8) :: al
812

913
call syncthreads()
1014
call syncwarp(1)
@@ -14,6 +18,11 @@ attributes(global) subroutine devsub()
1418
ret = syncthreads_and(1)
1519
ret = syncthreads_count(1)
1620
ret = syncthreads_or(1)
21+
22+
ai = atomicadd(ai, 1_4)
23+
al = atomicadd(al, 1_8)
24+
af = atomicadd(af, 1.0_4)
25+
ad = atomicadd(ad, 1.0_8)
1726
end
1827

1928
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -25,6 +34,10 @@ end
2534
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.and(%c1_i32_0) fastmath<contract> : (i32) -> i32
2635
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.popc(%c1_i32_1) fastmath<contract> : (i32) -> i32
2736
! CHECK: %{{.*}} = fir.call @llvm.nvvm.barrier0.or(%c1_i32_2) fastmath<contract> : (i32) -> i32
37+
! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
38+
! CHECK: %{{.*}} = llvm.atomicrmw add %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
39+
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
40+
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
2841

2942
! CHECK: func.func private @llvm.nvvm.barrier0()
3043
! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>}

flang/test/Semantics/cuf-device-procedures01.cuf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,17 @@ end
2828
! CHECK: threadfence_system (Subroutine): Use from threadfence_system in cudadevice
2929

3030
! Subprogram declared without any attributes(...) prefix. It calls syncthreads
! and atomicadd (once per operand type: integer(4), integer(8), real(4),
! real(8)). The FileCheck expectations that follow this subprogram require
! both names to be reported as EXTERNAL HostAssoc symbols in this scope.
subroutine host()
31+
real(4) :: af
32+
real(8) :: ad
33+
integer(4) :: ai
34+
integer(8) :: al
3135
call syncthreads()
36+
ai = atomicadd(ai, 1_4)
37+
al = atomicadd(al, 1_8)
38+
af = atomicadd(af, 1.0_4)
39+
ad = atomicadd(ad, 1.0_8)
3240
end subroutine
3341

3442
! CHECK-LABEL: Subprogram scope: host
43+
! CHECK: atomicadd, EXTERNAL: HostAssoc{{$}}
3544
! CHECK: syncthreads, EXTERNAL: HostAssoc{{$}}

0 commit comments

Comments
 (0)