Skip to content

[flang][cuda] Adding support for more atomic calls #124671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions flang/include/flang/Optimizer/Builder/IntrinsicCall.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,13 @@ struct IntrinsicLibrary {
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue
genCommandArgumentCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAsind(mlir::Type, llvm::ArrayRef<mlir::Value>);
Expand Down
85 changes: 85 additions & 0 deletions flang/lib/Optimizer/Builder/IntrinsicCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,22 @@ static constexpr IntrinsicHandler handlers[]{
{"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddl", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicdeci", &I::genAtomicDec, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicinci", &I::genAtomicInc, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicmaxd", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicmaxf", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicmaxi", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicmaxl", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicmind", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicminf", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicmini", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicminl", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicori", &I::genAtomicOr, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicsubd", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicsubf", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicsubi", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"bessel_jn",
&I::genBesselJn,
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
Expand Down Expand Up @@ -2600,6 +2616,75 @@ mlir::Value IntrinsicLibrary::genAtomicAdd(mlir::Type resultType,
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
}

// Lowering for the atomicsub{d,f,i,l} entries of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue). An integer-typed value selects the `sub`
// atomic binary op; any other value type selects `fsub`. The chosen op and both
// operands are forwarded to genAtomBinOp, whose result is returned.
mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() == 2);

  // Start from the non-integer flavour and switch to the integer one when the
  // value operand is an integer.
  mlir::LLVM::AtomicBinOp rmwKind = mlir::LLVM::AtomicBinOp::fsub;
  if (mlir::isa<mlir::IntegerType>(args[1].getType()))
    rmwKind = mlir::LLVM::AtomicBinOp::sub;

  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// Lowering for the "atomicandi" entry of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue), which must be integer-typed. Forwards the
// `_and` atomic binary op and both operands to genAtomBinOp.
mlir::Value IntrinsicLibrary::genAtomicAnd(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  // Debug-build contract checks: operand count, then integer value operand.
  assert(args.size() == 2);
  assert(mlir::isa<mlir::IntegerType>(args[1].getType()));

  auto rmwKind = mlir::LLVM::AtomicBinOp::_and;
  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// Lowering for the "atomicori" entry of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue), which must be integer-typed. Forwards the
// `_or` atomic binary op and both operands to genAtomBinOp.
mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType,
                                          llvm::ArrayRef<mlir::Value> args) {
  // Debug-build contract checks: operand count, then integer value operand.
  assert(args.size() == 2);
  assert(mlir::isa<mlir::IntegerType>(args[1].getType()));

  auto rmwKind = mlir::LLVM::AtomicBinOp::_or;
  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// Lowering for the "atomicdeci" entry of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue), which must be integer-typed. Forwards the
// `udec_wrap` atomic binary op and both operands to genAtomBinOp.
mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  // Debug-build contract checks: operand count, then integer value operand.
  assert(args.size() == 2);
  assert(mlir::isa<mlir::IntegerType>(args[1].getType()));

  auto rmwKind = mlir::LLVM::AtomicBinOp::udec_wrap;
  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// Lowering for the "atomicinci" entry of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue), which must be integer-typed. Forwards the
// `uinc_wrap` atomic binary op and both operands to genAtomBinOp.
mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  // Debug-build contract checks: operand count, then integer value operand.
  assert(args.size() == 2);
  assert(mlir::isa<mlir::IntegerType>(args[1].getType()));

  auto rmwKind = mlir::LLVM::AtomicBinOp::uinc_wrap;
  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// Lowering for the atomicmax{d,f,i,l} entries of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue). An integer-typed value selects the `max`
// atomic binary op; any other value type selects `fmax`. The chosen op and both
// operands are forwarded to genAtomBinOp, whose result is returned.
mlir::Value IntrinsicLibrary::genAtomicMax(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() == 2);

  // Start from the non-integer flavour and switch to the integer one when the
  // value operand is an integer.
  mlir::LLVM::AtomicBinOp rmwKind = mlir::LLVM::AtomicBinOp::fmax;
  if (mlir::isa<mlir::IntegerType>(args[1].getType()))
    rmwKind = mlir::LLVM::AtomicBinOp::max;

  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// Lowering for the atomicmin{d,f,i,l} entries of the intrinsic handler table.
// Expects exactly two operands: args[0] is the address ("a", asAddr) and
// args[1] the value ("v", asValue). An integer-typed value selects the `min`
// atomic binary op; any other value type selects `fmin`. The chosen op and both
// operands are forwarded to genAtomBinOp, whose result is returned.
mlir::Value IntrinsicLibrary::genAtomicMin(mlir::Type resultType,
                                           llvm::ArrayRef<mlir::Value> args) {
  assert(args.size() == 2);

  // Start from the non-integer flavour and switch to the integer one when the
  // value operand is an integer.
  mlir::LLVM::AtomicBinOp rmwKind = mlir::LLVM::AtomicBinOp::fmin;
  if (mlir::isa<mlir::IntegerType>(args[1].getType()))
    rmwKind = mlir::LLVM::AtomicBinOp::min;

  return genAtomBinOp(builder, loc, rmwKind, args[0], args[1]);
}

// ASSOCIATED
fir::ExtendedValue
IntrinsicLibrary::genAssociated(mlir::Type resultType,
Expand Down
117 changes: 113 additions & 4 deletions flang/module/cudadevice.f90
Original file line number Diff line number Diff line change
Expand Up @@ -106,17 +106,126 @@ attributes(device) pure real function atomicaddf(address, val)
real, intent(inout) :: address
real, value :: val
end function
attributes(device) pure real*8 function atomicaddd(address, val)
attributes(device) pure real(8) function atomicaddd(address, val)
!dir$ ignore_tkr (d) address, (d) val
real*8, intent(inout) :: address
real*8, value :: val
real(8), intent(inout) :: address
real(8), value :: val
end function
attributes(device) pure integer(8) function atomicaddl(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer(8), intent(inout) :: address
integer(8), value :: val
end function
end interface
public :: atomicadd
public :: atomicadd

! Generic interface atomicsub, exported from this module.
! Specifics, one per operand type:
!   atomicsubi - default integer
!   atomicsubf - default real
!   atomicsubd - real(8)
!   atomicsubl - integer(8)
! Each specific is a pure device function that takes `address` as an
! intent(inout) dummy and `val` by value, and returns the same type as its
! arguments. The specific names correspond to the "atomicsub*" rows of the
! intrinsic handler table, which dispatch to IntrinsicLibrary::genAtomicSub.
! The ignore_tkr directives relax argument matching for the listed dummies
! (presumably `d` = CUDA data attribute, `k` = kind; confirm against the flang
! directive documentation).
! NOTE(review): atomicsubl uses (dk) on val while atomicaddl uses (d);
! confirm the extra kind relaxation is intended.
interface atomicsub
attributes(device) pure integer function atomicsubi(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
attributes(device) pure real function atomicsubf(address, val)
!dir$ ignore_tkr (d) address, (d) val
real, intent(inout) :: address
real, value :: val
end function
attributes(device) pure real(8) function atomicsubd(address, val)
!dir$ ignore_tkr (d) address, (d) val
real(8), intent(inout) :: address
real(8), value :: val
end function
attributes(device) pure integer(8) function atomicsubl(address, val)
!dir$ ignore_tkr (d) address, (dk) val
integer(8), intent(inout) :: address
integer(8), value :: val
end function
end interface
public :: atomicsub

! Generic interface atomicmax, exported from this module.
! Specifics, one per operand type:
!   atomicmaxi - default integer
!   atomicmaxf - default real
!   atomicmaxd - real(8)
!   atomicmaxl - integer(8)
! Each specific is a pure device function that takes `address` as an
! intent(inout) dummy and `val` by value, and returns the same type as its
! arguments. The specific names correspond to the "atomicmax*" rows of the
! intrinsic handler table, which dispatch to IntrinsicLibrary::genAtomicMax.
! The ignore_tkr directives relax argument matching for the listed dummies
! (presumably `d` = CUDA data attribute, `k` = kind; confirm against the flang
! directive documentation).
! NOTE(review): atomicmaxl uses (dk) on val while atomicaddl uses (d);
! confirm the extra kind relaxation is intended.
interface atomicmax
attributes(device) pure integer function atomicmaxi(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
attributes(device) pure real function atomicmaxf(address, val)
!dir$ ignore_tkr (d) address, (d) val
real, intent(inout) :: address
real, value :: val
end function
attributes(device) pure real(8) function atomicmaxd(address, val)
!dir$ ignore_tkr (d) address, (d) val
real(8), intent(inout) :: address
real(8), value :: val
end function
attributes(device) pure integer(8) function atomicmaxl(address, val)
!dir$ ignore_tkr (d) address, (dk) val
integer(8), intent(inout) :: address
integer(8), value :: val
end function
end interface
public :: atomicmax

! Generic interface atomicmin, exported from this module.
! Specifics, one per operand type:
!   atomicmini - default integer
!   atomicminf - default real
!   atomicmind - real(8)
!   atomicminl - integer(8)
! Each specific is a pure device function that takes `address` as an
! intent(inout) dummy and `val` by value, and returns the same type as its
! arguments. The specific names correspond to the "atomicmin*" rows of the
! intrinsic handler table, which dispatch to IntrinsicLibrary::genAtomicMin.
! The ignore_tkr directives relax argument matching for the listed dummies
! (presumably `d` = CUDA data attribute, `k` = kind; confirm against the flang
! directive documentation).
! NOTE(review): atomicminl uses (dk) on val while atomicaddl uses (d);
! confirm the extra kind relaxation is intended.
interface atomicmin
attributes(device) pure integer function atomicmini(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
attributes(device) pure real function atomicminf(address, val)
!dir$ ignore_tkr (d) address, (d) val
real, intent(inout) :: address
real, value :: val
end function
attributes(device) pure real(8) function atomicmind(address, val)
!dir$ ignore_tkr (d) address, (d) val
real(8), intent(inout) :: address
real(8), value :: val
end function
attributes(device) pure integer(8) function atomicminl(address, val)
!dir$ ignore_tkr (d) address, (dk) val
integer(8), intent(inout) :: address
integer(8), value :: val
end function
end interface
public :: atomicmin

! Generic interface atomicand, exported from this module.
! Only a default-integer specific (atomicandi) is declared: a pure device
! function taking `address` as an intent(inout) dummy and `val` by value, and
! returning a default integer. The name corresponds to the "atomicandi" row of
! the intrinsic handler table, which dispatches to
! IntrinsicLibrary::genAtomicAnd.
interface atomicand
attributes(device) pure integer function atomicandi(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
end interface
public :: atomicand

! Generic interface atomicor, exported from this module.
! Only a default-integer specific (atomicori) is declared: a pure device
! function taking `address` as an intent(inout) dummy and `val` by value, and
! returning a default integer. The name corresponds to the "atomicori" row of
! the intrinsic handler table, which dispatches to
! IntrinsicLibrary::genAtomicOr.
interface atomicor
attributes(device) pure integer function atomicori(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
end interface
public :: atomicor

! Generic interface atomicinc, exported from this module.
! Only a default-integer specific (atomicinci) is declared: a pure device
! function taking `address` as an intent(inout) dummy and `val` by value, and
! returning a default integer. The name corresponds to the "atomicinci" row of
! the intrinsic handler table, which dispatches to
! IntrinsicLibrary::genAtomicInc (uinc_wrap atomic op).
! NOTE(review): presumably `val` is the wrap-around bound, as with CUDA's
! atomicInc - confirm against the CUDA Fortran documentation.
interface atomicinc
attributes(device) pure integer function atomicinci(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
end interface
public :: atomicinc

! Generic interface atomicdec, exported from this module.
! Only a default-integer specific (atomicdeci) is declared: a pure device
! function taking `address` as an intent(inout) dummy and `val` by value, and
! returning a default integer. The name corresponds to the "atomicdeci" row of
! the intrinsic handler table, which dispatches to
! IntrinsicLibrary::genAtomicDec (udec_wrap atomic op).
! NOTE(review): presumably `val` is the wrap-around bound, as with CUDA's
! atomicDec - confirm against the CUDA Fortran documentation.
interface atomicdec
attributes(device) pure integer function atomicdeci(address, val)
!dir$ ignore_tkr (d) address, (d) val
integer, intent(inout) :: address
integer, value :: val
end function
end interface
public :: atomicdec


end module
40 changes: 40 additions & 0 deletions flang/test/Lower/CUDA/cuda-device-proc.cuf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,26 @@ attributes(global) subroutine devsub()
al = atomicadd(al, 1_8)
af = atomicadd(af, 1.0_4)
ad = atomicadd(ad, 1.0_8)

ai = atomicsub(ai, 1_4)
al = atomicsub(al, 1_8)
af = atomicsub(af, 1.0_4)
ad = atomicsub(ad, 1.0_8)

ai = atomicmax(ai, 1_4)
al = atomicmax(al, 1_8)
af = atomicmax(af, 1.0_4)
ad = atomicmax(ad, 1.0_8)

ai = atomicmin(ai, 1_4)
al = atomicmin(al, 1_8)
af = atomicmin(af, 1.0_4)
ad = atomicmin(ad, 1.0_8)

ai = atomicand(ai, 1_4)
ai = atomicor(ai, 1_4)
ai = atomicinc(ai, 1_4)
ai = atomicdec(ai, 1_4)
end

! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
Expand All @@ -39,6 +59,26 @@ end
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64

! CHECK: %{{.*}} = llvm.atomicrmw _and %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw _or %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32

! CHECK: func.func private @llvm.nvvm.barrier0()
! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>}
! CHECK: func.func private @llvm.nvvm.membar.gl()
Expand Down
Loading