Skip to content

Commit ae42f07

Browse files
authored
[flang][cuda] Allow array pointer for atomicexch and atomiccas (#130363)
1 parent 829e899 commit ae42f07

File tree

3 files changed

+23
-11
lines changed

3 files changed

+23
-11
lines changed

flang/include/flang/Optimizer/Builder/IntrinsicCall.h

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -187,9 +187,11 @@ struct IntrinsicLibrary {
187187
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
188188
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
189189
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
190-
mlir::Value genAtomicCas(mlir::Type, llvm::ArrayRef<mlir::Value>);
190+
fir::ExtendedValue genAtomicCas(mlir::Type,
191+
llvm::ArrayRef<fir::ExtendedValue>);
191192
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
192-
mlir::Value genAtomicExch(mlir::Type, llvm::ArrayRef<mlir::Value>);
193+
fir::ExtendedValue genAtomicExch(mlir::Type,
194+
llvm::ArrayRef<fir::ExtendedValue>);
193195
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
194196
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
195197
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2868,15 +2868,17 @@ mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType,
28682868
}
28692869

28702870
// ATOMICCAS
2871-
mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
2872-
llvm::ArrayRef<mlir::Value> args) {
2871+
fir::ExtendedValue
2872+
IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
2873+
llvm::ArrayRef<fir::ExtendedValue> args) {
28732874
assert(args.size() == 3);
28742875
auto successOrdering = mlir::LLVM::AtomicOrdering::acq_rel;
28752876
auto failureOrdering = mlir::LLVM::AtomicOrdering::monotonic;
28762877
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(resultType.getContext());
28772878

2878-
mlir::Value arg1 = args[1];
2879-
mlir::Value arg2 = args[2];
2879+
mlir::Value arg0 = fir::getBase(args[0]);
2880+
mlir::Value arg1 = fir::getBase(args[1]);
2881+
mlir::Value arg2 = fir::getBase(args[2]);
28802882

28812883
auto bitCastFloat = [&](mlir::Value arg) -> mlir::Value {
28822884
if (mlir::isa<mlir::Float32Type>(arg.getType()))
@@ -2897,7 +2899,7 @@ mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
28972899
}
28982900

28992901
auto address =
2900-
builder.create<mlir::UnrealizedConversionCastOp>(loc, llvmPtrTy, args[0])
2902+
builder.create<mlir::UnrealizedConversionCastOp>(loc, llvmPtrTy, arg0)
29012903
.getResult(0);
29022904
auto cmpxchg = builder.create<mlir::LLVM::AtomicCmpXchgOp>(
29032905
loc, address, arg1, arg2, successOrdering, failureOrdering);
@@ -2914,13 +2916,16 @@ mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType,
29142916
}
29152917

29162918
// ATOMICEXCH
2917-
mlir::Value IntrinsicLibrary::genAtomicExch(mlir::Type resultType,
2918-
llvm::ArrayRef<mlir::Value> args) {
2919+
fir::ExtendedValue
2920+
IntrinsicLibrary::genAtomicExch(mlir::Type resultType,
2921+
llvm::ArrayRef<fir::ExtendedValue> args) {
29192922
assert(args.size() == 2);
2920-
assert(args[1].getType().isIntOrFloat());
2923+
mlir::Value arg0 = fir::getBase(args[0]);
2924+
mlir::Value arg1 = fir::getBase(args[1]);
2925+
assert(arg1.getType().isIntOrFloat());
29212926

29222927
mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::xchg;
2923-
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
2928+
return genAtomBinOp(builder, loc, binOp, arg0, arg1);
29242929
}
29252930

29262931
mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType,

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,8 @@ attributes(device) subroutine testAtomic(aa, n)
161161
istat = atomiccas(a, i, 14)
162162
do i = 1, n
163163
istat = atomicxor(aa, i)
164+
istat = atomiccas(aa, i, 14)
165+
istat = atomicexch(aa, 0)
164166
end do
165167
end subroutine
166168

@@ -172,6 +174,9 @@ end subroutine
172174
! CHECK: llvm.cmpxchg %[[ADDR]], %{{.*}}, %c14{{.*}} acq_rel monotonic : !llvm.ptr, i32
173175
! CHECK: fir.do_loop
174176
! CHECK: llvm.atomicrmw _xor %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
177+
! CHECK: %[[ADDR:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<!fir.array<?xi32>> to !llvm.ptr
178+
! CHECK: llvm.cmpxchg %[[ADDR]], %{{.*}}, %c14{{.*}} acq_rel monotonic : !llvm.ptr, i32
179+
! CHECK: llvm.atomicrmw xchg %{{.*}}, %c0{{.*}} seq_cst : !llvm.ptr, i32
175180

176181
attributes(device) subroutine testAtomic2()
177182
integer(8) :: a, i, istat

0 commit comments

Comments
 (0)