Skip to content

Commit 110b77f

Browse files
authored
[flang][cuda] Handle floats in atomiccas (#128970)
1 parent 5066d7b commit 110b77f

File tree

2 files changed

+37
-0
lines changed

2 files changed

+37
-0
lines changed

flang/lib/Optimizer/Builder/IntrinsicCall.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2734,6 +2734,20 @@ mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
27342734

27352735
mlir::Value arg1 = args[1];
27362736
mlir::Value arg2 = args[2];
2737+
2738+
// Local helper: reinterpret a floating-point operand as the integer type of
// the same bit width (f32 -> i32, f64 -> i64) using an LLVM dialect bitcast.
// A value of any other type is handed back untouched. It is applied to both
// arg1 and arg2 right after this definition, before their types are compared.
auto bitCastFloat = [&](mlir::Value arg) -> mlir::Value {
2739+
if (mlir::isa<mlir::Float32Type>(arg.getType()))
2740+
return builder.create<mlir::LLVM::BitcastOp>(loc, builder.getI32Type(),
2741+
arg);
2742+
if (mlir::isa<mlir::Float64Type>(arg.getType()))
2743+
return builder.create<mlir::LLVM::BitcastOp>(loc, builder.getI64Type(),
2744+
arg);
2745+
// Neither f32 nor f64: no bitcast is emitted.
return arg;
2746+
};
2747+
2748+
arg1 = bitCastFloat(arg1);
2749+
arg2 = bitCastFloat(arg2);
2750+
27372751
if (arg1.getType() != arg2.getType()) {
27382752
// arg1 and arg2 need to have the same type in AtomicCmpXchgOp.
27392753
arg2 = builder.createConvert(loc, arg1.getType(), arg2);

flang/test/Lower/CUDA/cuda-device-proc.cuf

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,3 +175,26 @@ end subroutine
175175
! CHECK: %[[VAL:.*]] = fir.convert %c14{{.*}} : (i32) -> i64
176176
! CHECK: %[[ADDR:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<i64> to !llvm.ptr
177177
! CHECK: llvm.cmpxchg %{{.*}}, %{{.*}}, %[[VAL]] acq_rel monotonic : !llvm.ptr, i64
178+
179+
! Device subroutine calling atomiccas with default-kind real operands and a
! real literal; the CHECK lines that follow expect both value operands to be
! bitcast from f32 to i32 and passed to llvm.cmpxchg on i32.
attributes(device) subroutine testAtomic3()
180+
real :: a, i, istat
181+
istat = atomiccas(a, i, 14.0)
182+
end subroutine
183+
184+
! CHECK-LABEL: func.func @_QPtestatomic3()
185+
! CHECK: %[[BCAST1:.*]] = llvm.bitcast %{{.*}} : f32 to i32
186+
! CHECK: %[[BCAST2:.*]] = llvm.bitcast %{{.*}} : f32 to i32
187+
! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f32> to !llvm.ptr
188+
! CHECK: llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i32
189+
190+
! Device subroutine calling atomiccas with real(8) operands and a
! double-precision literal; the CHECK lines that follow expect both value
! operands to be bitcast from f64 to i64 and passed to llvm.cmpxchg on i64.
attributes(device) subroutine testAtomic4()
191+
real(8) :: a, i, istat
192+
istat = atomiccas(a, i, 14.0d0)
193+
end subroutine
194+
195+
! CHECK-LABEL: func.func @_QPtestatomic4()
196+
! CHECK: %[[BCAST1:.*]] = llvm.bitcast %{{.*}} : f64 to i64
197+
! CHECK: %[[BCAST2:.*]] = llvm.bitcast %{{.*}} : f64 to i64
198+
! CHECK: %[[CAST:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<f64> to !llvm.ptr
199+
! CHECK: %[[ATOMIC:.*]] = llvm.cmpxchg %[[CAST]], %[[BCAST1]], %[[BCAST2]] acq_rel monotonic : !llvm.ptr, i64
200+
! CHECK: %[[RES:.*]] = llvm.extractvalue %[[ATOMIC]][1] : !llvm.struct<(i64, i1)>

0 commit comments

Comments
 (0)