Skip to content

Commit 0a41c8e

Browse files
authored
[flang][cuda] Avoid generating cuf.data_transfer in OpenACC region (#106435)
`cuf.data_transfer` is converted to calls to the CUDA runtime API, and these calls are not supported in device code. Assignments inside an OpenACC region are handled by the OpenACC code generation, so we avoid generating data transfers for them.
1 parent 1ace91f commit 0a41c8e

File tree

2 files changed

+51
-3
lines changed

2 files changed

+51
-3
lines changed

flang/lib/Lower/Bridge.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4380,9 +4380,14 @@ class FirConverter : public Fortran::lower::AbstractConverter {
43804380
// Check if the insertion point is currently in a device context. HostDevice
43814381
// subprogram are not considered fully device context so it will return false
43824382
// for it.
4383-
static bool isDeviceContext(fir::FirOpBuilder &builder) {
4383+
// If the insertion point is inside an OpenACC region op, it is considered
4384+
// device context.
4385+
static bool isCudaDeviceContext(fir::FirOpBuilder &builder) {
43844386
if (builder.getRegion().getParentOfType<cuf::KernelOp>())
43854387
return true;
4388+
if (builder.getRegion()
4389+
.getParentOfType<mlir::acc::ComputeRegionOpInterface>())
4390+
return true;
43864391
if (auto funcOp =
43874392
builder.getRegion().getParentOfType<mlir::func::FuncOp>()) {
43884393
if (auto cudaProcAttr =
@@ -4401,7 +4406,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
44014406
mlir::Location loc = getCurrentLocation();
44024407
fir::FirOpBuilder &builder = getFirOpBuilder();
44034408

4404-
bool isInDeviceContext = isDeviceContext(builder);
4409+
bool isInDeviceContext = isCudaDeviceContext(builder);
4410+
44054411
bool isCUDATransfer = (Fortran::evaluate::HasCUDADeviceAttrs(assign.lhs) ||
44064412
Fortran::evaluate::HasCUDADeviceAttrs(assign.rhs)) &&
44074413
!isInDeviceContext;

flang/test/Lower/CUDA/cuda-data-transfer.cuf

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
1+
! RUN: bbc -emit-hlfir -fopenacc -fcuda %s -o - | FileCheck %s
22

33
! Test CUDA Fortran data transfer using assignment statements.
44

@@ -290,3 +290,45 @@ end subroutine
290290
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>
291291
! CHECK: %[[AHOST:.*]]:2 = hlfir.declare %[[ARG1]](%{{.*}}) dummy_scope %{{.*}} {uniq_name = "_QFsub15Ea_host"} : (!fir.ref<!fir.array<?xf32>>, !fir.shape<1>, !fir.dscope) -> (!fir.box<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>)
292292
! CHECK: cuf.data_transfer %[[AHOST]]#1 to %[[ADEV]]#1, %[[SHAPE]] : !fir.shape<1> {transfer_kind = #cuf.cuda_transfer<host_device>} : !fir.ref<!fir.array<?xf32>>, !fir.ref<!fir.array<?xf32>>
293+
294+
! Check that cuf.data_transfer ops are not generated within an OpenACC region
295+
subroutine sub16()
296+
integer, parameter :: n = 10
297+
real, device :: adev(n)
298+
real :: ahost(n)
299+
real, managed :: b
300+
integer :: i
301+
302+
adev = ahost
303+
!$acc parallel loop deviceptr(adev)
304+
do i = 1, n
305+
adev(i) = adev(i) + b
306+
enddo
307+
308+
!$acc kernels deviceptr(adev)
309+
do i = 1, n
310+
adev(i) = adev(i) + b
311+
enddo
312+
!$acc end kernels
313+
314+
315+
!$acc serial deviceptr(adev)
316+
do i = 1, n
317+
adev(i) = adev(i) + b
318+
enddo
319+
!$acc end serial
320+
end subroutine
321+
322+
! CHECK-LABEL: func.func @_QPsub16()
323+
! CHECK: cuf.data_transfer
324+
! CHECK: acc.parallel
325+
! CHECK-NOT: cuf.data_transfer
326+
! CHECK: hlfir.assign
327+
328+
! CHECK: acc.kernels
329+
! CHECK-NOT: cuf.data_transfer
330+
! CHECK: hlfir.assign
331+
332+
! CHECK: acc.serial
333+
! CHECK-NOT: cuf.data_transfer
334+
! CHECK: hlfir.assign

0 commit comments

Comments
 (0)