Skip to content

Commit 7c0c418

Browse files
clementval authored and Jaddyen committed
[flang][cuda] Do not generate cuf.alloc/cuf.free in device context (llvm#141117)
`cuf.alloc` and `cuf.free` are converted to `fir.alloca` or deleted when in device context during the CUFOpConversion pass. Do not generate them in lowering to avoid confusion.
1 parent db6b9af commit 7c0c418

File tree

3 files changed

+9
-6
lines changed

3 files changed

+9
-6
lines changed

flang/lib/Lower/ConvertVariable.cpp

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#include "flang/Lower/StatementContext.h"
2626
#include "flang/Lower/Support/Utils.h"
2727
#include "flang/Lower/SymbolMap.h"
28+
#include "flang/Optimizer/Builder/CUFCommon.h"
2829
#include "flang/Optimizer/Builder/Character.h"
2930
#include "flang/Optimizer/Builder/FIRBuilder.h"
3031
#include "flang/Optimizer/Builder/HLFIRTools.h"
@@ -735,8 +736,10 @@ static mlir::Value createNewLocal(Fortran::lower::AbstractConverter &converter,
735736
if (dataAttr.getValue() == cuf::DataAttribute::Shared)
736737
return builder.create<cuf::SharedMemoryOp>(loc, ty, nm, symNm, lenParams,
737738
indices);
738-
return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr, lenParams,
739-
indices);
739+
740+
if (!cuf::isCUDADeviceContext(builder.getRegion()))
741+
return builder.create<cuf::AllocOp>(loc, ty, nm, symNm, dataAttr,
742+
lenParams, indices);
740743
}
741744

742745
// Let the builder do all the heavy lifting.
@@ -1072,8 +1075,9 @@ static void instantiateLocal(Fortran::lower::AbstractConverter &converter,
10721075
if (mustBeDefaultInitializedAtRuntime(var))
10731076
Fortran::lower::defaultInitializeAtRuntime(converter, var.getSymbol(),
10741077
symMap);
1075-
if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol())) {
1076-
auto *builder = &converter.getFirOpBuilder();
1078+
auto *builder = &converter.getFirOpBuilder();
1079+
if (Fortran::semantics::NeedCUDAAlloc(var.getSymbol()) &&
1080+
!cuf::isCUDADeviceContext(builder->getRegion())) {
10771081
cuf::DataAttributeAttr dataAttr =
10781082
Fortran::lower::translateSymbolCUFDataAttribute(builder->getContext(),
10791083
var.getSymbol());

flang/test/Lower/CUDA/cuda-allocatable.cuf

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -186,7 +186,7 @@ attributes(global) subroutine sub8()
186186
end subroutine
187187

188188
! CHECK-LABEL: func.func @_QPsub8() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
189-
! CHECK: %[[DESC:.*]] = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFsub8Ea"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
189+
! CHECK: %[[DESC:.*]] = fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QFsub8Ea"}
190190
! CHECK: %[[A:.*]]:2 = hlfir.declare %[[DESC]] {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFsub8Ea"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> (!fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>, !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>)
191191
! CHECK: %[[HEAP:.*]] = fir.allocmem !fir.array<?xf32>, %{{.*}} {fir.must_be_heap = true, uniq_name = "_QFsub8Ea.alloc"}
192192
! CHECK: %[[SHAPE:.*]] = fir.shape %{{.*}} : (index) -> !fir.shape<1>

flang/test/Lower/CUDA/cuda-shared.cuf

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,5 +9,4 @@ end subroutine
99

1010
! CHECK-LABEL: func.func @_QPsharedmem() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
1111
! CHECK: %{{.*}} = cuf.shared_memory !fir.array<32xf32> {bindc_name = "s", uniq_name = "_QFsharedmemEs"} -> !fir.ref<!fir.array<32xf32>>
12-
! CHECK: cuf.free %{{.*}}#0 : !fir.ref<i32> {data_attr = #cuf.cuda<device>}
1312
! CHECK-NOT: cuf.free

0 commit comments

Comments
 (0)