Skip to content

Commit ec066d3

Browse files
authored
[flang][cuda] cuf.alloc in device context should be converted to fir.alloca (#116110)
Update `inDeviceContext` so that it also returns true for operations nested inside a `gpu.func` operation.
1 parent fd2e400 commit ec066d3

File tree

2 files changed

+12
-0
lines changed

2 files changed

+12
-0
lines changed

flang/lib/Optimizer/Transforms/CUFOpConversion.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,8 @@ struct CUFDeallocateOpConversion
251251
static bool inDeviceContext(mlir::Operation *op) {
252252
if (op->getParentOfType<cuf::KernelOp>())
253253
return true;
254+
if (auto funcOp = op->getParentOfType<mlir::gpu::GPUFuncOp>())
255+
return true;
254256
if (auto funcOp = op->getParentOfType<mlir::func::FuncOp>()) {
255257
if (auto cudaProcAttr =
256258
funcOp.getOperation()->getAttrOfType<cuf::ProcAttributeAttr>(

flang/test/Fir/CUDA/cuda-alloc-free.fir

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,14 @@ func.func @_QPtest_type() {
7373
// CHECK: %[[CONV_BYTES:.*]] = fir.convert %[[BYTES]] : (index) -> i64
7474
// CHECK: fir.call @_FortranACUFMemAlloc(%[[CONV_BYTES]], %c0{{.*}}, %{{.*}}, %{{.*}}) : (i64, i32, !fir.ref<i8>, i32) -> !fir.llvm_ptr<i8>
7575

76+
gpu.module @cuda_device_mod [#nvvm.target] {
77+
gpu.func @_QMalloc() kernel {
78+
%0 = cuf.alloc !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QMallocEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
79+
gpu.return
80+
}
81+
}
82+
83+
// CHECK-LABEL: gpu.func @_QMalloc() kernel
84+
// CHECK: fir.alloca !fir.box<!fir.heap<!fir.array<?xf32>>> {bindc_name = "a", uniq_name = "_QMallocEa"}
85+
7686
} // end module

0 commit comments

Comments
 (0)