Skip to content

Commit 850c932

Browse files
authored
[flang][cuda] Walk through cuf kernel for implicit globals (#119455)
Globals used in cuf kernel need to be flagged as well.
1 parent 5a93033 commit 850c932

File tree

2 files changed

+50
-0
lines changed

2 files changed

+50
-0
lines changed

flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ class CUFDeviceGlobal : public fir::impl::CUFDeviceGlobalBase<CUFDeviceGlobal> {
6868
prepareImplicitDeviceGlobals(funcOp, symTable, candidates);
6969
return mlir::WalkResult::advance();
7070
});
71+
mod.walk([&](cuf::KernelOp kernelOp) {
72+
kernelOp.walk([&](fir::AddrOfOp addrOfOp) {
73+
processAddrOfOp(addrOfOp, symTable, candidates);
74+
});
75+
});
7176

7277
// Copying the device global variable into the gpu module
7378
mlir::SymbolTable parentSymTable(mod);

flang/test/Fir/CUDA/cuda-implicit-device-global.f90

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,3 +146,48 @@ // Test that global used in device function are flagged with the correct
146146

147147
// CHECK-LABEL: gpu.module @cuda_device_mod
148148
// CHECK: fir.global linkonce @_QQclX5465737420504153534544 constant
149+
150+
// -----
151+
152+
func.func @_QQmain() attributes {fir.bindc_name = "cufkernel_global"} {
153+
%c10 = arith.constant 10 : index
154+
%c5_i32 = arith.constant 5 : i32
155+
%c6_i32 = arith.constant 6 : i32
156+
%c1 = arith.constant 1 : index
157+
%c1_i32 = arith.constant 1 : i32
158+
%c10_i32 = arith.constant 10 : i32
159+
%0 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"}
160+
%1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
161+
cuf.kernel<<<%c10_i32, %c1_i32>>> (%arg0 : index) = (%c1 : index) to (%c10 : index) step (%c1 : index) {
162+
%2 = fir.convert %arg0 : (index) -> i32
163+
fir.store %2 to %1#1 : !fir.ref<i32>
164+
%3 = fir.load %1#0 : !fir.ref<i32>
165+
%4 = arith.cmpi eq, %3, %c1_i32 : i32
166+
cf.cond_br %4, ^bb1, ^bb2
167+
^bb1: // pred: ^bb0
168+
%5 = fir.address_of(@_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5) : !fir.ref<!fir.char<1,50>>
169+
%6 = fir.convert %5 : (!fir.ref<!fir.char<1,50>>) -> !fir.ref<i8>
170+
%7 = fir.call @_FortranAioBeginExternalListOutput(%c6_i32, %6, %c5_i32) fastmath<contract> : (i32, !fir.ref<i8>, i32) -> !fir.ref<i8>
171+
%8 = fir.load %1#0 : !fir.ref<i32>
172+
%9 = fir.call @_FortranAioOutputInteger32(%7, %8) fastmath<contract> : (!fir.ref<i8>, i32) -> i1
173+
%10 = fir.call @_FortranAioEndIoStatement(%7) fastmath<contract> : (!fir.ref<i8>) -> i32
174+
cf.br ^bb2
175+
^bb2: // 2 preds: ^bb0, ^bb1
176+
"fir.end"() : () -> ()
177+
}
178+
return
179+
}
180+
func.func private @_FortranAioBeginExternalListOutput(i32, !fir.ref<i8>, i32) -> !fir.ref<i8> attributes {fir.io, fir.runtime}
181+
fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50> {
182+
%0 = fir.string_lit "/local/home/vclement/llvm-project/build/dummy.cuf\00"(50) : !fir.char<1,50>
183+
fir.has_value %0 : !fir.char<1,50>
184+
}
185+
func.func private @_FortranAioOutputInteger32(!fir.ref<i8>, i32) -> i1 attributes {fir.io, fir.runtime}
186+
func.func private @_FortranAioEndIoStatement(!fir.ref<i8>) -> i32 attributes {fir.io, fir.runtime}
187+
func.func private @_FortranAProgramStart(i32, !llvm.ptr, !llvm.ptr, !llvm.ptr)
188+
func.func private @_FortranAProgramEndStatement()
189+
190+
// CHECK-LABEL: func.func @_QQmain()
191+
// CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50>
192+
// CHECK: gpu.module @cuda_device_mod
193+
// CHECK: fir.global linkonce @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 constant : !fir.char<1,50>

0 commit comments

Comments
 (0)