Skip to content

Commit 544a3cb

Browse files
authored
[flang][cuda] Handle variable with initialization in device global pass (#124307)
1 parent a9ad601 commit 544a3cb

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

flang/lib/Optimizer/Transforms/CUFDeviceGlobal.cpp

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -36,13 +36,11 @@ static void processAddrOfOp(fir::AddrOfOp addrOfOp,
3636
addrOfOp.getSymbol().getRootReference().getValue())) {
3737
// TO DO: limit candidates to non-scalars. Scalars appear to have been
3838
// folded in already.
39-
if (globalOp.getConstant()) {
40-
if (recurseInGlobal)
41-
globalOp.walk([&](fir::AddrOfOp op) {
42-
processAddrOfOp(op, symbolTable, candidates, recurseInGlobal);
43-
});
44-
candidates.insert(globalOp);
45-
}
39+
if (recurseInGlobal)
40+
globalOp.walk([&](fir::AddrOfOp op) {
41+
processAddrOfOp(op, symbolTable, candidates, recurseInGlobal);
42+
});
43+
candidates.insert(globalOp);
4644
}
4745
}
4846

flang/test/Fir/CUDA/cuda-implicit-device-global.f90

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -308,3 +308,24 @@ // Test that global used in device function are flagged with the correct
308308
// CHECK-DAG: fir.global linkonce_odr @_QM__mod1E.c.__builtin_c_devptr
309309
// CHECK-DAG: fir.global linkonce_odr @_QM__mod1E.dt.__builtin_c_devptr
310310
// CHECK-DAG: fir.global linkonce_odr @_QM__mod1E.n.__builtin_c_devptr
311+
312+
// -----
313+
314+
// Variables with initialization are promoted to non-constant globals.
315+
//
316+
// attributes(global) subroutine kernel4()
317+
// integer :: a = 4
318+
// end subroutine
319+
320+
func.func @_QPkernel4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
321+
%0 = fir.address_of(@_QFkernel4Ea) : !fir.ref<i32>
322+
return
323+
}
324+
fir.global internal @_QFkernel4Ea : i32 {
325+
%c4_i32 = arith.constant 4 : i32
326+
fir.has_value %c4_i32 : i32
327+
}
328+
329+
// CHECK-LABEL: fir.global internal @_QFkernel4Ea : i32
330+
// CHECK-LABEL: gpu.module @cuda_device_mod
331+
// CHECK: fir.global internal @_QFkernel4Ea : i32

0 commit comments

Comments
 (0)