Skip to content

Commit ed2db3b

Browse files
authored
[flang][cuda] Do not register global constants (#118582)
Global constants have no symbols in library files: they are replaced with literal constants during lowering, before kernels are moved into a GPU module. Do not register them, because registering them would result in unresolved symbols.
1 parent ef164ec commit ed2db3b

File tree

2 files changed

+31
-4
lines changed

2 files changed

+31
-4
lines changed

flang/lib/Optimizer/Transforms/CUFAddConstructor.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,7 @@ struct CUFAddConstructor
106106

107107
mlir::func::FuncOp func;
108108
switch (attr.getValue()) {
109-
case cuf::DataAttribute::Device:
110-
case cuf::DataAttribute::Constant: {
109+
case cuf::DataAttribute::Device: {
111110
func = fir::runtime::getRuntimeFunc<mkRTKey(CUFRegisterVariable)>(
112111
loc, builder);
113112
auto fTy = func.getFunctionType();
@@ -145,8 +144,6 @@ struct CUFAddConstructor
145144
default:
146145
break;
147146
}
148-
if (!func)
149-
continue;
150147
}
151148
}
152149
builder.create<mlir::LLVM::ReturnOp>(loc, mlir::ValueRange{});

flang/test/Fir/CUDA/cuda-constructor-2.f90

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,33 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
3030
// CHECK-DAG: %[[BOXREF:.*]] = fir.convert %[[BOX]] : (!fir.ref<!fir.box<!fir.heap<!fir.array<?xi32>>>>) -> !fir.ref<i8>
3131
// CHECK-DAG: fir.call @_FortranACUFRegisterVariable(%[[MODULE:.*]], %[[BOXREF]], %{{.*}}, %{{.*}})
3232
//
33+
34+
// -----
35+
36+
// Checking that constant global variables are not registered
37+
38+
// CHECK: @_FortranACUFRegisterAllocator
39+
// CHECK-NOT: fir.call @_FortranACUFRegisterVariable
40+
41+
module attributes {dlti.dl_spec = #dlti.dl_spec<i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, f80 = dense<128> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, f128 = dense<128> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.endianness" = "little">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git 3372303188df0f7f8ac26e7ab610cf8b0f716d42)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
42+
fir.global @_QMiso_c_bindingECc_int {data_attr = #cuf.cuda<constant>} constant : i32
43+
44+
45+
fir.type_info @_QM__fortran_builtinsT__builtin_c_ptr noinit nodestroy nofinal : !fir.type<_QM__fortran_builtinsT__builtin_c_ptr{__address:i64}>
46+
gpu.module @cuda_device_mod {
47+
fir.global @_QMiso_c_bindingECc_int {data_attr = #cuf.cuda<constant>} constant : i32
48+
gpu.func @_QMdevmodPdevsub(%arg0: !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>) -> i32 {
49+
%c0 = arith.constant 0 : index
50+
%c4_i32 = arith.constant 4 : i32
51+
%0 = fir.alloca i32 {bindc_name = "devsub", uniq_name = "_QMdevmodFdevsubEdevsub"}
52+
%1 = fir.alloca i32 {bindc_name = "__builtin_warpsize", uniq_name = "_QM__fortran_builtinsEC__builtin_warpsize"}
53+
%2 = fir.load %arg0 : !fir.ref<!fir.box<!fir.heap<!fir.array<?xf32>>>>
54+
%3:3 = fir.box_dims %2, %c0 : (!fir.box<!fir.heap<!fir.array<?xf32>>>, index) -> (index, index, index)
55+
%4 = fir.convert %3#1 : (index) -> i32
56+
%5 = arith.muli %4, %c4_i32 : i32
57+
fir.store %5 to %0 : !fir.ref<i32>
58+
%6 = fir.load %0 : !fir.ref<i32>
59+
gpu.return %6 : i32
60+
}
61+
}
62+
}

0 commit comments

Comments
 (0)