@@ -54,3 +54,56 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<
54
54
// CHECK: fir.global internal @_QPshared_static__shared_mem(dense<0> : vector<28xi8>) {alignment = 8 : i64, data_attr = #cuf.cuda<shared>} : !fir.array<28xi8>
55
55
// CHECK: }
56
56
// CHECK: }
57
+
58
+ // -----
59
+
60
+ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (https://github.com/llvm/llvm-project.git cae351f3453a0a26ec8eb2ddaf773c24a29d929e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
61
+ gpu.module @cuda_device_mod {
62
+ gpu.func @_QMmPshareddyn(%arg0: !fir.box<!fir.array<?x?xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?x?xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "b"}, %arg2: i32 {fir.bindc_name = "k"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
63
+ %c1_i32 = arith.constant 1 : i32
64
+ %c2_i32 = arith.constant 2 : i32
65
+ %c0 = arith.constant 0 : index
66
+ %5 = fir.address_of(@_QM__fortran_builtinsE__builtin_blockdim) : !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
67
+ %6 = fir.declare %5 {uniq_name = "_QM__fortran_builtinsE__builtin_blockdim"} : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>
68
+ %15 = fir.alloca i32
69
+ %16 = fir.declare %15 {fortran_attrs = #fir.var_attrs<value>, uniq_name = "_QMmFss1Ek"} : (!fir.ref<i32>) -> !fir.ref<i32>
70
+ %27 = fir.coordinate_of %6, x : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
71
+ %28 = fir.load %27 : !fir.ref<i32>
72
+ %29 = fir.convert %28 : (i32) -> i64
73
+ %30 = fir.convert %29 : (i64) -> index
74
+ %31 = arith.cmpi sgt, %30, %c0 : index
75
+ %32 = arith.select %31, %30, %c0 : index
76
+ %33 = fir.coordinate_of %6, y : (!fir.ref<!fir.type<_QM__fortran_builtinsT__builtin_dim3{x:i32,y:i32,z:i32}>>) -> !fir.ref<i32>
77
+ %34 = fir.load %33 : !fir.ref<i32>
78
+ %35 = fir.convert %34 : (i32) -> i64
79
+ %36 = fir.convert %35 : (i64) -> index
80
+ %37 = arith.cmpi sgt, %36, %c0 : index
81
+ %38 = arith.select %37, %36, %c0 : index
82
+ %39 = cuf.shared_memory !fir.array<?x?xi32>, %32, %38 : index, index {bindc_name = "s1", uniq_name = "_QMmFss1Es1"} -> !fir.ref<!fir.array<?x?xi32>>
83
+ %40 = fir.shape %32, %38 : (index, index) -> !fir.shape<2>
84
+ %41 = fir.declare %39(%40) {data_attr = #cuf.cuda<shared>, uniq_name = "_QMmFss1Es1"} : (!fir.ref<!fir.array<?x?xi32>>, !fir.shape<2>) -> !fir.ref<!fir.array<?x?xi32>>
85
+ %42 = fir.load %16 : !fir.ref<i32>
86
+ %43 = arith.muli %42, %c2_i32 : i32
87
+ %44 = fir.convert %43 : (i32) -> i64
88
+ %45 = fir.convert %44 : (i64) -> index
89
+ %46 = arith.cmpi sgt, %45, %c0 : index
90
+ %47 = arith.select %46, %45, %c0 : index
91
+ %48 = fir.load %16 : !fir.ref<i32>
92
+ %49 = fir.convert %48 : (i32) -> i64
93
+ %50 = fir.convert %49 : (i64) -> index
94
+ %51 = arith.cmpi sgt, %50, %c0 : index
95
+ %52 = arith.select %51, %50, %c0 : index
96
+ %53 = cuf.shared_memory !fir.array<?x?xi32>, %47, %52 : index, index {bindc_name = "s2", uniq_name = "_QMmFss1Es2"} -> !fir.ref<!fir.array<?x?xi32>>
97
+ gpu.return
98
+ }
99
+ }
100
+ }
101
+
102
+ // CHECK: gpu.func @_QMmPshareddyn(%arg0: !fir.box<!fir.array<?x?xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "a"}, %arg1: !fir.box<!fir.array<?x?xi32>> {cuf.data_attr = #cuf.cuda<device>, fir.bindc_name = "b"}, %arg2: i32 {fir.bindc_name = "k"}) attributes {cuf.proc_attr = #cuf.cuda_proc<global>} {
103
+ // CHECK: %[[EXTENT0:.*]] = arith.select
104
+ // CHECK: %[[EXTENT1:.*]] = arith.select
105
+ // CHECK: %[[SIZE_EXTENT:.*]] = arith.muli %c4{{.*}}, %[[EXTENT0]] : index
106
+ // CHECK: %[[DYNSIZE:.*]] = arith.muli %[[SIZE_EXTENT]], %[[EXTENT1]] : index
107
+ // CHECK: cuf.shared_memory[%c0{{.*}} : i32] !fir.array<?x?xi32>, %9, %15 : index, index {bindc_name = "s1", uniq_name = "_QMmFss1Es1"} -> !fir.ref<!fir.array<?x?xi32>>
108
+ // CHECK: %[[CONV_DYNSIZE:.*]] = fir.convert %[[DYNSIZE]] : (index) -> i32
109
+ // CHECK: cuf.shared_memory[%[[CONV_DYNSIZE]] : i32] !fir.array<?x?xi32>, %26, %31 : index, index {bindc_name = "s2", uniq_name = "_QMmFss1Es2"} -> !fir.ref<!fir.array<?x?xi32>>
0 commit comments