@@ -113,7 +113,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
 // -----
 
 module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
-  llvm.func @_FortranACUFLaunchClusterKernel(!llvm.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}
+  llvm.func @_FortranACUFLaunchClusterKernel(!llvm.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}
   llvm.func @_QMmod1Psub1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>} {
     llvm.return
   }
@@ -166,3 +166,66 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : ve
 
 // CHECK-LABEL: llvm.func @_QMmod1Phost_sub()
 // CHECK: llvm.call @_FortranACUFLaunchCooperativeKernel
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git 4116c1370ff76adf1e58eb3c39d0a14721794c70)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  llvm.func @_QMmod1Psub1() attributes {cuf.cluster_dims = #cuf.cluster_dims<x = 2 : i64, y = 2 : i64, z = 1 : i64>} {
+    llvm.return
+  }
+  llvm.func @_QQmain() attributes {fir.bindc_name = "test"} {
+    %0 = llvm.mlir.constant(1 : index) : i64
+    %stream = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr
+    %1 = llvm.mlir.constant(2 : index) : i64
+    %2 = llvm.mlir.constant(0 : i32) : i32
+    %3 = llvm.mlir.constant(10 : index) : i64
+    %token = cuf.stream_cast %stream : !llvm.ptr
+    gpu.launch_func [%token] @cuda_device_mod::@_QMmod1Psub1 blocks in (%3, %3, %0) threads in (%3, %3, %0) : i64 dynamic_shared_memory_size %2
+    llvm.return
+  }
+  gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">]
+}
+
+// CHECK-LABEL: llvm.func @_QQmain()
+// CHECK: %[[STREAM:.*]] = llvm.alloca %{{.*}} x i64 : (i64) -> !llvm.ptr
+// CHECK: %[[KERNEL_PTR:.*]] = llvm.mlir.addressof @_QMmod1Psub1
+// CHECK: llvm.call @_FortranACUFLaunchKernel(%[[KERNEL_PTR]], %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) -> ()
+
+// -----
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<i1, dense<8> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr, dense<64> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<270>, dense<32> : vector<4xi64>>, #dlti.dl_entry<!llvm.ptr<271>, dense<32> : vector<4xi64>>, #dlti.dl_entry<i8, dense<8> : vector<2xi64>>, #dlti.dl_entry<i16, dense<16> : vector<2xi64>>, #dlti.dl_entry<!llvm.ptr<272>, dense<64> : vector<4xi64>>, #dlti.dl_entry<i64, dense<64> : vector<2xi64>>, #dlti.dl_entry<i32, dense<32> : vector<2xi64>>, #dlti.dl_entry<f128, dense<128> : vector<2xi64>>, #dlti.dl_entry<i128, dense<128> : vector<2xi64>>, #dlti.dl_entry<f64, dense<64> : vector<2xi64>>, #dlti.dl_entry<f80, dense<128> : vector<2xi64>>, #dlti.dl_entry<f16, dense<16> : vector<2xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", gpu.container_module, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 20.0.0 (git@github.com:clementval/llvm-project.git ddcfd4d2dc17bf66cee8c3ef6284118684a2b0e6)", llvm.target_triple = "x86_64-unknown-linux-gnu"} {
+  llvm.func @_QMmod1Phost_sub() {
+    %0 = llvm.mlir.constant(1 : i32) : i32
+    %one = llvm.mlir.constant(1 : i64) : i64
+    %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr
+    %stream = llvm.alloca %one x i64 : (i64) -> !llvm.ptr
+    %2 = llvm.mlir.constant(40 : i64) : i64
+    %3 = llvm.mlir.constant(16 : i32) : i32
+    %4 = llvm.mlir.constant(25 : i32) : i32
+    %5 = llvm.mlir.constant(21 : i32) : i32
+    %6 = llvm.mlir.constant(17 : i32) : i32
+    %7 = llvm.mlir.constant(1 : index) : i64
+    %8 = llvm.mlir.constant(27 : i32) : i32
+    %9 = llvm.mlir.constant(6 : i32) : i32
+    %10 = llvm.mlir.constant(1 : i32) : i32
+    %11 = llvm.mlir.constant(0 : i32) : i32
+    %12 = llvm.mlir.constant(10 : index) : i64
+    %13 = llvm.mlir.addressof @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5 : !llvm.ptr
+    %14 = llvm.call @_FortranACUFMemAlloc(%2, %11, %13, %6) : (i64, i32, !llvm.ptr, i32) -> !llvm.ptr
+    %token = cuf.stream_cast %stream : !llvm.ptr
+    gpu.launch_func [%token] @cuda_device_mod::@_QMmod1Psub1 blocks in (%7, %7, %7) threads in (%12, %7, %7) : i64 dynamic_shared_memory_size %11 args(%14 : !llvm.ptr) {cuf.proc_attr = #cuf.cuda_proc<grid_global>}
+    llvm.return
+  }
+  llvm.func @_QMmod1Psub1(!llvm.ptr) -> ()
+  llvm.mlir.global linkonce constant @_QQclX91d13f6e74caa2f03965d7a7c6a8fdd5() {addr_space = 0 : i32} : !llvm.array<2 x i8> {
+    %0 = llvm.mlir.constant("a\00") : !llvm.array<2 x i8>
+    llvm.return %0 : !llvm.array<2 x i8>
+  }
+  llvm.func @_FortranACUFMemAlloc(i64, i32, !llvm.ptr, i32) -> !llvm.ptr attributes {fir.runtime, sym_visibility = "private"}
+  llvm.func @_FortranACUFMemFree(!llvm.ptr, i32, !llvm.ptr, i32) -> !llvm.struct<()> attributes {fir.runtime, sym_visibility = "private"}
+  gpu.binary @cuda_device_mod [#gpu.object<#nvvm.target, "">]
+}
+
+// CHECK-LABEL: llvm.func @_QMmod1Phost_sub()
+// CHECK: %[[STREAM:.*]] = llvm.alloca %{{.*}} x i64 : (i64) -> !llvm.ptr
+// CHECK: llvm.call @_FortranACUFLaunchCooperativeKernel(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[STREAM]], %{{.*}}, %{{.*}}, %{{.*}}) : (!llvm.ptr, i64, i64, i64, i64, i64, i64, !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) -> ()
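A note for orientation, not part of the patch: the first hunk changes `_FortranACUFLaunchClusterKernel` so that the stream is passed as a `!llvm.ptr` (by address) after the nine `i64` dimension values, instead of as a tenth `i64` by value, matching the `cuf.stream_cast`/`%[[STREAM]]` plumbing exercised by the new tests. A minimal sketch of a call site implied by the updated declaration follows; the operand names and the cluster/grid/block grouping are assumptions made by analogy with the `_FortranACUFLaunchKernel` CHECK line above, not something this diff shows.

```mlir
// Hypothetical call site for the updated declaration. All %names are
// illustrative; the dimension grouping is assumed from the similar
// _FortranACUFLaunchKernel checks in this test.
llvm.call @_FortranACUFLaunchClusterKernel(
    %kernel,          // !llvm.ptr: kernel address
    %cx, %cy, %cz,    // i64: cluster dimensions (assumed order)
    %gx, %gy, %gz,    // i64: grid dimensions (assumed order)
    %bx, %by, %bz,    // i64: block dimensions (assumed order)
    %stream,          // !llvm.ptr: stream now passed by address (was i64 by value)
    %smem,            // i32: dynamic shared memory size
    %params, %extra)  // !llvm.ptr, !llvm.ptr: kernel arguments and extra data
    : (!llvm.ptr, i64, i64, i64, i64, i64, i64, i64, i64, i64,
       !llvm.ptr, i32, !llvm.ptr, !llvm.ptr) -> ()
```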