|
| 1 | +// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(gpu-async-region),spirv-attach-target{ver=v1.0 caps=Addresses,Int64,Kernel},convert-gpu-to-spirv{use-64bit-index=true},gpu.module(spirv.module(spirv-lower-abi-attrs,spirv-update-vce)),func.func(llvm-request-c-wrappers),convert-scf-to-cf,convert-cf-to-llvm,convert-arith-to-llvm,convert-math-to-llvm,convert-func-to-llvm,gpu-to-llvm{use-bare-pointers-for-kernels=true},gpu-module-to-binary,expand-strided-metadata,lower-affine,finalize-memref-to-llvm,reconcile-unrealized-casts)' \ |
| 2 | +// RUN: | mlir-runner \ |
| 3 | +// RUN: --shared-libs=%mlir_sycl_runtime \ |
| 4 | +// RUN: --shared-libs=%mlir_runner_utils \ |
| 5 | +// RUN: --entry-point-result=void \ |
| 6 | +// RUN: | FileCheck %s |
| 7 | + |
| 8 | +module @add attributes {gpu.container_module} { |
| | + // Integration test: adds two constant 2x2x2 f32 memrefs element-wise inside a GPU kernel and prints the result for FileCheck. |
| | + // Constant inputs. @main passes @__constant_2x2x2xf32 as the first operand of @test and @__constant_2x2x2xf32_0 as the second. |
| 9 | + memref.global "private" constant @__constant_2x2x2xf32_0 : memref<2x2x2xf32> = dense<[[[1.1, 2.2], [3.3, 4.4]], [[5.5, 6.6], [7.7, 8.8 ]]]> |
| 10 | + memref.global "private" constant @__constant_2x2x2xf32 : memref<2x2x2xf32> = dense<[[[1.2, 2.3], [4.5, 5.8]], [[7.2, 8.3], [10.5, 11.8]]]> |
| | + // Entry point: fetches both constants, calls @test, prints the returned buffer, then deallocates it. |
| 11 | + func.func @main() { |
| 12 | + %0 = memref.get_global @__constant_2x2x2xf32 : memref<2x2x2xf32> |
| 13 | + %1 = memref.get_global @__constant_2x2x2xf32_0 : memref<2x2x2xf32> |
| 14 | + %2 = call @test(%0, %1) : (memref<2x2x2xf32>, memref<2x2x2xf32>) -> memref<2x2x2xf32> |
| | + // printMemrefF32 takes an unranked memref, so the ranked result is cast first. |
| 15 | + %cast = memref.cast %2 : memref<2x2x2xf32> to memref<*xf32> |
| 16 | + call @printMemrefF32(%cast) : (memref<*xf32>) -> () |
| | + // The buffer returned by @test was created there with memref.alloc; it is released here. |
| 17 | + memref.dealloc %2 : memref<2x2x2xf32> |
| 18 | + return |
| 19 | + } |
| | + // External print helper: declared only, no body in this file. NOTE(review): presumably provided by the runner utils library named in the RUN lines -- confirm. |
| 20 | + func.func private @printMemrefF32(memref<*xf32>) |
| | + // Host-side driver: copies %arg0 and %arg1 into gpu.alloc'd buffers, launches @test_kernel on them, and returns a memref.alloc'd copy of the kernel output. |
| | + // The caller is responsible for deallocating the returned memref (see @main). |
| 21 | + func.func @test(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>) -> memref<2x2x2xf32> { |
| 22 | + %c2 = arith.constant 2 : index |
| 23 | + %c1 = arith.constant 1 : index |
| | + // %memref and %memref_0 receive copies of the two inputs; %memref_1 receives the kernel output. |
| 24 | + %memref = gpu.alloc () : memref<2x2x2xf32> |
| 25 | + gpu.memcpy %memref, %arg0 : memref<2x2x2xf32>, memref<2x2x2xf32> |
| 26 | + %memref_0 = gpu.alloc () : memref<2x2x2xf32> |
| 27 | + gpu.memcpy %memref_0, %arg1 : memref<2x2x2xf32>, memref<2x2x2xf32> |
| 28 | + %memref_1 = gpu.alloc () : memref<2x2x2xf32> |
| | + // Launch a 2x2x2 grid of blocks with 1x1x1 threads each: one block per element of the 2x2x2 memrefs. |
| 29 | + gpu.launch_func @test_kernel::@test_kernel blocks in (%c2, %c2, %c2) threads in (%c1, %c1, %c1) args(%memref : memref<2x2x2xf32>, %memref_0 : memref<2x2x2xf32>, %memref_1 : memref<2x2x2xf32>) |
| | + // Copy the kernel output into a memref.alloc'd buffer that is returned to the caller. |
| 30 | + %alloc = memref.alloc() : memref<2x2x2xf32> |
| 31 | + gpu.memcpy %alloc, %memref_1 : memref<2x2x2xf32>, memref<2x2x2xf32> |
| | + // Release all three gpu.alloc'd buffers; only %alloc outlives this function. |
| 32 | + gpu.dealloc %memref_1 : memref<2x2x2xf32> |
| 33 | + gpu.dealloc %memref_0 : memref<2x2x2xf32> |
| 34 | + gpu.dealloc %memref : memref<2x2x2xf32> |
| 35 | + return %alloc : memref<2x2x2xf32> |
| 36 | + } |
| | + // Kernel module. Its spirv.target_env lists SPIR-V v1.0 with the Addresses, Int64 and Kernel capabilities and api=OpenCL, the same version and capabilities named by spirv-attach-target in the RUN line. |
| 37 | + gpu.module @test_kernel attributes {spirv.target_env = #spirv.target_env<#spirv.vce<v1.0, [Addresses, Int64, Kernel], []>, api=OpenCL, #spirv.resource_limits<>>} { |
| | + // Element-wise add kernel: %arg2[x, y, z] = %arg0[x, y, z] + %arg1[x, y, z], where (x, y, z) is the block id. |
| | + // The known_block_size / known_grid_size attributes match the launch configuration used in @test (1x1x1 threads, 2x2x2 blocks). |
| 38 | + gpu.func @test_kernel(%arg0: memref<2x2x2xf32>, %arg1: memref<2x2x2xf32>, %arg2: memref<2x2x2xf32>) kernel attributes {gpu.known_block_size = array<i32: 1, 1, 1>, gpu.known_grid_size = array<i32: 2, 2, 2>, spirv.entry_point_abi = #spirv.entry_point_abi<>} { |
| | + // The block id in each dimension is used directly as the memref index for that dimension. |
| 39 | + %0 = gpu.block_id x |
| 40 | + %1 = gpu.block_id y |
| 41 | + %2 = gpu.block_id z |
| 42 | + %3 = memref.load %arg0[%0, %1, %2] : memref<2x2x2xf32> |
| 43 | + %4 = memref.load %arg1[%0, %1, %2] : memref<2x2x2xf32> |
| 44 | + %5 = arith.addf %3, %4 : f32 |
| 45 | + memref.store %5, %arg2[%0, %1, %2] : memref<2x2x2xf32> |
| 46 | + gpu.return |
| 47 | + } |
| 48 | + } |
| | + // Expected output: the element-wise sums of the two constants, one innermost row per CHECK line, matched in order. |
| 49 | + // CHECK: [2.3, 4.5] |
| 50 | + // CHECK: [7.8, 10.2] |
| 51 | + // CHECK: [12.7, 14.9] |
| 52 | + // CHECK: [18.2, 20.6] |
| 53 | +} |
0 commit comments