Commit a4eadd7

[mlir][sparse][gpu] add GPU BSR SDDMM check test (#71491)
also minor edits in other GPU check tests
1 parent 590884a commit a4eadd7
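
Context for the check-test edits: FileCheck's CHECK-DAG directive lets a group of adjacent directives match in any order, which is the right choice for constants that the compiler may materialize in any sequence (e.g. after folding or CSE reorders them). A minimal sketch of the difference, with illustrative constant values:

// Plain CHECK directives must match in this exact textual order:
// CHECK: %[[C0:.*]] = arith.constant 0 : index
// CHECK: %[[C1:.*]] = arith.constant 1 : index

// CHECK-DAG directives in a block may match in either order, so the test
// no longer depends on which constant happens to be emitted first:
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index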

6 files changed, +137 -29 lines changed

mlir/test/Dialect/SparseTensor/GPU/gpu_matmul.mlir

Lines changed: 9 additions & 9 deletions

@@ -10,15 +10,15 @@
 //
 // CHECK-LABEL: gpu.module @sparse_kernels
 // CHECK-LABEL: gpu.func @kernel0(
-// CHECK-SAME: %[[VAL_0:.*0]]: index,
-// CHECK-SAME: %[[VAL_1:.*1]]: index,
-// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xindex>,
-// CHECK-SAME: %[[VAL_3:.*3]]: memref<?xindex>,
-// CHECK-SAME: %[[VAL_4:.*4]]: memref<?xf64>,
-// CHECK-SAME: %[[VAL_5:.*5]]: memref<?x?xf64>,
-// CHECK-SAME: %[[VAL_6:.*6]]: memref<?x?xf64>) kernel {
-// CHECK: %[[VAL_7:.*]] = arith.constant 1 : index
-// CHECK: %[[VAL_8:.*]] = arith.constant 0 : index
+// CHECK-SAME: %[[VAL_0:.*0]]: index,
+// CHECK-SAME: %[[VAL_1:.*1]]: index,
+// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xindex>,
+// CHECK-SAME: %[[VAL_3:.*3]]: memref<?xindex>,
+// CHECK-SAME: %[[VAL_4:.*4]]: memref<?xf64>,
+// CHECK-SAME: %[[VAL_5:.*5]]: memref<?x?xf64>,
+// CHECK-SAME: %[[VAL_6:.*6]]: memref<?x?xf64>) kernel {
+// CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_9:.*]] = gpu.block_id x
 // CHECK: %[[VAL_10:.*]] = gpu.block_dim x
 // CHECK: %[[VAL_11:.*]] = gpu.thread_id x

mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib_2to4.mlir

Lines changed: 6 additions & 6 deletions

@@ -2,11 +2,11 @@
 // RUN: --sparsification="enable-gpu-libgen" | FileCheck %s

 // CHECK-LABEL: func.func @matmul(
-// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf16>,
-// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?xf16>,
-// CHECK-SAME: %[[VAL_2:.*2]]: tensor<?x?xf16>) -> tensor<?x?xf16> {
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK-SAME: %[[VAL_0:.*0]]: tensor<?x?xf16>,
+// CHECK-SAME: %[[VAL_1:.*1]]: tensor<?x?xf16>,
+// CHECK-SAME: %[[VAL_2:.*2]]: tensor<?x?xf16>) -> tensor<?x?xf16> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
 // CHECK: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref<?x?xf16>
 // CHECK: %[[VAL_6:.*]] = gpu.wait async
 // CHECK: %[[VAL_7:.*]] = memref.dim %[[VAL_5]], %[[VAL_3]] : memref<?x?xf16>
@@ -66,4 +66,4 @@ module {
     } -> tensor<?x?xf16>
     return %0 : tensor<?x?xf16>
   }
-}
+}

mlir/test/Dialect/SparseTensor/GPU/gpu_matvec.mlir

Lines changed: 6 additions & 6 deletions

@@ -10,12 +10,12 @@
 //
 // CHECK-LABEL: gpu.module @sparse_kernels
 // CHECK: gpu.func @kernel0(
-// CHECK-SAME: %[[VAL_0:.*0]]: index,
-// CHECK-SAME: %[[VAL_1:.*1]]: memref<?xf64>,
-// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xindex>,
-// CHECK-SAME: %[[VAL_3:.*3]]: memref<?xindex>,
-// CHECK-SAME: %[[VAL_4:.*4]]: memref<?xf64>,
-// CHECK-SAME: %[[VAL_5:.*5]]: memref<?xf64>) kernel {
+// CHECK-SAME: %[[VAL_0:.*0]]: index,
+// CHECK-SAME: %[[VAL_1:.*1]]: memref<?xf64>,
+// CHECK-SAME: %[[VAL_2:.*2]]: memref<?xindex>,
+// CHECK-SAME: %[[VAL_3:.*3]]: memref<?xindex>,
+// CHECK-SAME: %[[VAL_4:.*4]]: memref<?xf64>,
+// CHECK-SAME: %[[VAL_5:.*5]]: memref<?xf64>) kernel {
 // CHECK: %[[VAL_6:.*]] = arith.constant 1 : index
 // CHECK: %[[VAL_7:.*]] = gpu.block_id x
 // CHECK: %[[VAL_8:.*]] = gpu.block_dim x

mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir

Lines changed: 5 additions & 5 deletions

@@ -22,11 +22,11 @@
 #CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }>

 // CHECK-LABEL: func.func @sparse_sampled_dd(
-// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
-// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>,
-// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
-// CHECK: %[[VAL_3:.*]] = arith.constant 8 : index
-// CHECK: %[[VAL_4:.*]] = arith.constant 0 : index
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index
 // CHECK: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>
 // CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64>
 // CHECK: %[[VAL_7:.*]] = gpu.wait async
Lines changed: 108 additions & 0 deletions

@@ -0,0 +1,108 @@
+// RUN: mlir-opt %s --sparsification="enable-gpu-libgen" | FileCheck %s
+
+#BSR = #sparse_tensor.encoding<{
+  map = (i, j) -> (
+    i floordiv 2 : dense,
+    j floordiv 2 : compressed,
+    i mod 2 : dense,
+    j mod 2 : dense)
+}>
+
+#trait_SDDMM = {
+  indexing_maps = [
+    affine_map<(i,j,k) -> (i,k)>,  // A
+    affine_map<(i,j,k) -> (k,j)>,  // B
+    affine_map<(i,j,k) -> (i,j)>   // S (in/out)
+  ],
+  iterator_types = ["parallel", "parallel", "reduction"],
+  doc = "S(i,j) += spy[S(i,j)] x SUM_k A(i,k) B(k,j)"
+}
+
+// CHECK-LABEL: func.func @SDDMM_block(
+// CHECK-SAME: %[[VAL_0:.*]]: tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>,
+// CHECK-SAME: %[[VAL_1:.*]]: tensor<?x?xf32>,
+// CHECK-SAME: %[[VAL_2:.*]]: tensor<?x?xf32>) -> tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>> {
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index
+// CHECK-DAG: %[[VAL_6:.*]] = arith.constant 4 : index
+// CHECK: %[[VAL_7:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor<?x?xf32>
+// CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor<?x?xf32>
+// CHECK: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor<?x?xf32>
+// CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<?x?xf32>
+// CHECK: %[[VAL_12:.*]] = gpu.wait async
+// CHECK: %[[VAL_13:.*]] = memref.dim %[[VAL_11]], %[[VAL_3]] : memref<?x?xf32>
+// CHECK: %[[VAL_14:.*]] = memref.dim %[[VAL_11]], %[[VAL_4]] : memref<?x?xf32>
+// CHECK: %[[VAL_15:.*]], %[[VAL_16:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] (%[[VAL_13]], %[[VAL_14]]) : memref<?x?xf32>
+// CHECK: %[[VAL_17:.*]] = gpu.memcpy async {{\[}}%[[VAL_16]]] %[[VAL_15]], %[[VAL_11]] : memref<?x?xf32>, memref<?x?xf32>
+// CHECK: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_2]] : memref<?x?xf32>
+// CHECK: %[[VAL_19:.*]] = gpu.wait async
+// CHECK: %[[VAL_20:.*]] = memref.dim %[[VAL_18]], %[[VAL_3]] : memref<?x?xf32>
+// CHECK: %[[VAL_21:.*]] = memref.dim %[[VAL_18]], %[[VAL_4]] : memref<?x?xf32>
+// CHECK: %[[VAL_22:.*]], %[[VAL_23:.*]] = gpu.alloc async {{\[}}%[[VAL_19]]] (%[[VAL_20]], %[[VAL_21]]) : memref<?x?xf32>
+// CHECK: %[[VAL_24:.*]] = gpu.memcpy async {{\[}}%[[VAL_23]]] %[[VAL_22]], %[[VAL_18]] : memref<?x?xf32>, memref<?x?xf32>
+// CHECK: %[[VAL_25:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index}
+// CHECK: %[[VAL_26:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index}
+// CHECK: %[[VAL_27:.*]] = sparse_tensor.values %[[VAL_0]]
+// CHECK: %[[VAL_28:.*]] = gpu.wait async
+// CHECK: %[[VAL_29:.*]] = memref.dim %[[VAL_25]], %[[VAL_3]] : memref<?xindex>
+// CHECK: %[[VAL_30:.*]], %[[VAL_31:.*]] = gpu.alloc async {{\[}}%[[VAL_28]]] (%[[VAL_29]]) : memref<?xindex>
+// CHECK: %[[VAL_32:.*]] = gpu.memcpy async {{\[}}%[[VAL_31]]] %[[VAL_30]], %[[VAL_25]] : memref<?xindex>, memref<?xindex>
+// CHECK: %[[VAL_33:.*]] = gpu.wait async
+// CHECK: %[[VAL_34:.*]] = memref.dim %[[VAL_26]], %[[VAL_3]] : memref<?xindex>
+// CHECK: %[[VAL_35:.*]], %[[VAL_36:.*]] = gpu.alloc async {{\[}}%[[VAL_33]]] (%[[VAL_34]]) : memref<?xindex>
+// CHECK: %[[VAL_37:.*]] = gpu.memcpy async {{\[}}%[[VAL_36]]] %[[VAL_35]], %[[VAL_26]] : memref<?xindex>, memref<?xindex>
+// CHECK: %[[VAL_38:.*]] = gpu.wait async
+// CHECK: %[[VAL_39:.*]] = memref.dim %[[VAL_27]], %[[VAL_3]] : memref<?xf32>
+// CHECK: %[[VAL_40:.*]], %[[VAL_41:.*]] = gpu.alloc async {{\[}}%[[VAL_38]]] (%[[VAL_39]]) : memref<?xf32>
+// CHECK: %[[VAL_42:.*]] = gpu.memcpy async {{\[}}%[[VAL_41]]] %[[VAL_40]], %[[VAL_27]] : memref<?xf32>, memref<?xf32>
+// CHECK: gpu.wait {{\[}}%[[VAL_17]], %[[VAL_24]], %[[VAL_32]], %[[VAL_37]], %[[VAL_42]]]
+// CHECK: %[[VAL_43:.*]] = gpu.wait async
+// CHECK: %[[VAL_44:.*]], %[[VAL_45:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_43]]] %[[VAL_15]], %[[VAL_8]], %[[VAL_9]] : index, index into memref<?x?xf32>
+// CHECK: %[[VAL_46:.*]], %[[VAL_47:.*]] = gpu.create_dn_tensor async {{\[}}%[[VAL_45]]] %[[VAL_22]], %[[VAL_9]], %[[VAL_10]] : index, index into memref<?x?xf32>
+// CHECK: %[[VAL_48:.*]] = arith.divui %[[VAL_8]], %[[VAL_5]] : index
+// CHECK: %[[VAL_49:.*]] = arith.divui %[[VAL_10]], %[[VAL_5]] : index
+// CHECK: %[[VAL_50:.*]] = arith.divui %[[VAL_7]], %[[VAL_6]] : index
+// CHECK: %[[VAL_51:.*]], %[[VAL_52:.*]] = gpu.create_bsr async {{\[}}%[[VAL_47]]] %[[VAL_48]], %[[VAL_49]], %[[VAL_50]], %[[VAL_5]], %[[VAL_5]], %[[VAL_30]], %[[VAL_35]], %[[VAL_40]] : memref<?xindex>, memref<?xindex>, memref<?xf32>
+// CHECK: %[[VAL_53:.*]], %[[VAL_54:.*]] = gpu.sddmm_buffer_size async {{\[}}%[[VAL_52]]] %[[VAL_44]], %[[VAL_46]], %[[VAL_51]] into f32
+// CHECK: %[[VAL_55:.*]], %[[VAL_56:.*]] = gpu.alloc async {{\[}}%[[VAL_54]]] (%[[VAL_53]]) : memref<?xi8>
+// CHECK: %[[VAL_57:.*]] = gpu.sddmm async {{\[}}%[[VAL_56]]] %[[VAL_44]], %[[VAL_46]], %[[VAL_51]], %[[VAL_55]] : memref<?xi8> into f32
+// CHECK: %[[VAL_58:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_57]]] %[[VAL_44]]
+// CHECK: %[[VAL_59:.*]] = gpu.destroy_dn_tensor async {{\[}}%[[VAL_58]]] %[[VAL_46]]
+// CHECK: %[[VAL_60:.*]] = gpu.destroy_sp_mat async {{\[}}%[[VAL_59]]] %[[VAL_51]]
+// CHECK: %[[VAL_61:.*]] = gpu.dealloc async {{\[}}%[[VAL_60]]] %[[VAL_55]] : memref<?xi8>
+// CHECK: %[[VAL_62:.*]] = gpu.dealloc async {{\[}}%[[VAL_61]]] %[[VAL_15]] : memref<?x?xf32>
+// CHECK: %[[VAL_63:.*]] = gpu.dealloc async {{\[}}%[[VAL_62]]] %[[VAL_22]] : memref<?x?xf32>
+// CHECK: %[[VAL_64:.*]] = gpu.dealloc async {{\[}}%[[VAL_63]]] %[[VAL_30]] : memref<?xindex>
+// CHECK: %[[VAL_65:.*]] = gpu.dealloc async {{\[}}%[[VAL_64]]] %[[VAL_35]] : memref<?xindex>
+// CHECK: %[[VAL_66:.*]] = gpu.memcpy async {{\[}}%[[VAL_65]]] %[[VAL_27]], %[[VAL_40]] : memref<?xf32>, memref<?xf32>
+// CHECK: %[[VAL_67:.*]] = gpu.dealloc async {{\[}}%[[VAL_66]]] %[[VAL_40]] : memref<?xf32>
+// CHECK: gpu.wait {{\[}}%[[VAL_67]]]
+// CHECK: %[[VAL_68:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: return %[[VAL_68]] : tensor<?x?xf32, #sparse_tensor.encoding<{{{.*}}}>>
+// CHECK: }
+func.func @SDDMM_block(%args: tensor<?x?xf32, #BSR>,
+                       %arga: tensor<?x?xf32>,
+                       %argb: tensor<?x?xf32>) -> tensor<?x?xf32, #BSR> {
+  %result = linalg.generic #trait_SDDMM
+    ins(%arga, %argb: tensor<?x?xf32>, tensor<?x?xf32>)
+    outs(%args: tensor<?x?xf32, #BSR>) {
+      ^bb(%a: f32, %b: f32, %s: f32):
+        %f0 = arith.constant 0.0 : f32
+        %u = sparse_tensor.unary %s : f32 to f32
+          present={
+            ^bb0(%p: f32):
+              %mul = arith.mulf %a, %b : f32
+              sparse_tensor.yield %mul : f32
+          }
+          absent={}
+        %r = sparse_tensor.reduce %s, %u, %f0 : f32 {
+          ^bb0(%p: f32, %q: f32):
+            %add = arith.addf %p, %q : f32
+            sparse_tensor.yield %add : f32
+        }
+        linalg.yield %r : f32
+  } -> tensor<?x?xf32, #BSR>
+  return %result : tensor<?x?xf32, #BSR>
+}
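
A note on the new test, for context: the #BSR map above stores the matrix in 2x2 blocks, with i floordiv 2 and j floordiv 2 selecting the block (only the block column is compressed) and i mod 2 / j mod 2 addressing the element inside the block. A minimal worked example of that index mapping, using an illustrative element:

// Element (i, j) = (3, 2) under the 2x2 #BSR map above:
//   block coordinates: (3 floordiv 2, 2 floordiv 2) = (1, 1)
//   in-block offsets:  (3 mod 2, 2 mod 2)           = (1, 0)

The same block geometry appears in the checked output: %[[VAL_48]] and %[[VAL_49]] divide the matrix dimensions by the block size 2 to obtain block counts, and %[[VAL_50]] divides the number of stored entries by 4 (the 2x2 entries per block) to obtain the number of stored blocks passed to gpu.create_bsr. Likewise, the sparse_tensor.unary with an empty absent region realizes the spy[S(i,j)] gating from the trait's doc string: the dense product A(i,k) B(k,j) is accumulated only where S already stores an entry.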

mlir/test/Dialect/SparseTensor/GPU/gpu_spgemm_lib.mlir

Lines changed: 3 additions & 3 deletions

@@ -6,9 +6,9 @@
 // CHECK-LABEL: func.func @matmulCSR(
 // CHECK-SAME: %[[VAL_0:.*0]]: tensor<8x8xf32, #{{.*}}>,
 // CHECK-SAME: %[[VAL_1:.*1]]: tensor<8x8xf32, #{{.*}}>) -> tensor<8x8xf32, #{{.*}}> {
-// CHECK: %[[VAL_2:.*]] = arith.constant 8 : index
-// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index
-// CHECK: %[[VAL_4:.*]] = arith.constant 9 : index
+// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 8 : index
+// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : index
 // CHECK: %[[VAL_6:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf32, #{{.*}}>
 // CHECK: %[[VAL_7:.*]] = sparse_tensor.number_of_entries %[[VAL_1]] : tensor<8x8xf32, #{{.*}}>
 // CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf32, #{{.*}}>
