Skip to content

Commit c820f9e

Browse files
committed
[mlir][sparse][gpu] end-to-end integration test of GPU libgen approach
Reviewed By: Peiming. Differential Revision: https://reviews.llvm.org/D150172
1 parent 8097d01 commit c820f9e

File tree

1 file changed

+98
-0
lines changed

1 file changed

+98
-0
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
//
// NOTE: this test requires gpu-sm80
//
// with RT lib (SoA COO):
//
// RUN: mlir-opt %s \
// RUN:   --sparse-compiler="enable-runtime-library=true enable-gpu-libgen gpu-triple=nvptx64-nvidia-cuda gpu-chip=sm_80 gpu-features=+ptx71" \
// RUN: | mlir-cpu-runner \
// RUN:   --shared-libs=%mlir_cuda_runtime \
// RUN:   --shared-libs=%mlir_runner_utils \
// RUN:   --e main --entry-point-result=void \
// RUN: | FileCheck %s
//
// TODO: without RT lib (AoS COO):

// Sorted coordinate (SoA COO) encoding with default pos/crd widths.
#SortedCOO = #sparse_tensor.encoding<{
  dimLevelType = [ "compressed-nu", "singleton" ]
}>

// CSR encoding with narrowed 32-bit positions and coordinates.
#CSR = #sparse_tensor.encoding<{
  dimLevelType = [ "dense", "compressed" ],
  posWidth = 32,
  crdWidth = 32
}>

module {
  // Compute matrix vector y = Ax on COO with default index coordinates.
  func.func @matvecCOO(%A: tensor<?x?xf64, #SortedCOO>, %x: tensor<?xf64>, %y_in: tensor<?xf64>) -> tensor<?xf64> {
    %y_out = linalg.matvec
      ins(%A, %x: tensor<?x?xf64, #SortedCOO>, tensor<?xf64>)
      outs(%y_in: tensor<?xf64>) -> tensor<?xf64>
    return %y_out : tensor<?xf64>
  }

  // Compute matrix vector y = Ax on CSR with 32-bit positions and coordinates.
  func.func @matvecCSR(%A: tensor<?x?xf64, #CSR>, %x: tensor<?xf64>, %y_in: tensor<?xf64>) -> tensor<?xf64> {
    %y_out = linalg.matvec
      ins(%A, %x: tensor<?x?xf64, #CSR>, tensor<?xf64>)
      outs(%y_in: tensor<?xf64>) -> tensor<?xf64>
    return %y_out : tensor<?xf64>
  }

  // Entry point: builds a dense 1024x64 matrix, sparsifies it into COO and
  // CSR, runs both matvec kernels, and prints each result vector once so
  // FileCheck can verify both (CHECK-COUNT-2 below).
  func.func @main() {
    %f0 = arith.constant 0.0 : f64
    %c0 = arith.constant 0 : index
    %c1 = arith.constant 1 : index

    // Stress test with a dense matrix DA, DA[i][j] = i + j.
    %DA = tensor.generate {
    ^bb0(%i: index, %j: index):
      %k = arith.addi %i, %j : index
      %l = arith.index_cast %k : index to i64
      %f = arith.uitofp %l : i64 to f64
      tensor.yield %f : f64
    } : tensor<1024x64xf64>

    // Convert to a "sparse" m x n matrix A, once per encoding.
    %Acoo = sparse_tensor.convert %DA : tensor<1024x64xf64> to tensor<?x?xf64, #SortedCOO>
    %Acsr = sparse_tensor.convert %DA : tensor<1024x64xf64> to tensor<?x?xf64, #CSR>

    // Initialize dense vector with n elements:
    //   (1, 2, 3, 4, ..., n)
    %d1 = tensor.dim %Acoo, %c1 : tensor<?x?xf64, #SortedCOO>
    %x = tensor.generate %d1 {
    ^bb0(%i : index):
      %k = arith.addi %i, %c1 : index
      %j = arith.index_cast %k : index to i64
      %f = arith.uitofp %j : i64 to f64
      tensor.yield %f : f64
    } : tensor<?xf64>

    // Initialize dense vector to m zeros.
    %d0 = tensor.dim %Acoo, %c0 : tensor<?x?xf64, #SortedCOO>
    %y = tensor.generate %d0 {
    ^bb0(%i : index):
      tensor.yield %f0 : f64
    } : tensor<?xf64>

    // Call the kernels.
    %0 = call @matvecCOO(%Acoo, %x, %y) : (tensor<?x?xf64, #SortedCOO>, tensor<?xf64>, tensor<?xf64>) -> tensor<?xf64>
    %1 = call @matvecCSR(%Acsr, %x, %y) : (tensor<?x?xf64, #CSR>, tensor<?xf64>, tensor<?xf64>) -> tensor<?xf64>

    //
    // Sanity check on results.
    //
    // CHECK-COUNT-2: ( 87360, 89440, 91520, 93600, 95680, 97760, 99840, 101920, 104000, 106080, 108160, 110240, 112320, 114400, 116480, 118560, 120640, 122720, 124800, 126880, 128960, 131040, 133120, 135200, 137280, 139360, 141440, 143520, 145600, 147680, 149760, 151840, 153920, 156000, 158080, 160160, 162240, 164320, 166400, 168480, 170560, 172640, 174720, 176800, 178880, 180960, 183040, 185120, 187200, 189280, 191360, 193440, 195520, 197600, 199680, 201760, 203840, 205920, 208000, 210080, 212160, 214240, 216320, 218400 )
    //
    %pb0 = vector.transfer_read %0[%c0], %f0 : tensor<?xf64>, vector<64xf64>
    vector.print %pb0 : vector<64xf64>
    // BUGFIX: read the CSR result %1 here; the original read %0 twice, so
    // the @matvecCSR output was never actually checked by CHECK-COUNT-2.
    %pb1 = vector.transfer_read %1[%c0], %f0 : tensor<?xf64>, vector<64xf64>
    vector.print %pb1 : vector<64xf64>

    // Release the resources.
    bufferization.dealloc_tensor %Acoo : tensor<?x?xf64, #SortedCOO>
    bufferization.dealloc_tensor %Acsr : tensor<?x?xf64, #CSR>
    return
  }
}

0 commit comments

Comments
 (0)