llvm · matthias-springer · Mar 19, 2024 · Mar 19, 2024 · yinying-lisa-li · Mar 19, 2024
@@ -35,8 +35,8 @@
 #COO_3D = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : compressed(nonunique), d1 : singleton(nonunique), d2 : singleton), posWidth = 32, crdWidth = 32 }>
 
 module {
-  func.func private @printMemref3dF32(%ptr : tensor<?x?x?xf32>) attributes { llvm.emit_c_interface }
-  func.func private @printMemref2dF32(%ptr : tensor<?x?xf32>) attributes { llvm.emit_c_interface }
+  func.func private @printMemref3dF32(%ptr : tensor<?x?x?xf32> {bufferization.access = "read"}) attributes { llvm.emit_c_interface }
+  func.func private @printMemref2dF32(%ptr : tensor<?x?xf32> {bufferization.access = "read"}) attributes { llvm.emit_c_interface }
 
   func.func @test_sparse_rhs(%arg0: tensor<5x6xf32>, %arg1: tensor<6x2x3xf32, #COO_3D>) -> tensor<?x?x?xf32> {
     %collapsed = tensor.collapse_shape %arg1 [[0], [1, 2]] : tensor<6x2x3xf32, #COO_3D> into tensor<6x6xf32, #COO_2D>
@@ -46,6 +46,11 @@ module {
     %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
     %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
     %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+
+    // Note: tensor.collapse_shape is a metadata-only operation on dense tensors
+    // but requires reallocation on sparse tensors.
+    bufferization.dealloc_tensor %collapsed : tensor<6x6xf32, #COO_2D>
+
     return %ret1 : tensor<?x?x?xf32>
   }
 
@@ -57,6 +62,11 @@ module {
     %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
     %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
     %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+
+    // Note: tensor.collapse_shape is a metadata-only operation on dense tensors
+    // but requires reallocation on sparse tensors.
+    bufferization.dealloc_tensor %collapsed : tensor<6x6xf32, #COO_2D>
+
     return %ret1 : tensor<?x?x?xf32>
   }
 
@@ -80,6 +90,11 @@ module {
     %2 = linalg.matmul ins(%arg0, %collapsed : tensor<5x6xf32, #COO_2D>, tensor<6x6xf32, #COO_2D>) outs(%1 : tensor<5x6xf32>) -> tensor<5x6xf32>
     %expanded = tensor.expand_shape %2 [[0], [1, 2]] : tensor<5x6xf32> into tensor<5x2x3xf32>
     %ret1 = tensor.cast %expanded : tensor<5x2x3xf32> to tensor<?x?x?xf32>
+
+    // Note: tensor.collapse_shape is a metadata-only operation on dense tensors
+    // but requires reallocation on sparse tensors.
+    bufferization.dealloc_tensor %collapsed : tensor<6x6xf32, #COO_2D>
+
     return %ret1 : tensor<?x?x?xf32>
   }
 
@@ -192,6 +207,7 @@ module {
     bufferization.dealloc_tensor %so1 : tensor<?x?x?xf32>
     bufferization.dealloc_tensor %so2 : tensor<?x?x?xf32>
     bufferization.dealloc_tensor %so3 : tensor<?x?x?xf32>
+
     return
   }
 }
@@ -161,6 +161,14 @@ module {
     call @dump_dense_f64(%s24)  : (tensor<4x4xf64>) -> ()
     call @dump_dense_f64(%scsr) : (tensor<4x4xf64>) -> ()
 
+    bufferization.dealloc_tensor %a : tensor<4x8xf64, #BSR>
+    bufferization.dealloc_tensor %b : tensor<4x8xf64, #NV_24>
+    bufferization.dealloc_tensor %c : tensor<4x8xf64, #CSR>
+    bufferization.dealloc_tensor %d : tensor<4x4xf64>
+    bufferization.dealloc_tensor %s : tensor<4x4xf64>
+    bufferization.dealloc_tensor %s24 : tensor<4x4xf64>
+    bufferization.dealloc_tensor %scsr : tensor<4x4xf64>
+
     return
   }
 }
@@ -279,6 +279,31 @@ module {
     %si = tensor.extract %li[] : tensor<i64>
     vector.print %si : i64
 
+    // TODO: This check is no longer needed once the codegen path uses the
+    // buffer deallocation pass. "dealloc_tensor" turns into a no-op in the
+    // codegen path.
+    %has_runtime = sparse_tensor.has_runtime_library
+    scf.if %has_runtime {
+      // sparse_tensor.assemble copies buffers when running with the runtime
+      // library. Deallocations are not needed when running in codegen
+      // mode.
+      bufferization.dealloc_tensor %s4 : tensor<10x10xf64, #SortedCOO>
+      bufferization.dealloc_tensor %s5 : tensor<10x10xf64, #SortedCOOI32>
+      bufferization.dealloc_tensor %csr : tensor<2x2xf64, #CSR>
+      bufferization.dealloc_tensor %bs : tensor<2x10x10xf64, #BCOO>
+    }
+
+    bufferization.dealloc_tensor %li : tensor<i64>
+    bufferization.dealloc_tensor %od : tensor<3xf64>
+    bufferization.dealloc_tensor %op : tensor<2xi32>
+    bufferization.dealloc_tensor %oi : tensor<3x2xi32>
+    bufferization.dealloc_tensor %d_csr : tensor<4xf64>
+    bufferization.dealloc_tensor %p_csr : tensor<3xi32>
+    bufferization.dealloc_tensor %i_csr : tensor<3xi32>
+    bufferization.dealloc_tensor %bod : tensor<6xf64>
+    bufferization.dealloc_tensor %bop : tensor<4xindex>
+    bufferization.dealloc_tensor %boi : tensor<6x2xindex>
+
     return
   }
 }