|
1 | 1 | // DEFINE: %{compile} = mlir-opt %s \
|
2 |
| -// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule |\ |
3 |
| -// DEFINE: mlir-opt --test-linalg-transform-patterns="test-decompose-tensor-pack"\ |
4 |
| -// DEFINE: --test-transform-dialect-erase-schedule \ |
5 |
| -// DEFINE: -one-shot-bufferize="bufferize-function-boundaries" \ |
6 |
| -// DEFINE: -buffer-deallocation-pipeline="private-function-dynamic-ownership" \ |
7 |
| -// DEFINE: -cse -canonicalize -test-lower-to-llvm -o %t |
| 2 | +// DEFINE: -transform-interpreter -test-transform-dialect-erase-schedule |\ |
| 3 | +// DEFINE: mlir-opt \ |
| 4 | +// DEFINE: -test-lower-to-llvm -o %t |
8 | 5 | // DEFINE: %{entry_point} = main
|
9 | 6 | // DEFINE: %{run} = mlir-cpu-runner %t -e %{entry_point} -entry-point-result=void \
|
10 | 7 | // DEFINE: -shared-libs=%mlir_runner_utils,%mlir_c_runner_utils
|
@@ -84,12 +81,37 @@ func.func private @pack(%A: tensor<7x16xi32>) {
|
84 | 81 | }
|
85 | 82 |
|
86 | 83 | module @transforms attributes { transform.with_named_sequence } {
|
87 |
| - transform.named_sequence @__transform_main(%module: !transform.any_op {transform.readonly}) { |
|    | 84 | +   transform.named_sequence @__transform_main(%module: !transform.any_op {transform.consumed}) { |
88 | 85 | %pack = transform.structured.match ops{["tensor.pack"]} in %module : (!transform.any_op) -> !transform.any_op
|
89 | 86 |
|
90 |
| - %tiled_linalg_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] |
| 87 | + // 1. Tile so that we can decompose tensor.pack into tensor.pad and other |
| 88 | + // Ops (see step 2) |
| 89 | + %tiled_pack_op_p, %loops:2 = transform.structured.tile_using_for %pack tile_sizes [1, 1] |
91 | 90 | : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op)
|
92 | 91 |
|
| 92 | + // 2. Decompose the tiled Op into (trimmed for brevity): |
| 93 | + // |
| 94 | + // %padded = tensor.pad %slice_of_A (..) : |
| 95 | + // tensor<?x?xi32> to tensor<8x1xi32> |
| 96 | + // %inserted_slice = tensor.insert_slice %padded into %slice_of_A_pack (...) : |
| 97 | + // tensor<8x1xi32> into tensor<1x1x?x1xi32> |
| 98 | + // |
| 99 | + // NOTE: no tile is transposed, hence no linalg.transpose |
| 100 | + %func_1 = transform.get_parent_op %tiled_pack_op_p {isolated_from_above} : (!transform.any_op) -> !transform.any_op |
| 101 | + transform.apply_patterns to %func_1 { |
| 102 | + transform.apply_patterns.linalg.decompose_pack_unpack |
| 103 | + } : !transform.any_op |
| 104 | + |
| 105 | + // 3. Bufferize before lowering to LLVM |
| 106 | + %bufferize = transform.bufferization.one_shot_bufferize %module |
| 107 | + {bufferize_function_boundaries=true} : (!transform.any_op) -> !transform.any_op |
| 108 | + |
| 109 | + // 4. Canonicalize |
| 110 | + %func_2 = transform.structured.match ops{["func.func"]} in %bufferize : (!transform.any_op) -> !transform.op<"func.func"> |
| 111 | + transform.apply_patterns to %func_2 { |
| 112 | + transform.apply_patterns.canonicalization |
| 113 | + } : !transform.op<"func.func"> |
| 114 | + |
93 | 115 | transform.yield
|
94 | 116 | }
|
95 | 117 | }
|
|
0 commit comments