
Commit 3818237

[mlir][Tiling] Properly reject "buffer semantic" operations
Our tiling implementation assumes "tensor semantic" for the operation to be tiled. Prior to this patch, providing a tileable op with "buffer semantic" would trigger an assert instead of gracefully rejecting the input. This patch turns the assert into a proper error.

Differential Revision: https://reviews.llvm.org/D143558
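For context, here is a minimal sketch (not part of this commit) of the "tensor semantic" form the tiling path expects: the same kind of linalg.generic written in destination-passing style on tensors, so its init (outs) operands are tensor destinations that tiling can thread through. The function name @tiling_tensor_semantic_op is illustrative only.

// Illustrative only: tensor-semantics counterpart of the op in the new test.
// The outs operand is a tensor destination, which is what the tiling
// implementation assumes.
func.func @tiling_tensor_semantic_op(%x: tensor<32x32xf32>, %y: tensor<32x32xf32>) -> tensor<32x32xf32> {
  %0 = linalg.generic
    {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                      affine_map<(d0, d1) -> (d0, d1)>],
     iterator_types = ["parallel", "parallel"]}
    ins(%x : tensor<32x32xf32>)
    outs(%y : tensor<32x32xf32>) {
  ^bb0(%in: f32, %out: f32):
    linalg.yield %in : f32
  } -> tensor<32x32xf32>
  return %0 : tensor<32x32xf32>
}

The memref-based variant in the new test below exercises the opposite case and now produces the diagnostic added in Tiling.cpp instead of asserting.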
Parent: f25c775


2 files changed: +30 -1 lines changed


mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp

Lines changed: 2 additions & 1 deletion
@@ -381,7 +381,8 @@ static FailureOr<ForeachThreadTilingResult> tileToForeachThreadOpImpl(
   if (destinationStyleOp) {
     for (OpOperand *outOperand : destinationStyleOp.getDpsInitOperands()) {
       auto *it = llvm::find(dest, outOperand->get());
-      assert(it != dest.end() && "dest operand not found in dest");
+      if (it == dest.end())
+        return op->emitOpError("must have \"tensor semantic\" for tiling");
       unsigned destNum = std::distance(dest.begin(), it);
       outOperand->set(destBbArgs[destNum]);
     }

mlir/test/Dialect/GPU/transform-gpu-failing.mlir

Lines changed: 28 additions & 0 deletions
@@ -274,4 +274,32 @@ transform.sequence failures(propagate) {
   transform.gpu.map_nested_foreach_to_threads %funcop { blockDim = [32, 32]}
 }
 
+// -----
+
+func.func @tiling_buffer_semantic_op(%x: memref<32x32xf32>, %y: memref<32x32xf32>, %stream : !gpu.async.token) {
+  %one = arith.constant 1 : index
+  %name = gpu.launch async[%stream] blocks(%arg3, %arg4, %arg5) in (%arg9 = %one, %arg10 = %one, %arg11 = %one)
+            threads(%arg6, %arg7, %arg8) in (%arg12 = %one, %arg13 = %one, %arg14 = %one)
+  {
+    // expected-error @below {{'linalg.generic' op must have "tensor semantic" for tiling}}
+    // expected-note @below {{when applied to this op}}
+    linalg.generic
+      {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
+                        affine_map<(d0, d1) -> (d0, d1)>],
+       iterator_types = ["parallel", "parallel"]}
+      ins(%x : memref<32x32xf32>)
+      outs(%y : memref<32x32xf32>) {
+    ^bb0(%in: f32, %out: f32):
+      linalg.yield %in : f32
+    }
+    gpu.terminator
+  }
+  return
+}
 
+transform.sequence failures(propagate) {
+^bb1(%arg0: !pdl.operation):
+  %matmul = transform.structured.match ops{["linalg.generic"]} in %arg0 : (!pdl.operation) -> !pdl.operation
+  // expected-error @below {{transform.structured.tile_to_foreach_thread_op failed to apply}}
+  %foreach, %tiled = transform.structured.tile_to_foreach_thread_op %matmul num_threads [10, 20, 30] (mapping = [ #gpu.thread<y>, #gpu.thread<x>, #gpu.thread<z> ] )
+}
