[mlir][vector] Fix FlattenGather for scalable vectors

c-rhodes · c-rhodes · commit 9d78e81022bc · 2024-06-20T07:09:05.000Z
This pattern flattens vector.gather ops by unrolling the outermost dimension of vectors with rank >= 2. There are two issues with this pattern for scalable vectors: (1) The unrolling doesn't take vscale into account; a constraint is added to disable the pattern for vectors with leading scalable dims. (2) The scalable dims are dropped when creating the new gather; this is fixed by propagating the scalable-dim flags to the new vector type. Depends on #96049.
diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h
@@ -211,6 +211,11 @@ Value createReadOrMaskedRead(OpBuilder &builder, Location loc, Value source,
 ///      static sizes in `shape`.
 LogicalResult isValidMaskedInputVector(ArrayRef<int64_t> shape,
                                        ArrayRef<int64_t> inputVectorSizes);
+
+/// Returns true if the leading dim(s) of `type` are fixed and the trailing dim
+/// is scalable.
+bool isTrailingDimScalable(VectorType type);
+
 } // namespace vector
 
 /// Constructs a permutation map of invariant memref indices to vector
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorGather.cpp
@@ -55,6 +55,8 @@ namespace {
 /// ```
 ///
 /// When applied exhaustively, this will produce a sequence of 1-d gather ops.
+///
+/// Supports vector types with trailing scalable dim.
 struct FlattenGather : OpRewritePattern<vector::GatherOp> {
   using OpRewritePattern::OpRewritePattern;
 
@@ -64,6 +66,12 @@ struct FlattenGather : OpRewritePattern<vector::GatherOp> {
     if (resultTy.getRank() < 2)
       return rewriter.notifyMatchFailure(op, "already flat");
 
+    // Unrolling doesn't take vscale into account. Pattern is disabled for
+    // vectors with leading scalable dim(s).
+    if (resultTy.isScalable() && !isTrailingDimScalable(resultTy))
+      return rewriter.notifyMatchFailure(
+          op, "vector type must be fixed-width or scalable in trailing dim");
+
     Location loc = op.getLoc();
     Value indexVec = op.getIndexVec();
     Value maskVec = op.getMask();
@@ -73,7 +81,8 @@ struct FlattenGather : OpRewritePattern<vector::GatherOp> {
         loc, resultTy, rewriter.getZeroAttr(resultTy));
 
     Type subTy = VectorType::get(resultTy.getShape().drop_front(),
-                                 resultTy.getElementType());
+                                 resultTy.getElementType(),
+                                 resultTy.getScalableDims().drop_front());
 
     for (int64_t i = 0, e = resultTy.getShape().front(); i < e; ++i) {
       int64_t thisIdx[1] = {i};
diff --git a/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp b/mlir/lib/Dialect/Vector/Transforms/LowerVectorShapeCast.cpp
@@ -342,11 +342,6 @@ class ScalableShapeCastOpRewritePattern
     rewriter.replaceOp(op, result);
     return success();
   }
-
-  static bool isTrailingDimScalable(VectorType type) {
-    return type.getRank() >= 1 && type.getScalableDims().back() &&
-           !llvm::is_contained(type.getScalableDims().drop_back(), true);
-  }
 };
 
 } // namespace
diff --git a/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp b/mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp
@@ -396,3 +396,8 @@ vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,
   }
   return success();
 }
+
+bool vector::isTrailingDimScalable(VectorType type) {
+  return type.getRank() >= 1 && type.getScalableDims().back() &&
+         !llvm::is_contained(type.getScalableDims().drop_back(), true);
+}
diff --git a/mlir/test/Dialect/Vector/vector-gather-lowering.mlir b/mlir/test/Dialect/Vector/vector-gather-lowering.mlir
@@ -74,6 +74,32 @@ func.func @gather_memref_1d_i32_index(%base: memref<?xf32>, %v: vector<2xi32>, %
   return %0 : vector<2x3xf32>
  }
 
+// CHECK-LABEL: @scalable_gather_memref_2d
+// CHECK-SAME:      %[[BASE:.*]]: memref<?x?xf32>,
+// CHECK-SAME:      %[[IDXVEC:.*]]: vector<2x[3]xindex>,
+// CHECK-SAME:      %[[MASK:.*]]: vector<2x[3]xi1>,
+// CHECK-SAME:      %[[PASS:.*]]: vector<2x[3]xf32>
+// CHECK:         %[[C0:.*]] = arith.constant 0 : index
+// CHECK:         %[[C1:.*]] = arith.constant 1 : index
+// CHECK:         %[[INIT:.*]] = arith.constant dense<0.000000e+00> : vector<2x[3]xf32>
+// CHECK:         %[[IDXVEC0:.*]] = vector.extract %[[IDXVEC]][0] : vector<[3]xindex> from vector<2x[3]xindex>
+// CHECK:         %[[MASK0:.*]] = vector.extract %[[MASK]][0] : vector<[3]xi1> from vector<2x[3]xi1>
+// CHECK:         %[[PASS0:.*]] = vector.extract %[[PASS]][0] : vector<[3]xf32> from vector<2x[3]xf32>
+// CHECK:         %[[GATHER0:.*]] = vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C1]]] {{\[}}%[[IDXVEC0]]], %[[MASK0]], %[[PASS0]] : memref<?x?xf32>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32>
+// CHECK:         %[[INS0:.*]] = vector.insert %[[GATHER0]], %[[INIT]] [0] : vector<[3]xf32> into vector<2x[3]xf32>
+// CHECK:         %[[IDXVEC1:.*]] = vector.extract %[[IDXVEC]][1] : vector<[3]xindex> from vector<2x[3]xindex>
+// CHECK:         %[[MASK1:.*]] = vector.extract %[[MASK]][1] : vector<[3]xi1> from vector<2x[3]xi1>
+// CHECK:         %[[PASS1:.*]] = vector.extract %[[PASS]][1] : vector<[3]xf32> from vector<2x[3]xf32>
+// CHECK:         %[[GATHER1:.*]] = vector.gather %[[BASE]]{{\[}}%[[C0]], %[[C1]]] {{\[}}%[[IDXVEC1]]], %[[MASK1]], %[[PASS1]] : memref<?x?xf32>, vector<[3]xindex>, vector<[3]xi1>, vector<[3]xf32> into vector<[3]xf32>
+// CHECK:         %[[INS1:.*]] = vector.insert %[[GATHER1]], %[[INS0]] [1] : vector<[3]xf32> into vector<2x[3]xf32>
+// CHECK-NEXT:    return %[[INS1]] : vector<2x[3]xf32>
+func.func @scalable_gather_memref_2d(%base: memref<?x?xf32>, %v: vector<2x[3]xindex>, %mask: vector<2x[3]xi1>, %pass_thru: vector<2x[3]xf32>) -> vector<2x[3]xf32> {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %0 = vector.gather %base[%c0, %c1][%v], %mask, %pass_thru : memref<?x?xf32>, vector<2x[3]xindex>, vector<2x[3]xi1>, vector<2x[3]xf32> into vector<2x[3]xf32>
+ return %0 : vector<2x[3]xf32>
+}
+
 // CHECK-LABEL: @gather_tensor_1d
 // CHECK-SAME:    ([[BASE:%.+]]: tensor<?xf32>, [[IDXVEC:%.+]]: vector<2xindex>, [[MASK:%.+]]: vector<2xi1>, [[PASS:%.+]]: vector<2xf32>)
 // CHECK-DAG:     [[M0:%.+]]    = vector.extract [[MASK]][0] : i1 from vector<2xi1>

Original file line number	Diff line number	Diff line change
`@@ -396,3 +396,8 @@ vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,`
`396`	`396`	`}`
`397`	`397`	`return success();`
`398`	`398`	`}`
	`399`	`+`
	`400`	`+bool vector::isTrailingDimScalable(VectorType type) {`
	`401`	`+ return type.getRank() >= 1 && type.getScalableDims().back() &&`
	`402`	`+ !llvm::is_contained(type.getScalableDims().drop_back(), true);`
	`403`	`+}`