Address review comments: tidy lowering docs, add unsupported-mask tests

c-rhodes · c-rhodes · commit 9a5cdf0bd9d2 · 2023-11-07T15:46:09.000Z
diff --git a/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp b/mlir/lib/Conversion/ArmSMEToSCF/ArmSMEToSCF.cpp
@@ -158,6 +158,7 @@ struct TileLoadOpConversion : public OpRewritePattern<arm_sme::TileLoadOp> {
 ///  %c0 = arith.constant 0 : index
 ///  %c1 = arith.constant 1 : index
 ///  %tile = arm_sme.zero : vector<[4]x[4]xi32>
+///  %num_rows = arith.constant 2 : index
 ///  %num_cols = vector.create_mask %c4 : vector<[4]xi1>
 ///  scf.for %tile_slice_idx = %c0 to %num_rows step %c1 {
 ///    %tile_update = arm_sme.load_tile_slice
@@ -252,24 +253,12 @@ struct TileLoadOpWithMaskAndPadZeroConversion
 ///
 ///  AFTER:
 ///  ```mlir
+///  ...
 ///  %pad_1d = arith.constant dense<1> : vector<[4]xi32>
-///  %num_rows = arith.constant 2 : index
-///  %num_cols = arith.constant 4 : index
-///  %num_cols_i32 = arith.index_castui %num_cols : index to i32
-///  %tile_id = arm_sme.get_tile_id : i32
-///  %tile = arm_sme.cast_tile_to_vector %tile_id : i32 to vector<[4]x[4]xi32>
-///  %vscale = vector.vscale
-///  %c0 = arith.constant 0 : index
-///  %c1 = arith.constant 1 : index
-///  %min_svl_s = arith.constant 4 : index
-///  %svl_s = arith.muli %min_svl_s, %vscale : index
 ///  scf.for %tile_slice_idx = %c0 to %svl_s step %c1 {
-///    %row_is_active = arith.cmpi ult %tile_slice_idx, %num_rows : index
-///    %row_is_active_i32 = arith.extsi %row_is_active : i1 to i32
-///    %mask = arith.andi %row_is_active_i32, %num_cols_i32 : i32
-///    %mask_index = arith.index_cast %mask : i32 to index
-///    %mask_1d = vector.create_mask %mask_index : vector<[4]xi1>
-///    %slice = vector.maskedload %base[%tile_slice_idx, %c0], %mask_1d, %pad
+///    ...
+///    %mask_1d = vector.create_mask <combined_mask> : vector<[4]xi1>
+///    %slice = vector.maskedload %base[%tile_slice_idx, %c0], %mask_1d, %pad_1d
 ///      : memref<?x?xi32>, vector<[4]xi1>,
 ///        vector<[4]xi32> into vector<[4]xi32>
 ///    // Insert slice into tile
diff --git a/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir b/mlir/test/Conversion/ArmSMEToSCF/arm-sme-to-scf.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-arm-sme-to-scf -cse -split-input-file | FileCheck %s
+// RUN: mlir-opt %s -convert-arm-sme-to-scf -cse -split-input-file -verify-diagnostics | FileCheck %s
 
 //===----------------------------------------------------------------------===//
 // arm_sme.tile_load
@@ -89,6 +89,25 @@ func.func @arm_sme_tile_load_hor_with_mask_and_nonzero_pad(%src : memref<?x?xi32
   return
 }
 
+// -----
+
+func.func @arm_sme_tile_load_zero_pad__unsupported_mask_op(%src : memref<?x?xi32>, %mask : vector<[4]x[4]xi1>) {
+  %c0 = arith.constant 0 : index
+  %pad = arith.constant 0 : i32
+  // expected-error@+1 {{failed to legalize operation 'arm_sme.tile_load' that was explicitly marked illegal}}
+  %tile = arm_sme.tile_load %src[%c0, %c0], %pad, %mask : memref<?x?xi32>, vector<[4]x[4]xi32>
+  return
+}
+
+// -----
+
+func.func @arm_sme_tile_load_nonzero_pad__unsupported_mask_op(%src : memref<?x?xi32>, %pad : i32, %mask : vector<[4]x[4]xi1>) {
+  %c0 = arith.constant 0 : index
+  // expected-error@+1 {{failed to legalize operation 'arm_sme.tile_load' that was explicitly marked illegal}}
+  %tile = arm_sme.tile_load %src[%c0, %c0], %pad, %mask : memref<?x?xi32>, vector<[4]x[4]xi32>
+  return
+}
+
 //===----------------------------------------------------------------------===//
 // arm_sme.tile_store
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir b/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-transfer-read-2d.mlir
@@ -11,7 +11,7 @@
 
 // RUN: %{compile} | %{run} | FileCheck %s
 
-// Vector load.
+// 2-D vector load (SME tile).
 func.func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   %c4 = arith.constant 4 : index
   %pad = arith.constant 0.0 : f32
@@ -24,7 +24,7 @@ func.func @transfer_read_2d(%A : memref<?x?xf32>, %base1: index, %base2: index)
   return
 }
 
-// Vector load + transpose.
+// 2-D vector load (SME tile) + transpose.
 func.func @transfer_read_2d_transposed(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   %pad = arith.constant 0.0 : f32
   %0 = vector.transfer_read %A[%base1, %base2], %pad
@@ -37,7 +37,7 @@ func.func @transfer_read_2d_transposed(%A : memref<?x?xf32>, %base1: index, %bas
   return
 }
 
-// Vector load with mask and pad of zero.
+// 2-D vector load (SME tile) with mask and pad of zero.
 func.func @transfer_read_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
@@ -52,7 +52,7 @@ func.func @transfer_read_2d_mask(%A : memref<?x?xf32>, %base1: index, %base2: in
   return
 }
 
-// Vector load with mask and pad of zero + transpose.
+// 2-D vector load (SME tile) with mask and pad of zero + transpose.
 func.func @transfer_read_2d_mask_transposed(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
@@ -68,7 +68,7 @@ func.func @transfer_read_2d_mask_transposed(%A : memref<?x?xf32>, %base1: index,
   return
 }
 
-// Vector load with mask and non-zero pad.
+// 2-D vector load (SME tile) with mask and non-zero pad.
 func.func @transfer_read_2d_mask_non_zero_pad(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
@@ -83,7 +83,7 @@ func.func @transfer_read_2d_mask_non_zero_pad(%A : memref<?x?xf32>, %base1: inde
   return
 }
 
-// Vector load with mask and non-zero pad + transpose.
+// 2-D vector load (SME tile) with mask and non-zero pad + transpose.
 func.func @transfer_read_2d_mask_non_zero_pad_transposed(%A : memref<?x?xf32>, %base1: index, %base2: index) {
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index