Skip to content

[mlir][tosa] Change MatMul zero-point to inputs #129785

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 7, 2025

Conversation

Tai78641
Copy link
Contributor

@Tai78641 Tai78641 commented Mar 4, 2025

  • Change the MatMul zero-point attributes (a_zp, b_zp) to tensor inputs
  • Update the affected MLIR tests
  • Extend the MatMul ShardingInterface indexing maps to cover the new zero-point operands

@llvmbot
Copy link
Member

llvmbot commented Mar 4, 2025

@llvm/pr-subscribers-mlir-tosa

@llvm/pr-subscribers-mlir

Author: Tai Ly (Tai78641)

Changes
  • Change the MatMul zero-point attributes (a_zp, b_zp) to tensor inputs
  • Update the affected MLIR tests
  • Extend the MatMul ShardingInterface indexing maps to cover the new zero-point operands

Patch is 44.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129785.diff

15 Files Affected:

  • (modified) mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc (+8-6)
  • (modified) mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td (+10-2)
  • (modified) mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp (+26-3)
  • (modified) mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp (+2)
  • (modified) mlir/lib/Dialect/Tosa/IR/TosaOps.cpp (+55-20)
  • (modified) mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp (+10-1)
  • (modified) mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir (+18-6)
  • (modified) mlir/test/Dialect/Mesh/sharding-propagation.mlir (+39-27)
  • (modified) mlir/test/Dialect/Tosa/availability.mlir (+2-2)
  • (modified) mlir/test/Dialect/Tosa/invalid.mlir (+40)
  • (modified) mlir/test/Dialect/Tosa/level_check.mlir (+2-1)
  • (modified) mlir/test/Dialect/Tosa/ops.mlir (+3-1)
  • (modified) mlir/test/Dialect/Tosa/profile_all_unsupported.mlir (+2-2)
  • (modified) mlir/test/Dialect/Tosa/profile_pro_fp_unsupported.mlir (+2-2)
  • (modified) mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir (+23-13)
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc b/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc
index a9b458acd87f2..d3fd4c3d1d3e1 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc
@@ -35,9 +35,11 @@ profileComplianceMap = {
         {fp16T, fp16T, fp32T, fp32T},
         {fp32T, fp32T, fp32T, fp32T}}}}},
     {"tosa.matmul",
-     {{{Profile::pro_int}, {{i8T, i8T, i32T}}},
+     {{{Profile::pro_int}, {{i8T, i8T, i8T, i8T, i32T}}},
       {{Profile::pro_fp},
-       {{fp16T, fp16T, fp16T}, {fp16T, fp16T, fp32T}, {fp32T, fp32T, fp32T}}}}},
+       {{fp16T, fp16T, fp16T, fp16T, fp16T},
+        {fp16T, fp16T, fp16T, fp16T, fp32T},
+        {fp32T, fp32T, fp32T, fp32T, fp32T}}}}},
     {"tosa.max_pool2d",
      {{{Profile::pro_int}, {{i8T, i8T}}},
       {{Profile::pro_fp}, {{fp16T, fp16T}, {fp32T, fp32T}}}}},
@@ -273,10 +275,10 @@ extensionComplianceMap = {
       {{Extension::int16}, {{i16T, i8T, i48T, i48T}}},
       {{Extension::bf16}, {{bf16T, bf16T, fp32T, fp32T}}}}},
     {"tosa.matmul",
-     {{{Extension::int16}, {{i16T, i16T, i48T}}},
-      {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T, fp16T}}},
-      {{Extension::fp8e5m2}, {{fp8e5m2T, fp8e5m2T, fp16T}}},
-      {{Extension::bf16}, {{bf16T, bf16T, fp32T}}}}},
+     {{{Extension::int16}, {{i16T, i16T, i16T, i16T, i48T}}},
+      {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T, fp8e4m3T, fp8e4m3T, fp16T}}},
+      {{Extension::fp8e5m2}, {{fp8e5m2T, fp8e5m2T, fp8e5m2T, fp8e5m2T, fp16T}}},
+      {{Extension::bf16}, {{bf16T, bf16T, bf16T, bf16T, fp32T}}}}},
     {"tosa.max_pool2d",
      {{{Extension::int16}, {{i16T, i16T}}},
       {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T}}},
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index e0f2fd411bbe4..66ba5b0b6dc68 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -309,8 +309,8 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
   let arguments = (ins
     Tosa_Tensor3D:$a,
     Tosa_Tensor3D:$b,
-    OptionalAttr<I32Attr>:$a_zp,
-    OptionalAttr<I32Attr>:$b_zp
+    Tosa_ScalarTensor:$a_zp,
+    Tosa_ScalarTensor:$b_zp
   );
 
   let results = (outs
@@ -322,7 +322,15 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
     Extension<[Tosa_EXT_INT16, Tosa_EXT_FP8E4M3, Tosa_EXT_FP8E5M2, Tosa_EXT_BF16]>,
   ];
 
+  let extraClassDeclaration = [{
+    FailureOr<int64_t> getAZeroPoint();
+    FailureOr<int64_t> getBZeroPoint();
+    LogicalResult verifyAZeroPoint(int64_t zp);
+    LogicalResult verifyBZeroPoint(int64_t zp);
+  }];
+
   let builders = [Tosa_MatMulOpQuantInfoBuilder];
+  let hasVerifier = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
index 2a2589e19d0ac..667c97f213198 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -621,15 +621,38 @@ class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
                            .create<linalg::FillOp>(loc, ValueRange{zero},
                                                    ValueRange{emptyTensor})
                            .result();
-    if (!op.getAZp() && !op.getBZp()) {
+
+    FailureOr<int64_t> maybeAZp = op.getAZeroPoint();
+    FailureOr<int64_t> maybeBZp = op.getBZeroPoint();
+    if (failed(maybeAZp))
+      return rewriter.notifyMatchFailure(
+          op, "input a zero point cannot be statically determined");
+    if (failed(maybeBZp))
+      return rewriter.notifyMatchFailure(
+          op, "input b zero point cannot be statically determined");
+
+    int64_t aZpVal = *maybeAZp;
+    int64_t bZpVal = *maybeBZp;
+
+    if (op.verifyAZeroPoint(aZpVal).failed())
+      return rewriter.notifyMatchFailure(
+          op, "input a zero point must be zero for non-int8 integer types");
+
+    if (op.verifyBZeroPoint(bZpVal).failed())
+      return rewriter.notifyMatchFailure(
+          op, "input b zero point must be zero for non-int8 integer types");
+
+    if (aZpVal == 0 && bZpVal == 0) {
       rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
           op, TypeRange{op.getType()},
           ValueRange{adaptor.getA(), adaptor.getB()}, ValueRange{zeroTensor});
       return success();
     }
 
-    auto aZp = rewriter.create<arith::ConstantOp>(loc, op.getAZpAttr());
-    auto bZp = rewriter.create<arith::ConstantOp>(loc, op.getBZpAttr());
+    auto aZp = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getI32IntegerAttr(aZpVal));
+    auto bZp = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getI32IntegerAttr(bZpVal));
     rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
         op, TypeRange{op.getType()},
         ValueRange{adaptor.getA(), adaptor.getB(), aZp, bZp}, zeroTensor);
diff --git a/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp b/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp
index ffbb707344b8c..6dcb7c845b21f 100644
--- a/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp
@@ -55,6 +55,8 @@ struct MatMulOpSharding
     SmallVector<AffineMap> maps;
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 3}, ctx));
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 3, 2}, ctx));
+    maps.push_back(AffineMap::get(0, 0, {}, ctx));
+    maps.push_back(AffineMap::get(0, 0, {}, ctx));
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 2}, ctx));
     return maps;
   }
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index 8841d53b6e64d..ae83467f698f1 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -435,6 +435,34 @@ static LogicalResult verifySameElementTypes(T op, Type inType, Type outType) {
   return success();
 }
 
+static LogicalResult verifyZpMatMul(MatMulOp op) {
+  auto aEType = getStorageElementTypeOrSelf(op.getA().getType());
+  auto aZpEType = getStorageElementTypeOrSelf(op.getAZp().getType());
+  if (aEType != aZpEType) {
+    return op.emitOpError("expect input a and a_zp have the same "
+                          "element type, got ")
+           << aEType << " and " << aZpEType;
+  }
+
+  auto bEType = getStorageElementTypeOrSelf(op.getB().getType());
+  auto bZpEType = getStorageElementTypeOrSelf(op.getBZp().getType());
+  if (bEType != bZpEType) {
+    return op.emitOpError("expect input b and b_zp have the same "
+                          "element type, got ")
+           << bEType << " and " << bZpEType;
+  }
+
+  FailureOr<int64_t> maybeAZp = op.getAZeroPoint();
+  if (succeeded(maybeAZp) && op.verifyAZeroPoint(*maybeAZp).failed())
+    return failure();
+
+  FailureOr<int64_t> maybeBZp = op.getBZeroPoint();
+  if (succeeded(maybeBZp) && op.verifyBZeroPoint(*maybeBZp).failed())
+    return failure();
+
+  return success();
+}
+
 LogicalResult tosa::ArgMaxOp::verify() {
   // Ensure output is of 32-bit integer
   const auto resultETy = llvm::cast<ShapedType>(getType()).getElementType();
@@ -601,23 +629,13 @@ buildTransConvOpWithQuantInfo(OpBuilder &builder, OperationState &result,
 static void buildMatMulOpWithQuantInfo(OpBuilder &builder,
                                        OperationState &result, Type outputType,
                                        Value a, Value b) {
-  result.addOperands({a, b});
-  auto quantAttr = ::buildMatMulOpQuantizationAttr(builder, a, b);
+  auto zps = createZPsAsConst(builder, a, b);
+  result.addOperands({a, b, zps.first, zps.second});
 
-  if (quantAttr) {
-    result.addAttribute("a_zp", builder.getI32IntegerAttr(
-                                    static_cast<int32_t>(quantAttr.getAZp())));
-    result.addAttribute("b_zp", builder.getI32IntegerAttr(
-                                    static_cast<int32_t>(quantAttr.getBZp())));
-
-    auto inputType = llvm::dyn_cast<ShapedType>(a.getType());
-    assert(inputType && "Input must be a shaped tensor type!");
-
-    auto inputQType = llvm::dyn_cast<mlir::quant::UniformQuantizedType>(
-        inputType.getElementType());
-    assert(inputQType && "Tensor must have quantized datatype!");
-
-    unsigned inputBits = inputQType.getStorageTypeIntegralWidth();
+  Type finalOutputType{outputType};
+  if (auto quantAttr = buildMatMulOpQuantizationAttr(builder, a, b)) {
+    auto eType = getStorageElementTypeOrSelf(a.getType());
+    auto inputBits = eType.getIntOrFloatBitWidth();
 
     auto outputShapedType = llvm::dyn_cast<ShapedType>(outputType);
     assert(outputShapedType && "Output must be a shaped type");
@@ -627,11 +645,10 @@ static void buildMatMulOpWithQuantInfo(OpBuilder &builder,
       accElementType = builder.getIntegerType(48);
     else
       accElementType = builder.getI32Type();
-    auto accType = outputShapedType.clone(accElementType);
-    result.addTypes(accType);
-  } else {
-    result.addTypes(outputType);
+
+    finalOutputType = outputShapedType.clone(accElementType);
   }
+  result.addTypes(finalOutputType);
 }
 
 /// Both the tosa.avg_pool2d and unary ops use the same
@@ -1025,6 +1042,22 @@ LogicalResult tosa::MatMulOp::inferReturnTypeComponents(
   return success();
 }
 
+LogicalResult MatMulOp::verify() {
+  auto aType = llvm::dyn_cast<ShapedType>(getA().getType());
+  auto bType = llvm::dyn_cast<ShapedType>(getB().getType());
+
+  // Must be shaped tensor types
+  if (!aType)
+    return emitOpError("expect a shaped tensor for input a, got ")
+           << getA().getType();
+
+  if (!bType)
+    return emitOpError("expect a shaped tensor for input b, got ")
+           << getB().getType();
+
+  return verifyZpMatMul(*this);
+}
+
 LogicalResult tosa::PadOp::inferReturnTypeComponents(
     MLIRContext *context, ::std::optional<Location> location,
     PadOp::Adaptor adaptor,
@@ -1560,6 +1593,8 @@ ZERO_POINT_HELPER(TransposeConv2DOp, Input)
 ZERO_POINT_HELPER(TransposeConv2DOp, Weight)
 ZERO_POINT_HELPER(AvgPool2dOp, Input)
 ZERO_POINT_HELPER(AvgPool2dOp, Output)
+ZERO_POINT_HELPER(MatMulOp, A)
+ZERO_POINT_HELPER(MatMulOp, B)
 #undef ZERO_POINT_HELPER
 
 LogicalResult tosa::TransposeOp::inferReturnTypeComponents(
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp
index 345616c9563b5..983062ffd7912 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp
@@ -178,6 +178,15 @@ void ProfileInfoDepot::populateProfileInfo(tosa::RescaleOp op) {
   addValue(op.getOutput());
 }
 
+template <>
+void ProfileInfoDepot::populateProfileInfo(tosa::MatMulOp op) {
+  addValue(op.getA());
+  addValue(op.getB());
+  addValue(op.getAZp());
+  addValue(op.getBZp());
+  addValue(op.getOutput());
+}
+
 LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) {
 // This helper function only populates the info for the customised operands.
 #define POPULATE_PROFILE_INFO_CUSTOM(tosaOp)                                   \
@@ -218,6 +227,7 @@ LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) {
   POPULATE_PROFILE_INFO_CUSTOM(Resize)
   POPULATE_PROFILE_INFO_CUSTOM(Select)
   POPULATE_PROFILE_INFO_CUSTOM(Rescale)
+  POPULATE_PROFILE_INFO_CUSTOM(MatMul)
 
   // Type Invariant Extension, a capability extension that is independent
   // of the data type, meaning any compatible type can be used. No type
@@ -235,7 +245,6 @@ LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) {
   POPULATE_PROFILE_INFO_COMMON(Cast)
   POPULATE_PROFILE_INFO_COMMON(Const)
   POPULATE_PROFILE_INFO_COMMON(ArgMax)
-  POPULATE_PROFILE_INFO_COMMON(MatMul)
   POPULATE_PROFILE_INFO_COMMON(Sub)
   POPULATE_PROFILE_INFO_COMMON(Maximum)
   POPULATE_PROFILE_INFO_COMMON(Minimum)
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index d4afc468eeea4..3807ae2599b72 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -8,7 +8,9 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor
   // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x5x3xf32>, tensor<1x3x6xf32>)  -> tensor<1x5x6xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x3xf32>, tensor<1x3x6xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<1x5x6xf32>
   return %0 : tensor<1x5x6xf32>
 }
 
@@ -23,7 +25,9 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) ->
   // CHECK: [[ONE:%.+]] = arith.constant 1
   // CHECK: [[TWO:%.+]] = arith.constant 2
   // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
-  %0 = tosa.matmul %arg0, %arg1 {a_zp = 1 : i32, b_zp = 2 : i32} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> tensor<1x5x6xi32>
+  %a_zp = "tosa.const"() <{value = dense<1> : tensor<1xi8>}> : () -> tensor<1xi8>
+  %b_zp = "tosa.const"() <{value = dense<2> : tensor<1xi8>}> : () -> tensor<1xi8>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x3xi8>, tensor<1x3x6xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x5x6xi32>
   return %0 : tensor<1x5x6xi32>
 }
 
@@ -37,7 +41,9 @@ func.func @matmul_dyn_batch(%arg0: tensor<?x5x3xf32>, %arg1: tensor<?x3x6xf32>)
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> tensor<?x5x6xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<?x5x3xf32>, tensor<?x3x6xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<?x5x6xf32>
   return %0 : tensor<?x5x6xf32>
 }
 
@@ -51,7 +57,9 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> tensor<1x5x?xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x3xf32>, tensor<1x3x?xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<1x5x?xf32>
   return %0 : tensor<1x5x?xf32>
 }
 
@@ -63,7 +71,9 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> tensor<1x5x6xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x?xf32>, tensor<1x?x6xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<1x5x6xf32>
   return %0 : tensor<1x5x6xf32>
 }
 
@@ -77,7 +87,9 @@ func.func @matmul_dyn_output(%arg0: tensor<1x1x8xf32>, %arg1: tensor<1x8x1xf32>)
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM0]]) : tensor<?x1x1xf32>
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<?x1x1xf32>) -> tensor<?x1x1xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x1x8xf32>, tensor<1x8x1xf32>) outs(%[[FILLED]] : tensor<?x1x1xf32>) -> tensor<?x1x1xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x1x8xf32>, tensor<1x8x1xf32>) -> tensor<?x1x1xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x1x8xf32>, tensor<1x8x1xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<?x1x1xf32>
   return %0 : tensor<?x1x1xf32>
 }
 
diff --git a/mlir/test/Dialect/Mesh/sharding-propagation.mlir b/mlir/test/Dialect/Mesh/sharding-propagation.mlir
index 83136f613b020..14c67e670e921 100644
--- a/mlir/test/Dialect/Mesh/sharding-propagation.mlir
+++ b/mlir/test/Dialect/Mesh/sharding-propagation.mlir
@@ -98,14 +98,16 @@ func.func @arrow_structure(%arg0: tensor<8x16xf32>) -> (tensor<8x16xf32>, tensor
 }
 
 // CHECK-LABEL: func.func @matmul_on_def_shard_batch_and_m
-// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>
-func.func @matmul_on_def_shard_batch_and_m(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>) -> tensor<2x16x32xf32> {
+// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>, %[[ARG2:.*]]: tensor<1xf32>
+func.func @matmul_on_def_shard_batch_and_m(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>, %arg2: tensor<1xf32>) -> tensor<2x16x32xf32> {
   // CHECK-NEXT:  %[[S0:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}0], [1]] : !mesh.sharding
   // CHECK-NEXT:  %[[V0:.*]] = mesh.shard %[[ARG0]] to %[[S0]] annotate_for_users  : tensor<2x16x8xf32>
   // CHECK-NEXT:  %[[S1:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}0]] : !mesh.sharding
   // CHECK-NEXT:  %[[V1:.*]] = mesh.shard %[[ARG1]] to %[[S1]] annotate_for_users  : tensor<2x8x32xf32>
-  // CHECK-NEXT:  %[[V2:.*]] = tosa.matmul %[[V0]], %[[V1]]
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<2x16x8xf32>, tensor<2x8x32xf32>) -> tensor<2x16x32xf32>
+  // CHECK-NEXT:  %[[S2:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}]] : !mesh.sharding
+  // CHECK-NEXT:  %[[ZP:.*]] = mesh.shard %[[ARG2]] to %[[S2]] annotate_for_users  : tensor<1xf32>
+  // CHECK-NEXT:  %[[V2:.*]] = tosa.matmul %[[V0]], %[[V1]], %[[ZP]], %[[ZP]]
+  %0 = tosa.matmul %arg0, %arg1, %arg2, %arg2 : (tensor<2x16x8xf32>, tensor<2x8x32xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<2x16x32xf32>
   // CHECK-NEXT:  %[[S3:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}0], [1]] : !mesh.sharding
   // CHECK-NEXT:  %[[V3:.*]] = mesh.shard %[[V2]] to %[[S3]]  : tensor<2x16x32xf32>
   %s1 = mesh.sharding @mesh_2d split_axes = [[0], [1]] : !mesh.sharding
@@ -115,14 +117,16 @@ func.func @matmul_on_def_shard_batch_and_m(%arg0: tensor<2x16x8xf32>, %arg1: ten
 }
 
 // CHECK-LABEL: func.func @matmul_on_def_shard_m_and_k
-// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>
-func.func @matmul_on_def_shard_m_and_k(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>) -> tensor<2x16x32xf32> {
+// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>, %[[ARG2:.*]]: tensor<1xf32>
+func.func @matmul_on_def_shard_m_and_k(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>, %arg2: tensor<1xf32>) -> tensor<2x16x32xf32> {
   // CHECK-NEXT:  %[[S0:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}], [1], [0]] : !mesh.sharding
   // CHECK-NEXT:  %[[V0:.*]] = mesh.shar...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Mar 4, 2025

@llvm/pr-subscribers-mlir-linalg

Author: Tai Ly (Tai78641)

Changes
  • Change the MatMul zero-point attributes (a_zp, b_zp) to tensor inputs
  • Update the affected MLIR tests
  • Extend the MatMul ShardingInterface indexing maps to cover the new zero-point operands

Patch is 44.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129785.diff

15 Files Affected:

  • (modified) mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc (+8-6)
  • (modified) mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td (+10-2)
  • (modified) mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp (+26-3)
  • (modified) mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp (+2)
  • (modified) mlir/lib/Dialect/Tosa/IR/TosaOps.cpp (+55-20)
  • (modified) mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp (+10-1)
  • (modified) mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir (+18-6)
  • (modified) mlir/test/Dialect/Mesh/sharding-propagation.mlir (+39-27)
  • (modified) mlir/test/Dialect/Tosa/availability.mlir (+2-2)
  • (modified) mlir/test/Dialect/Tosa/invalid.mlir (+40)
  • (modified) mlir/test/Dialect/Tosa/level_check.mlir (+2-1)
  • (modified) mlir/test/Dialect/Tosa/ops.mlir (+3-1)
  • (modified) mlir/test/Dialect/Tosa/profile_all_unsupported.mlir (+2-2)
  • (modified) mlir/test/Dialect/Tosa/profile_pro_fp_unsupported.mlir (+2-2)
  • (modified) mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir (+23-13)
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc b/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc
index a9b458acd87f2..d3fd4c3d1d3e1 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc
@@ -35,9 +35,11 @@ profileComplianceMap = {
         {fp16T, fp16T, fp32T, fp32T},
         {fp32T, fp32T, fp32T, fp32T}}}}},
     {"tosa.matmul",
-     {{{Profile::pro_int}, {{i8T, i8T, i32T}}},
+     {{{Profile::pro_int}, {{i8T, i8T, i8T, i8T, i32T}}},
       {{Profile::pro_fp},
-       {{fp16T, fp16T, fp16T}, {fp16T, fp16T, fp32T}, {fp32T, fp32T, fp32T}}}}},
+       {{fp16T, fp16T, fp16T, fp16T, fp16T},
+        {fp16T, fp16T, fp16T, fp16T, fp32T},
+        {fp32T, fp32T, fp32T, fp32T, fp32T}}}}},
     {"tosa.max_pool2d",
      {{{Profile::pro_int}, {{i8T, i8T}}},
       {{Profile::pro_fp}, {{fp16T, fp16T}, {fp32T, fp32T}}}}},
@@ -273,10 +275,10 @@ extensionComplianceMap = {
       {{Extension::int16}, {{i16T, i8T, i48T, i48T}}},
       {{Extension::bf16}, {{bf16T, bf16T, fp32T, fp32T}}}}},
     {"tosa.matmul",
-     {{{Extension::int16}, {{i16T, i16T, i48T}}},
-      {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T, fp16T}}},
-      {{Extension::fp8e5m2}, {{fp8e5m2T, fp8e5m2T, fp16T}}},
-      {{Extension::bf16}, {{bf16T, bf16T, fp32T}}}}},
+     {{{Extension::int16}, {{i16T, i16T, i16T, i16T, i48T}}},
+      {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T, fp8e4m3T, fp8e4m3T, fp16T}}},
+      {{Extension::fp8e5m2}, {{fp8e5m2T, fp8e5m2T, fp8e5m2T, fp8e5m2T, fp16T}}},
+      {{Extension::bf16}, {{bf16T, bf16T, bf16T, bf16T, fp32T}}}}},
     {"tosa.max_pool2d",
      {{{Extension::int16}, {{i16T, i16T}}},
       {{Extension::fp8e4m3}, {{fp8e4m3T, fp8e4m3T}}},
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
index e0f2fd411bbe4..66ba5b0b6dc68 100644
--- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
+++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td
@@ -309,8 +309,8 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
   let arguments = (ins
     Tosa_Tensor3D:$a,
     Tosa_Tensor3D:$b,
-    OptionalAttr<I32Attr>:$a_zp,
-    OptionalAttr<I32Attr>:$b_zp
+    Tosa_ScalarTensor:$a_zp,
+    Tosa_ScalarTensor:$b_zp
   );
 
   let results = (outs
@@ -322,7 +322,15 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul"> {
     Extension<[Tosa_EXT_INT16, Tosa_EXT_FP8E4M3, Tosa_EXT_FP8E5M2, Tosa_EXT_BF16]>,
   ];
 
+  let extraClassDeclaration = [{
+    FailureOr<int64_t> getAZeroPoint();
+    FailureOr<int64_t> getBZeroPoint();
+    LogicalResult verifyAZeroPoint(int64_t zp);
+    LogicalResult verifyBZeroPoint(int64_t zp);
+  }];
+
   let builders = [Tosa_MatMulOpQuantInfoBuilder];
+  let hasVerifier = 1;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
index 2a2589e19d0ac..667c97f213198 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -621,15 +621,38 @@ class MatMulConverter : public OpConversionPattern<tosa::MatMulOp> {
                            .create<linalg::FillOp>(loc, ValueRange{zero},
                                                    ValueRange{emptyTensor})
                            .result();
-    if (!op.getAZp() && !op.getBZp()) {
+
+    FailureOr<int64_t> maybeAZp = op.getAZeroPoint();
+    FailureOr<int64_t> maybeBZp = op.getBZeroPoint();
+    if (failed(maybeAZp))
+      return rewriter.notifyMatchFailure(
+          op, "input a zero point cannot be statically determined");
+    if (failed(maybeBZp))
+      return rewriter.notifyMatchFailure(
+          op, "input b zero point cannot be statically determined");
+
+    int64_t aZpVal = *maybeAZp;
+    int64_t bZpVal = *maybeBZp;
+
+    if (op.verifyAZeroPoint(aZpVal).failed())
+      return rewriter.notifyMatchFailure(
+          op, "input a zero point must be zero for non-int8 integer types");
+
+    if (op.verifyBZeroPoint(bZpVal).failed())
+      return rewriter.notifyMatchFailure(
+          op, "input b zero point must be zero for non-int8 integer types");
+
+    if (aZpVal == 0 && bZpVal == 0) {
       rewriter.replaceOpWithNewOp<linalg::BatchMatmulOp>(
           op, TypeRange{op.getType()},
           ValueRange{adaptor.getA(), adaptor.getB()}, ValueRange{zeroTensor});
       return success();
     }
 
-    auto aZp = rewriter.create<arith::ConstantOp>(loc, op.getAZpAttr());
-    auto bZp = rewriter.create<arith::ConstantOp>(loc, op.getBZpAttr());
+    auto aZp = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getI32IntegerAttr(aZpVal));
+    auto bZp = rewriter.create<arith::ConstantOp>(
+        loc, rewriter.getI32IntegerAttr(bZpVal));
     rewriter.replaceOpWithNewOp<linalg::QuantizedBatchMatmulOp>(
         op, TypeRange{op.getType()},
         ValueRange{adaptor.getA(), adaptor.getB(), aZp, bZp}, zeroTensor);
diff --git a/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp b/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp
index ffbb707344b8c..6dcb7c845b21f 100644
--- a/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/ShardingInterfaceImpl.cpp
@@ -55,6 +55,8 @@ struct MatMulOpSharding
     SmallVector<AffineMap> maps;
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 3}, ctx));
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 3, 2}, ctx));
+    maps.push_back(AffineMap::get(0, 0, {}, ctx));
+    maps.push_back(AffineMap::get(0, 0, {}, ctx));
     maps.push_back(AffineMap::getMultiDimMapWithTargets(4, {0, 1, 2}, ctx));
     return maps;
   }
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index 8841d53b6e64d..ae83467f698f1 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -435,6 +435,34 @@ static LogicalResult verifySameElementTypes(T op, Type inType, Type outType) {
   return success();
 }
 
+static LogicalResult verifyZpMatMul(MatMulOp op) {
+  auto aEType = getStorageElementTypeOrSelf(op.getA().getType());
+  auto aZpEType = getStorageElementTypeOrSelf(op.getAZp().getType());
+  if (aEType != aZpEType) {
+    return op.emitOpError("expect input a and a_zp have the same "
+                          "element type, got ")
+           << aEType << " and " << aZpEType;
+  }
+
+  auto bEType = getStorageElementTypeOrSelf(op.getB().getType());
+  auto bZpEType = getStorageElementTypeOrSelf(op.getBZp().getType());
+  if (bEType != bZpEType) {
+    return op.emitOpError("expect input b and b_zp have the same "
+                          "element type, got ")
+           << bEType << " and " << bZpEType;
+  }
+
+  FailureOr<int64_t> maybeAZp = op.getAZeroPoint();
+  if (succeeded(maybeAZp) && op.verifyAZeroPoint(*maybeAZp).failed())
+    return failure();
+
+  FailureOr<int64_t> maybeBZp = op.getBZeroPoint();
+  if (succeeded(maybeBZp) && op.verifyBZeroPoint(*maybeBZp).failed())
+    return failure();
+
+  return success();
+}
+
 LogicalResult tosa::ArgMaxOp::verify() {
   // Ensure output is of 32-bit integer
   const auto resultETy = llvm::cast<ShapedType>(getType()).getElementType();
@@ -601,23 +629,13 @@ buildTransConvOpWithQuantInfo(OpBuilder &builder, OperationState &result,
 static void buildMatMulOpWithQuantInfo(OpBuilder &builder,
                                        OperationState &result, Type outputType,
                                        Value a, Value b) {
-  result.addOperands({a, b});
-  auto quantAttr = ::buildMatMulOpQuantizationAttr(builder, a, b);
+  auto zps = createZPsAsConst(builder, a, b);
+  result.addOperands({a, b, zps.first, zps.second});
 
-  if (quantAttr) {
-    result.addAttribute("a_zp", builder.getI32IntegerAttr(
-                                    static_cast<int32_t>(quantAttr.getAZp())));
-    result.addAttribute("b_zp", builder.getI32IntegerAttr(
-                                    static_cast<int32_t>(quantAttr.getBZp())));
-
-    auto inputType = llvm::dyn_cast<ShapedType>(a.getType());
-    assert(inputType && "Input must be a shaped tensor type!");
-
-    auto inputQType = llvm::dyn_cast<mlir::quant::UniformQuantizedType>(
-        inputType.getElementType());
-    assert(inputQType && "Tensor must have quantized datatype!");
-
-    unsigned inputBits = inputQType.getStorageTypeIntegralWidth();
+  Type finalOutputType{outputType};
+  if (auto quantAttr = buildMatMulOpQuantizationAttr(builder, a, b)) {
+    auto eType = getStorageElementTypeOrSelf(a.getType());
+    auto inputBits = eType.getIntOrFloatBitWidth();
 
     auto outputShapedType = llvm::dyn_cast<ShapedType>(outputType);
     assert(outputShapedType && "Output must be a shaped type");
@@ -627,11 +645,10 @@ static void buildMatMulOpWithQuantInfo(OpBuilder &builder,
       accElementType = builder.getIntegerType(48);
     else
       accElementType = builder.getI32Type();
-    auto accType = outputShapedType.clone(accElementType);
-    result.addTypes(accType);
-  } else {
-    result.addTypes(outputType);
+
+    finalOutputType = outputShapedType.clone(accElementType);
   }
+  result.addTypes(finalOutputType);
 }
 
 /// Both the tosa.avg_pool2d and unary ops use the same
@@ -1025,6 +1042,22 @@ LogicalResult tosa::MatMulOp::inferReturnTypeComponents(
   return success();
 }
 
+LogicalResult MatMulOp::verify() {
+  auto aType = llvm::dyn_cast<ShapedType>(getA().getType());
+  auto bType = llvm::dyn_cast<ShapedType>(getB().getType());
+
+  // Must be shaped tensor types
+  if (!aType)
+    return emitOpError("expect a shaped tensor for input a, got ")
+           << getA().getType();
+
+  if (!bType)
+    return emitOpError("expect a shaped tensor for input b, got ")
+           << getB().getType();
+
+  return verifyZpMatMul(*this);
+}
+
 LogicalResult tosa::PadOp::inferReturnTypeComponents(
     MLIRContext *context, ::std::optional<Location> location,
     PadOp::Adaptor adaptor,
@@ -1560,6 +1593,8 @@ ZERO_POINT_HELPER(TransposeConv2DOp, Input)
 ZERO_POINT_HELPER(TransposeConv2DOp, Weight)
 ZERO_POINT_HELPER(AvgPool2dOp, Input)
 ZERO_POINT_HELPER(AvgPool2dOp, Output)
+ZERO_POINT_HELPER(MatMulOp, A)
+ZERO_POINT_HELPER(MatMulOp, B)
 #undef ZERO_POINT_HELPER
 
 LogicalResult tosa::TransposeOp::inferReturnTypeComponents(
diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp
index 345616c9563b5..983062ffd7912 100644
--- a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp
+++ b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp
@@ -178,6 +178,15 @@ void ProfileInfoDepot::populateProfileInfo(tosa::RescaleOp op) {
   addValue(op.getOutput());
 }
 
+template <>
+void ProfileInfoDepot::populateProfileInfo(tosa::MatMulOp op) {
+  addValue(op.getA());
+  addValue(op.getB());
+  addValue(op.getAZp());
+  addValue(op.getBZp());
+  addValue(op.getOutput());
+}
+
 LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) {
 // This helper function only populates the info for the customised operands.
 #define POPULATE_PROFILE_INFO_CUSTOM(tosaOp)                                   \
@@ -218,6 +227,7 @@ LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) {
   POPULATE_PROFILE_INFO_CUSTOM(Resize)
   POPULATE_PROFILE_INFO_CUSTOM(Select)
   POPULATE_PROFILE_INFO_CUSTOM(Rescale)
+  POPULATE_PROFILE_INFO_CUSTOM(MatMul)
 
   // Type Invariant Extension, a capability extension that is independent
   // of the data type, meaning any compatible type can be used. No type
@@ -235,7 +245,6 @@ LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) {
   POPULATE_PROFILE_INFO_COMMON(Cast)
   POPULATE_PROFILE_INFO_COMMON(Const)
   POPULATE_PROFILE_INFO_COMMON(ArgMax)
-  POPULATE_PROFILE_INFO_COMMON(MatMul)
   POPULATE_PROFILE_INFO_COMMON(Sub)
   POPULATE_PROFILE_INFO_COMMON(Maximum)
   POPULATE_PROFILE_INFO_COMMON(Minimum)
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index d4afc468eeea4..3807ae2599b72 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -8,7 +8,9 @@ func.func @matmul(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x3x6xf32>) -> (tensor
   // CHECK: [[INIT:%.+]] = tensor.empty()
   // CHECK: [[FILLED:%.+]] = linalg.fill ins([[C0]] : f32) outs([[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x6xf32>) outs([[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x5x3xf32>, tensor<1x3x6xf32>)  -> tensor<1x5x6xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x3xf32>, tensor<1x3x6xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<1x5x6xf32>
   return %0 : tensor<1x5x6xf32>
 }
 
@@ -23,7 +25,9 @@ func.func @matmul_quantized(%arg0: tensor<1x5x3xi8>, %arg1: tensor<1x3x6xi8>) ->
   // CHECK: [[ONE:%.+]] = arith.constant 1
   // CHECK: [[TWO:%.+]] = arith.constant 2
   // CHECK: linalg.quantized_batch_matmul ins(%arg0, %arg1, [[ONE]], [[TWO]] : tensor<1x5x3xi8>, tensor<1x3x6xi8>, i32, i32) outs([[FILLED]] : tensor<1x5x6xi32>) -> tensor<1x5x6xi32>
-  %0 = tosa.matmul %arg0, %arg1 {a_zp = 1 : i32, b_zp = 2 : i32} : (tensor<1x5x3xi8>, tensor<1x3x6xi8>) -> tensor<1x5x6xi32>
+  %a_zp = "tosa.const"() <{value = dense<1> : tensor<1xi8>}> : () -> tensor<1xi8>
+  %b_zp = "tosa.const"() <{value = dense<2> : tensor<1xi8>}> : () -> tensor<1xi8>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x3xi8>, tensor<1x3x6xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x5x6xi32>
   return %0 : tensor<1x5x6xi32>
 }
 
@@ -37,7 +41,9 @@ func.func @matmul_dyn_batch(%arg0: tensor<?x5x3xf32>, %arg1: tensor<?x3x6xf32>)
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0_0]] : f32) outs(%[[INIT]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<?x5x3xf32>, tensor<?x3x6xf32>) outs(%[[FILLED]] : tensor<?x5x6xf32>) -> tensor<?x5x6xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<?x5x3xf32>, tensor<?x3x6xf32>) -> tensor<?x5x6xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<?x5x3xf32>, tensor<?x3x6xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<?x5x6xf32>
   return %0 : tensor<?x5x6xf32>
 }
 
@@ -51,7 +57,9 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x3xf32>, %arg1: tensor<1x
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM]])
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x3xf32>, tensor<1x3x?xf32>) outs(%[[FILLED]] : tensor<1x5x?xf32>) -> tensor<1x5x?xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x5x3xf32>, tensor<1x3x?xf32>) -> tensor<1x5x?xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x3xf32>, tensor<1x3x?xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<1x5x?xf32>
   return %0 : tensor<1x5x?xf32>
 }
 
@@ -63,7 +71,9 @@ func.func @matmul_dyn_independent_dim(%arg0: tensor<1x5x?xf32>, %arg1: tensor<1x
   // CHECK: %[[INIT:.+]] = tensor.empty()
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[C0]] : f32) outs(%[[INIT]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x5x?xf32>, tensor<1x?x6xf32>) outs(%[[FILLED]] : tensor<1x5x6xf32>) -> tensor<1x5x6xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x5x?xf32>, tensor<1x?x6xf32>) -> tensor<1x5x6xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x5x?xf32>, tensor<1x?x6xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<1x5x6xf32>
   return %0 : tensor<1x5x6xf32>
 }
 
@@ -77,7 +87,9 @@ func.func @matmul_dyn_output(%arg0: tensor<1x1x8xf32>, %arg1: tensor<1x8x1xf32>)
   // CHECK: %[[INIT:.+]] = tensor.empty(%[[DIM0]]) : tensor<?x1x1xf32>
   // CHECK: %[[FILLED:.+]] = linalg.fill ins(%[[CST]] : f32) outs(%[[INIT]] : tensor<?x1x1xf32>) -> tensor<?x1x1xf32>
   // CHECK: linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x1x8xf32>, tensor<1x8x1xf32>) outs(%[[FILLED]] : tensor<?x1x1xf32>) -> tensor<?x1x1xf32>
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<1x1x8xf32>, tensor<1x8x1xf32>) -> tensor<?x1x1xf32>
+  %a_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %b_zp = "tosa.const"() <{value = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32>
+  %0 = tosa.matmul %arg0, %arg1, %a_zp, %b_zp : (tensor<1x1x8xf32>, tensor<1x8x1xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<?x1x1xf32>
   return %0 : tensor<?x1x1xf32>
 }
 
diff --git a/mlir/test/Dialect/Mesh/sharding-propagation.mlir b/mlir/test/Dialect/Mesh/sharding-propagation.mlir
index 83136f613b020..14c67e670e921 100644
--- a/mlir/test/Dialect/Mesh/sharding-propagation.mlir
+++ b/mlir/test/Dialect/Mesh/sharding-propagation.mlir
@@ -98,14 +98,16 @@ func.func @arrow_structure(%arg0: tensor<8x16xf32>) -> (tensor<8x16xf32>, tensor
 }
 
 // CHECK-LABEL: func.func @matmul_on_def_shard_batch_and_m
-// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>
-func.func @matmul_on_def_shard_batch_and_m(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>) -> tensor<2x16x32xf32> {
+// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>, %[[ARG2:.*]]: tensor<1xf32>
+func.func @matmul_on_def_shard_batch_and_m(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>, %arg2: tensor<1xf32>) -> tensor<2x16x32xf32> {
   // CHECK-NEXT:  %[[S0:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}0], [1]] : !mesh.sharding
   // CHECK-NEXT:  %[[V0:.*]] = mesh.shard %[[ARG0]] to %[[S0]] annotate_for_users  : tensor<2x16x8xf32>
   // CHECK-NEXT:  %[[S1:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}0]] : !mesh.sharding
   // CHECK-NEXT:  %[[V1:.*]] = mesh.shard %[[ARG1]] to %[[S1]] annotate_for_users  : tensor<2x8x32xf32>
-  // CHECK-NEXT:  %[[V2:.*]] = tosa.matmul %[[V0]], %[[V1]]
-  %0 = tosa.matmul %arg0, %arg1 : (tensor<2x16x8xf32>, tensor<2x8x32xf32>) -> tensor<2x16x32xf32>
+  // CHECK-NEXT:  %[[S2:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}]] : !mesh.sharding
+  // CHECK-NEXT:  %[[ZP:.*]] = mesh.shard %[[ARG2]] to %[[S2]] annotate_for_users  : tensor<1xf32>
+  // CHECK-NEXT:  %[[V2:.*]] = tosa.matmul %[[V0]], %[[V1]], %[[ZP]], %[[ZP]]
+  %0 = tosa.matmul %arg0, %arg1, %arg2, %arg2 : (tensor<2x16x8xf32>, tensor<2x8x32xf32>, tensor<1xf32>, tensor<1xf32>)  -> tensor<2x16x32xf32>
   // CHECK-NEXT:  %[[S3:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}0], [1]] : !mesh.sharding
   // CHECK-NEXT:  %[[V3:.*]] = mesh.shard %[[V2]] to %[[S3]]  : tensor<2x16x32xf32>
   %s1 = mesh.sharding @mesh_2d split_axes = [[0], [1]] : !mesh.sharding
@@ -115,14 +117,16 @@ func.func @matmul_on_def_shard_batch_and_m(%arg0: tensor<2x16x8xf32>, %arg1: ten
 }
 
 // CHECK-LABEL: func.func @matmul_on_def_shard_m_and_k
-// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>
-func.func @matmul_on_def_shard_m_and_k(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>) -> tensor<2x16x32xf32> {
+// CHECK-SAME:     %[[ARG0:.*]]: tensor<2x16x8xf32>, %[[ARG1:.*]]: tensor<2x8x32xf32>, %[[ARG2:.*]]: tensor<1xf32>
+func.func @matmul_on_def_shard_m_and_k(%arg0: tensor<2x16x8xf32>, %arg1: tensor<2x8x32xf32>, %arg2: tensor<1xf32>) -> tensor<2x16x32xf32> {
   // CHECK-NEXT:  %[[S0:.*]] = mesh.sharding @mesh_2d split_axes = {{\[\[}}], [1], [0]] : !mesh.sharding
   // CHECK-NEXT:  %[[V0:.*]] = mesh.shar...
[truncated]

@Jerry-Ge Jerry-Ge requested review from GeorgeARM and lhutton1 and removed request for GeorgeARM March 4, 2025 22:15
Copy link
Contributor

@lhutton1 lhutton1 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Had a couple of comments, otherwise LGTM!

@Tai78641
Copy link
Contributor Author

Tai78641 commented Mar 5, 2025

rebased

Copy link
Contributor

@lhutton1 lhutton1 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM!

@Tai78641
Copy link
Contributor Author

Tai78641 commented Mar 6, 2025

rebased and resolved merge conflict in invalid.mlir

* Change zero-point attributes to inputs
* Fix relevant mlir tests
* Enhance ShardingInterface in MatMul

Signed-off-by: Udaya Ranga <[email protected]>
Change-Id: Ia58b15cba546a948a6a4d8e8ee26a72cd050de4e
@Jerry-Ge Jerry-Ge merged commit 106c964 into llvm:main Mar 7, 2025
11 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder mlir-rocm-mi200 running on mi200-buildbot while building mlir at step 7 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/177/builds/14218

Here is the relevant piece of the build log for reference:
Step 7 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Dialect/Tosa/ops.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/vol/worker/mi200-buildbot/mlir-rocm-mi200/build/bin/mlir-opt /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir --verify-each | /vol/worker/mi200-buildbot/mlir-rocm-mi200/build/bin/mlir-opt | /vol/worker/mi200-buildbot/mlir-rocm-mi200/build/bin/FileCheck /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# executed command: /vol/worker/mi200-buildbot/mlir-rocm-mi200/build/bin/mlir-opt /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir --verify-each
# .---command stderr------------
# | /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:837:8: error: 'tosa.matmul' op expected 4 operands, but found 2
# |   %0 = tosa.matmul %arg0, %arg1 : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# |        ^
# | /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:837:8: note: see current operation: %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# `-----------------------------
# error: command failed with exit status: 1
# executed command: /vol/worker/mi200-buildbot/mlir-rocm-mi200/build/bin/mlir-opt
# executed command: /vol/worker/mi200-buildbot/mlir-rocm-mi200/build/bin/FileCheck /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# .---command stderr------------
# | /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:6:17: error: CHECK-LABEL: expected string not found in input
# | // CHECK-LABEL: argmax
# |                 ^
# | <stdin>:1:1: note: scanning from here
# | module {
# | ^
# | 
# | Input file: <stdin>
# | Check file: /vol/worker/mi200-buildbot/mlir-rocm-mi200/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |          1: module { 
# | label:6     X~~~~~~~~ error: no match found
# |          2: } 
# | label:6     ~~
# |          3:  
# | label:6     ~
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

********************


@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder openmp-offload-sles-build-only running on rocm-worker-hw-04-sles while building mlir at step 10 "Add check check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/140/builds/18494

Here is the relevant piece of the build log for reference:
Step 10 (Add check check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Dialect/Tosa/ops.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/mlir-opt /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir --verify-each | /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/mlir-opt | /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/FileCheck /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir
# executed command: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/mlir-opt /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir --verify-each
# .---command stderr------------
# | /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir:837:8: error: 'tosa.matmul' op expected 4 operands, but found 2
# |   %0 = tosa.matmul %arg0, %arg1 : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# |        ^
# | /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir:837:8: note: see current operation: %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# `-----------------------------
# error: command failed with exit status: 1
# executed command: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/mlir-opt
# executed command: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.build/bin/FileCheck /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir
# .---command stderr------------
# | /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir:6:17: error: CHECK-LABEL: expected string not found in input
# | // CHECK-LABEL: argmax
# |                 ^
# | <stdin>:1:1: note: scanning from here
# | module {
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/botworker/bbot/builds/openmp-offload-sles-build/llvm.src/mlir/test/Dialect/Tosa/ops.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |          1: module { 
# | label:6     X~~~~~~~~ error: no match found
# |          2: } 
# | label:6     ~~
# |          3:  
# | label:6     ~
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

********************


@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder ppc64le-mlir-rhel-clang running on ppc64le-mlir-rhel-test while building mlir at step 6 "test-build-check-mlir-build-only-check-mlir".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/129/builds/16321

Here is the relevant piece of the build log for reference:
Step 6 (test-build-check-mlir-build-only-check-mlir) failure: test (failure)
******************** TEST 'MLIR :: Dialect/Tosa/ops.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/build/bin/mlir-opt /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir --verify-each | /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/build/bin/mlir-opt | /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/build/bin/FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# executed command: /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/build/bin/mlir-opt /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir --verify-each
# .---command stderr------------
# | /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:837:8: error: 'tosa.matmul' op expected 4 operands, but found 2
# |   %0 = tosa.matmul %arg0, %arg1 : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# |        ^
# | /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:837:8: note: see current operation: %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# `-----------------------------
# error: command failed with exit status: 1
# executed command: /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/build/bin/mlir-opt
# executed command: /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/build/bin/FileCheck /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# .---command stderr------------
# | /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:6:17: error: CHECK-LABEL: expected string not found in input
# | // CHECK-LABEL: argmax
# |                 ^
# | <stdin>:1:1: note: scanning from here
# | module {
# | ^
# | 
# | Input file: <stdin>
# | Check file: /home/buildbots/llvm-external-buildbots/workers/ppc64le-mlir-rhel-test/ppc64le-mlir-rhel-clang-build/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |          1: module { 
# | label:6     X~~~~~~~~ error: no match found
# |          2: } 
# | label:6     ~~
# |          3:  
# | label:6     ~
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

********************


llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Mar 7, 2025
@llvm-ci
Copy link
Collaborator

llvm-ci commented Mar 7, 2025

LLVM Buildbot has detected a new failure on builder premerge-monolithic-linux running on premerge-linux-1 while building mlir at step 7 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/153/builds/25040

Here is the relevant piece of the build log for reference:
Step 7 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'MLIR :: Dialect/Tosa/ops.mlir' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 1
/build/buildbot/premerge-monolithic-linux/build/bin/mlir-opt /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir --verify-each | /build/buildbot/premerge-monolithic-linux/build/bin/mlir-opt | /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# executed command: /build/buildbot/premerge-monolithic-linux/build/bin/mlir-opt /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir --verify-each
# .---command stderr------------
# | /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:837:8: error: 'tosa.matmul' op expected 4 operands, but found 2
# |   %0 = tosa.matmul %arg0, %arg1 : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# |        ^
# | /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:837:8: note: see current operation: %0 = "tosa.matmul"(%arg0, %arg1) : (tensor<1x14x19xf8E5M2>, tensor<1x19x28xf8E5M2>) -> tensor<1x14x28xf16>
# `-----------------------------
# error: command failed with exit status: 1
# executed command: /build/buildbot/premerge-monolithic-linux/build/bin/mlir-opt
# note: command had no output on stdout or stderr
# executed command: /build/buildbot/premerge-monolithic-linux/build/bin/FileCheck /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# .---command stderr------------
# | /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir:6:17: error: CHECK-LABEL: expected string not found in input
# | // CHECK-LABEL: argmax
# |                 ^
# | <stdin>:1:1: note: scanning from here
# | module {
# | ^
# | 
# | Input file: <stdin>
# | Check file: /build/buildbot/premerge-monolithic-linux/llvm-project/mlir/test/Dialect/Tosa/ops.mlir
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# |          1: module { 
# | label:6     X~~~~~~~~ error: no match found
# |          2: } 
# | label:6     ~~
# |          3:  
# | label:6     ~
# | >>>>>>
# `-----------------------------
# error: command failed with exit status: 1

--

********************


jph-13 pushed a commit to jph-13/llvm-project that referenced this pull request Mar 21, 2025
* Change zero-point attributes to inputs
* Fix relevant mlir tests
* Enhance ShardingInterface in MatMul

Signed-off-by: Udaya Ranga <[email protected]>
Co-authored-by: Udaya Ranga <[email protected]>
jph-13 pushed a commit to jph-13/llvm-project that referenced this pull request Mar 21, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

8 participants