llvm
diff --git a/mlir/lib/Dialect/Quant/Transforms/LowerQuantOps.cpp b/mlir/lib/Dialect/Quant/Transforms/LowerQuantOps.cpp
Lines changed: 130 additions & 84 deletions
@@ -38,11 +38,11 @@ Type getScalarType(Type inputType) {
   return inputType;
 }
 
-// Return the shape of an input value as a list of attributes (static dimensions)
-// and values (dynamic dimensions). If 'input' is a scalar, an empty list is
-// returned. If 'input' is a tensor, its shape is returned.
-SmallVector<OpFoldResult>
-getScalarOrTensorShape(OpBuilder &builder, Location loc, Value input) {
+// Return the shape of an input value as a list of attributes (static
+// dimensions) and values (dynamic dimensions). If 'input' is a scalar, an empty
+// list is returned. If 'input' is a tensor, its shape is returned.
+SmallVector<OpFoldResult> getScalarOrTensorShape(OpBuilder &builder,
+                                                 Location loc, Value input) {
   if (isa<TensorType>(input.getType()))
     return tensor::getMixedSizes(builder, loc, input);
   return {};
@@ -100,16 +100,16 @@ std::pair<Value, Value> flattenUnrankedTensor(OpBuilder &builder, Location loc,
 
   // Turn input size into 1D tensor
   auto flatShapeType = shape::getExtentTensorType(context, 1);
-  auto flatInputShape = builder.create<tensor::FromElementsOp>(
-      loc, flatShapeType, inputSize);
+  auto flatInputShape =
+      builder.create<tensor::FromElementsOp>(loc, flatShapeType, inputSize);
 
   // Reshape input tensor into 1D
   auto inputType = cast<UnrankedTensorType>(input.getType());
   auto elementType = inputType.getElementType();
   auto flatInputType =
       RankedTensorType::get({ShapedType::kDynamic}, elementType);
-  auto flatInput = builder.create<tensor::ReshapeOp>(
-      loc, flatInputType, input, flatInputShape);
+  auto flatInput = builder.create<tensor::ReshapeOp>(loc, flatInputType, input,
+                                                     flatInputShape);
   return std::make_pair(flatInput, inputShape);
 }
 
@@ -135,11 +135,9 @@ std::pair<Value, Value> flattenUnrankedTensor(OpBuilder &builder, Location loc,
 // - inputShape
 //   1D extent tensor containing the shape of the original unranked input.
 //
-std::pair<Value, Value> flattenUnrankedTensorAroundAxis(OpBuilder &builder,
-                                                        Location loc,
-                                                        Value input,
-                                                        int64_t axis,
-                                                        int64_t axisSize) {
+std::pair<Value, Value>
+flattenUnrankedTensorAroundAxis(OpBuilder &builder, Location loc, Value input,
+                                int64_t axis, int64_t axisSize) {
   // Get full tensor shape
   auto *context = builder.getContext();
   auto indexType = builder.getIndexType();
@@ -149,16 +147,20 @@ std::pair<Value, Value> flattenUnrankedTensorAroundAxis(OpBuilder &builder,
   // Get shape and sizes on left and right of axis
   auto axisValue = builder.create<arith::ConstantIndexOp>(loc, axis);
   auto axisNextValue = builder.create<arith::ConstantIndexOp>(loc, axis + 1);
-  auto shapeLeft = builder.create<shape::SplitAtOp>(
-      loc, TypeRange{shapeType, shapeType}, inputShape, axisValue)
-      .getResult(0);
-  auto sizeLeft = builder.create<shape::NumElementsOp>(
-      loc, indexType, shapeLeft);
-  auto shapeRight = builder.create<shape::SplitAtOp>(
-      loc, TypeRange{shapeType, shapeType}, inputShape, axisNextValue)
-      .getResult(1);
-  auto sizeRight = builder.create<shape::NumElementsOp>(
-      loc, indexType, shapeRight);
+  auto shapeLeft =
+      builder
+          .create<shape::SplitAtOp>(loc, TypeRange{shapeType, shapeType},
+                                    inputShape, axisValue)
+          .getResult(0);
+  auto sizeLeft =
+      builder.create<shape::NumElementsOp>(loc, indexType, shapeLeft);
+  auto shapeRight =
+      builder
+          .create<shape::SplitAtOp>(loc, TypeRange{shapeType, shapeType},
+                                    inputShape, axisNextValue)
+          .getResult(1);
+  auto sizeRight =
+      builder.create<shape::NumElementsOp>(loc, indexType, shapeRight);
 
   // Compute flat input shape as a 3-element 1D tensor
   auto axisSizeValue = builder.create<arith::ConstantIndexOp>(loc, axisSize);
@@ -171,8 +173,8 @@ std::pair<Value, Value> flattenUnrankedTensorAroundAxis(OpBuilder &builder,
   auto elementType = inputType.getElementType();
   auto flatInputType = RankedTensorType::get(
       {ShapedType::kDynamic, axisSize, ShapedType::kDynamic}, elementType);
-  auto flatInput = builder.create<tensor::ReshapeOp>(
-      loc, flatInputType, input, flatInputShape);
+  auto flatInput = builder.create<tensor::ReshapeOp>(loc, flatInputType, input,
+                                                     flatInputShape);
 
   return std::make_pair(flatInput, inputShape);
 }
@@ -190,7 +192,8 @@ Value restoreUnrankedTensorShape(OpBuilder &builder, Location loc, Value input,
   auto inputType = cast<RankedTensorType>(input.getType());
   auto elementType = inputType.getElementType();
   auto unrankedType = UnrankedTensorType::get(elementType);
-  return builder.create<tensor::ReshapeOp>(loc, unrankedType, input, inputShape);
+  return builder.create<tensor::ReshapeOp>(loc, unrankedType, input,
+                                           inputShape);
 }
 
 // Create a tensor constant containing all scales in a per-channel quantized
@@ -209,7 +212,8 @@ Value materializePerChannelScales(OpBuilder &builder, Location loc,
   auto scaleAttrs = llvm::map_to_vector(scales, [&](double scale) -> Attribute {
     return builder.getFloatAttr(expressedType, scale);
   });
-  auto tensorType = RankedTensorType::get({(int64_t) scales.size()}, expressedType);
+  auto tensorType =
+      RankedTensorType::get({(int64_t)scales.size()}, expressedType);
   auto scalesAttr = DenseElementsAttr::get(tensorType, scaleAttrs);
   return builder.create<arith::ConstantOp>(loc, tensorType, scalesAttr);
 }
@@ -228,9 +232,8 @@ Value materializePerChannelZeroPoints(
     UniformQuantizedPerAxisType quantizedType) {
   auto zeroPoints = quantizedType.getZeroPoints();
   auto storageType = quantizedType.getStorageType();
-  auto zeroPointAttrs = llvm::map_to_vector(
-      zeroPoints,
-      [&](int64_t zeroPoint) -> Attribute {
+  auto zeroPointAttrs =
+      llvm::map_to_vector(zeroPoints, [&](int64_t zeroPoint) -> Attribute {
         return builder.getIntegerAttr(storageType, zeroPoint);
       });
   auto tensorType =
@@ -239,6 +242,54 @@ Value materializePerChannelZeroPoints(
   return builder.create<arith::ConstantOp>(loc, tensorType, zeroPointsAttr);
 }
 
+// Create a tensor constant containing all scales in a sub-channel quantized
+// type. The constant takes its shape from the type's scales attribute and its
+// element type from the type's expressed type. Example:
+//   !quant.uniform<i8:f32:{0:1,1:2}, {{2.0:10, 3.0:20}, {4.0:30, 5.0:40}}>
+//
+// produces
+//
+//   %cst = arith.constant dense<[[2.0, 3.0], [4.0, 5.0]]> : tensor<2x2xf32>
+//
+Value materializeSubChannelScales(
+    OpBuilder &builder, Location loc,
+    UniformQuantizedSubChannelType quantizedType) {
+  auto scales = quantizedType.getScales();
+  auto expressedType = quantizedType.getExpressedType();
+  auto scaleAttrs = llvm::map_to_vector(
+      scales.getValues<APFloat>(), [&](APFloat scale) -> Attribute {
+        return builder.getFloatAttr(expressedType, scale);
+      });
+  auto tensorType =
+      RankedTensorType::get(scales.getType().getShape(), expressedType);
+  auto scalesAttr = DenseElementsAttr::get(tensorType, scaleAttrs);
+  return builder.create<arith::ConstantOp>(loc, tensorType, scalesAttr);
+}
+
+// Create a tensor constant containing all zero points in a sub-channel
+// quantized type. The constant takes its shape from the type's zero-point
+// attribute and its element type from the type's storage type. Example:
+//   !quant.uniform<i8:f32:{0:1,1:2}, {{2.0:10, 3.0:20}, {4.0:30, 5.0:40}}>
+//
+// produces
+//
+//   %cst = arith.constant dense<[[10, 20], [30, 40]]> : tensor<2x2xi8>
+//
+Value materializeSubChannelZeroPoints(
+    OpBuilder &builder, Location loc,
+    UniformQuantizedSubChannelType quantizedType) {
+  auto zeroPoints = quantizedType.getZeroPoints();
+  auto storageType = quantizedType.getStorageType();
+  auto zeroPointAttrs = llvm::map_to_vector(
+      zeroPoints.getValues<APInt>(), [&](APInt zeroPoint) -> Attribute {
+        return builder.getIntegerAttr(storageType, zeroPoint);
+      });
+  auto tensorType =
+      RankedTensorType::get(zeroPoints.getType().getShape(), storageType);
+  auto zeroPointsAttr = DenseElementsAttr::get(tensorType, zeroPointAttrs);
+  return builder.create<arith::ConstantOp>(loc, tensorType, zeroPointsAttr);
+}
+
 // Clamp the given scalar or tensor input using the storage bounds encoded in
 // the given quantized type, if present.
 //
@@ -299,7 +350,7 @@ Value convertIntegerToFloat(OpBuilder &builder, Location loc, Value input,
   return builder.create<arith::UIToFPOp>(loc, resultType, input);
 }
 
-// Quantize a scalar or ranked tensor value. The stored value is clamped using 
+// Quantize a scalar or ranked tensor value. The stored value is clamped using
 // the storage bounds encoded in the given quantized type.
 //
 // See function 'convertRanked()' below for a description of the arguments.
@@ -308,8 +359,7 @@ Value quantizeValue(OpBuilder &builder, Location loc, Value input,
                     Value zeroPoint, QuantizedType quantizedType) {
   // Convert scale to tensor if necessary
   auto inputType = input.getType();
-  scale = getScalarOrTensorConstant(
-      builder, loc, scale, inputType, inputShape);
+  scale = getScalarOrTensorConstant(builder, loc, scale, inputType, inputShape);
 
   // Scale input
   auto scaledValue = builder.create<arith::DivFOp>(loc, input, scale);
@@ -322,8 +372,7 @@ Value quantizeValue(OpBuilder &builder, Location loc, Value input,
                                           inputShape);
 
     // Convert zero point from storage to expressed type
-    zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint,
-                                      scale.getType(),
+    zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, scale.getType(),
                                       quantizedType.isSigned());
 
     // Add zero point to stored value
@@ -334,9 +383,9 @@ Value quantizeValue(OpBuilder &builder, Location loc, Value input,
   // Convert stored value to storage type
   auto storageScalarOrTensorType =
       getScalarOrTensorType(quantizedType.getStorageType(), inputType);
-  auto storedValueInt = convertFloatToInteger(
-      builder, loc, storedValueFloat, storageScalarOrTensorType,
-      quantizedType.isSigned());
+  auto storedValueInt = convertFloatToInteger(builder, loc, storedValueFloat,
+                                              storageScalarOrTensorType,
+                                              quantizedType.isSigned());
 
   // Clamp the stored value if the storage type is bound
   auto storedValueClamped = clampScalarOrTensor(builder, loc, storedValueInt,
@@ -352,12 +401,11 @@ Value dequantizeValue(OpBuilder &builder, Location loc, Value input,
                       Value zeroPoint, QuantizedType quantizedType) {
   // Convert scale to tensor if necessary
   auto inputType = input.getType();
-  scale = getScalarOrTensorConstant(
-      builder, loc, scale, inputType, inputShape);
+  scale = getScalarOrTensorConstant(builder, loc, scale, inputType, inputShape);
 
   // Convert stored value to float
-  auto result = convertIntegerToFloat(
-      builder, loc, input, scale.getType(), quantizedType.isSigned());
+  auto result = convertIntegerToFloat(builder, loc, input, scale.getType(),
+                                      quantizedType.isSigned());
 
   // Skip unnecessary computations if no zero point is given
   if (!matchPattern(zeroPoint, m_Zero())) {
@@ -366,8 +414,7 @@ Value dequantizeValue(OpBuilder &builder, Location loc, Value input,
                                           inputShape);
 
     // Convert zero point from storage to expressed type
-    zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint,
-                                      scale.getType(),
+    zeroPoint = convertIntegerToFloat(builder, loc, zeroPoint, scale.getType(),
                                       quantizedType.isSigned());
 
     // Subtract zero point from stored value
@@ -501,35 +548,33 @@ Value convertPerChannelRanked(OpBuilder &builder, Location loc, Operation *op,
   auto initShape = tensor::getMixedSizes(builder, loc, input);
   Value init = builder.create<tensor::EmptyOp>(loc, initShape, elementType);
 
-  SmallVector<utils::IteratorType> iteratorTypes(
-      inputRank, utils::IteratorType::parallel);
+  SmallVector<utils::IteratorType> iteratorTypes(inputRank,
+                                                 utils::IteratorType::parallel);
   auto channelAxisAffineMap = AffineMap::get(
       inputRank, 0, builder.getAffineDimExpr(channelAxis), context);
   SmallVector<AffineMap> indexingMaps{
-    builder.getMultiDimIdentityMap(inputRank),
-    channelAxisAffineMap,
-    channelAxisAffineMap,
-    builder.getMultiDimIdentityMap(inputRank)
-  };
-  auto result = builder.create<linalg::GenericOp>(
-      loc,
-      init.getType(),  // resultType
-      ValueRange{input, scales, zeroPoints},  // inputs
-      ValueRange{init},  // outputs
-      indexingMaps,
-      iteratorTypes,
-      [&](OpBuilder& builder, Location loc, ValueRange args) {
-        assert(args.size() == 4);
-        auto input = args[0];
-        auto scale = args[1];
-        auto zeroPoint = args[2];
-
-        auto result = convertRanked(builder, loc, op, input, {}, scale,
-                                    zeroPoint, quantizedType);
-
-        builder.create<linalg::YieldOp>(loc, result);
-      })
-      .getResult(0);
+      builder.getMultiDimIdentityMap(inputRank), channelAxisAffineMap,
+      channelAxisAffineMap, builder.getMultiDimIdentityMap(inputRank)};
+  auto result = builder
+                    .create<linalg::GenericOp>(
+                        loc,
+                        init.getType(),                        // resultType
+                        ValueRange{input, scales, zeroPoints}, // inputs
+                        ValueRange{init},                      // outputs
+                        indexingMaps, iteratorTypes,
+                        [&](OpBuilder &builder, Location loc, ValueRange args) {
+                          assert(args.size() == 4);
+                          auto input = args[0];
+                          auto scale = args[1];
+                          auto zeroPoint = args[2];
+
+                          auto result =
+                              convertRanked(builder, loc, op, input, {}, scale,
+                                            zeroPoint, quantizedType);
+
+                          builder.create<linalg::YieldOp>(loc, result);
+                        })
+                    .getResult(0);
 
   return result;
 }
@@ -551,7 +596,7 @@ Value convertPerChannel(OpBuilder &builder, Location loc, Operation *op,
   // Flatten unranked tensor into a 3D ranked tensor if necessary
   bool isUnranked = isa<UnrankedTensorType>(input.getType());
   int64_t channelAxis = quantizedType.getQuantizedDimension();
-  int64_t channelAxisSize = (int64_t) quantizedType.getScales().size();
+  int64_t channelAxisSize = (int64_t)quantizedType.getScales().size();
   Value inputShape;
   if (isUnranked) {
     std::tie(input, inputShape) = flattenUnrankedTensorAroundAxis(
@@ -660,11 +705,17 @@ Value convertQuantized(OpBuilder &builder, Location loc, Operation *op,
     return convertPerChannel(builder, loc, op, input,
                              uniformQuantizedPerAxisType);
 
+  if (auto uniformQuantizedSubChannelType =
+          dyn_cast<UniformQuantizedSubChannelType>(quantizedType))
+    return convertSubChannel(builder, loc, op, input,
+                             uniformQuantizedSubChannelType);
+
   llvm_unreachable("unexpected quantized type");
 }
 
 // Lowering pattern for 'quant.dcast'
-struct DequantizeCastOpConversion : public OpConversionPattern<quant::DequantizeCastOp> {
+struct DequantizeCastOpConversion
+    : public OpConversionPattern<quant::DequantizeCastOp> {
   using OpConversionPattern<quant::DequantizeCastOp>::OpConversionPattern;
 
   LogicalResult
@@ -689,7 +740,8 @@ struct DequantizeCastOpConversion : public OpConversionPattern<quant::Dequantize
 };
 
 // Lowering pattern for 'quant.qcast'
-struct QuantizeCastOpConversion : public OpConversionPattern<quant::QuantizeCastOp> {
+struct QuantizeCastOpConversion
+    : public OpConversionPattern<quant::QuantizeCastOp> {
   using OpConversionPattern<quant::QuantizeCastOp>::OpConversionPattern;
 
   LogicalResult
@@ -717,12 +769,8 @@ struct LowerQuantOps : public impl::LowerQuantOpsBase<LowerQuantOps> {
     ConversionTarget target(getContext());
     target.addLegalOp<quant::StorageCastOp>();
     target.addIllegalDialect<quant::QuantDialect>();
-    target.addLegalDialect<
-      arith::ArithDialect,
-      linalg::LinalgDialect,
-      shape::ShapeDialect,
-      tensor::TensorDialect
-    >();
+    target.addLegalDialect<arith::ArithDialect, linalg::LinalgDialect,
+                           shape::ShapeDialect, tensor::TensorDialect>();
 
     if (failed(applyPartialConversion(getOperation(), target,
                                       std::move(patterns))))
@@ -733,10 +781,8 @@ struct LowerQuantOps : public impl::LowerQuantOpsBase<LowerQuantOps> {
 } // namespace
 
 void populateLowerQuantOpsPatterns(RewritePatternSet &patterns) {
-  patterns.add<
-    DequantizeCastOpConversion,
-    QuantizeCastOpConversion
-  >(patterns.getContext());
+  patterns.add<DequantizeCastOpConversion, QuantizeCastOpConversion>(
+      patterns.getContext());
 }
 
 } // namespace quant