Skip to content

Commit ef8e883

Browse files
committed
[mlir][TOSA] Fix linalg lowering of depthwise conv2d (#130282)
The current lowering for tosa.depthwise_conv2d assumes that if both zero points are zero, the operation is floating-point, and hardcodes the use of an arith.addf in the lowered code. Fix the code to check the element type to decide which add operation to use.
1 parent 659cca7 commit ef8e883

File tree

2 files changed

+43
-19
lines changed

2 files changed

+43
-19
lines changed

mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -477,27 +477,21 @@ class DepthwiseConvConverter
477477
return rewriter.notifyMatchFailure(
478478
op, "weight zero point must be zero for non-int8 integer types");
479479

480-
bool hasZp = (inputZpVal != 0) || (weightZpVal != 0);
481480
auto weightShape = weightTy.getShape();
482481
auto resultShape = resultTy.getShape();
483482

484483
// Apply padding as necessary.
485-
TypedAttr zeroAttr = rewriter.getZeroAttr(inputETy);
486-
if (hasZp) {
487-
int64_t intMin =
488-
APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth())
489-
.getSExtValue();
490-
int64_t intMax =
491-
APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth())
492-
.getSExtValue();
484+
int64_t intMin = APInt::getSignedMinValue(inputETy.getIntOrFloatBitWidth())
485+
.getSExtValue();
486+
int64_t intMax = APInt::getSignedMaxValue(inputETy.getIntOrFloatBitWidth())
487+
.getSExtValue();
493488

494-
if (inputZpVal < intMin || inputZpVal > intMax)
495-
return rewriter.notifyMatchFailure(
496-
op, "tosa.depthwise_conv op quantization has zp outside of input "
497-
"range");
489+
if (inputZpVal < intMin || inputZpVal > intMax)
490+
return rewriter.notifyMatchFailure(
491+
op, "tosa.depthwise_conv op quantization has zp outside of input "
492+
"range");
498493

499-
zeroAttr = rewriter.getIntegerAttr(inputETy, inputZpVal);
500-
}
494+
TypedAttr zeroAttr = rewriter.getIntegerAttr(inputETy, inputZpVal);
501495

502496
llvm::SmallVector<int64_t> pad;
503497
pad.resize(2, 0);
@@ -536,7 +530,7 @@ class DepthwiseConvConverter
536530
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
537531
indexingMaps.push_back(rewriter.getMultiDimIdentityMap(resultRank));
538532

539-
if (!hasZp) {
533+
if (inputZpVal == 0 && weightZpVal == 0) {
540534
Value conv = rewriter
541535
.create<linalg::DepthwiseConv2DNhwcHwcmOp>(
542536
loc, linalgConvTy, ValueRange{input, weight},
@@ -556,8 +550,13 @@ class DepthwiseConvConverter
556550
getNParallelLoopsAttrs(resultRank),
557551
[&](OpBuilder &nestedBuilder, Location nestedLoc,
558552
ValueRange args) {
559-
Value added = nestedBuilder.create<arith::AddFOp>(
560-
loc, args[0], args[1]);
553+
Value added;
554+
if (llvm::isa<FloatType>(inputETy))
555+
added = nestedBuilder.create<arith::AddFOp>(loc, args[0],
556+
args[1]);
557+
else
558+
added = nestedBuilder.create<arith::AddIOp>(loc, args[0],
559+
args[1]);
561560
nestedBuilder.create<linalg::YieldOp>(nestedLoc, added);
562561
})
563562
.getResult(0);

mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -798,9 +798,10 @@ func.func @depthwise_conv2d_dyn_w_h(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor<3x
798798
// CHECK: arith.subi
799799
// CHECK: arith.muli
800800
// CHECK: arith.divui
801+
// CHECK: [[CST0:%.+]] = arith.constant 0
801802
// CHECK: %[[PADDED:.+]] = tensor.pad %arg0 low[0, 1, 3, 0] high[0, 2, 4, 0] {
802803
// CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: index, %[[ARG4:[0-9a-zA-Z_]+]]: index, %[[ARG5:[0-9a-zA-Z_]+]]: index, %[[ARG6:[0-9a-zA-Z_]+]]: index):
803-
// CHECK: tensor.yield %cst : f32
804+
// CHECK: tensor.yield [[CST0]] : f32
804805
// CHECK: } : tensor<2x?x?x3xf32> to tensor<2x?x?x3xf32>
805806
// CHECK: %[[CONV:.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<[2, 1]> : tensor<2xi64>, strides = dense<[1, 2]> : tensor<2xi64>} ins(%[[PADDED]], %arg1 : tensor<2x?x?x3xf32>, tensor<3x6x3x5xf32>) outs(%{{.*}} : tensor<2x?x?x3x5xf32>) -> tensor<2x?x?x3x5xf32>
806807
// CHECK: %[[COLLAPSED:.+]] = tensor.collapse_shape %[[CONV]] {{\[}}[0], [1], [2], [3, 4]]
@@ -812,6 +813,30 @@ func.func @depthwise_conv2d_dyn_w_h(%arg0: tensor<2x?x?x3xf32>, %arg1: tensor<3x
812813

813814
// -----
814815

816+
// CHECK: #[[$MAP0:.*]] = affine_map<(d0, d1, d2, d3) -> (d3)>
817+
// CHECK: #[[$MAP1:.*]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
818+
819+
// CHECK-LABEL: @depthwise_int_conv_zero_zp
820+
func.func @depthwise_int_conv_zero_zp(%arg0 : tensor<1x7x5x3xi8>, %arg1 : tensor<3x1x3x11xi8>, %arg2 : tensor<33xi32>) -> () {
821+
// CHECK: [[INIT:%.+]] = tensor.empty()
822+
// CHECK: [[CST0:%.+]] = arith.constant 0
823+
// CHECK: [[FILL:%.+]] = linalg.fill ins([[CST0]]{{.*}}outs([[INIT]]
824+
// CHECK: [[OUT:%.+]] = tensor.empty()
825+
// CHECK: [[DEPTH:%.+]] = linalg.depthwise_conv_2d_nhwc_hwcm {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%arg0, %arg1 : tensor<1x7x5x3xi8>, tensor<3x1x3x11xi8>) outs([[FILL]] : tensor<1x5x5x3x11xi32>)
826+
// CHECK: [[COLLAPSED:%.+]] = tensor.collapse_shape [[DEPTH]] {{\[}}[0], [1], [2], [3, 4]]
827+
// CHECK: [[BIAS:%.+]] = linalg.generic {indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP1]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%arg2, [[COLLAPSED]] : tensor<33xi32>, tensor<1x5x5x33xi32>) outs([[OUT]] : tensor<1x5x5x33xi32>) {
828+
// CHECK: ^bb0(%[[ARG3:[0-9a-zA-Z_]+]]: i32, %[[ARG4:[0-9a-zA-Z_]+]]: i32, %[[ARG5:[0-9a-zA-Z_]+]]: i32):
829+
// CHECK: [[ADD:%.+]] = arith.addi %[[ARG3]], %[[ARG4]] : i32
830+
// CHECK: linalg.yield [[ADD]] : i32
831+
// CHECK: } -> tensor<1x5x5x33xi32>
832+
%input_zp = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
833+
%weight_zp = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8>
834+
%2 = tosa.depthwise_conv2d %arg0, %arg1, %arg2, %input_zp, %weight_zp {acc_type = i32, pad = array<i64: 0, 0, 0, 0>, stride = array<i64: 1, 1>, dilation = array<i64: 1, 1> } : (tensor<1x7x5x3xi8>, tensor<3x1x3x11xi8>, tensor<33xi32>, tensor<1xi8>, tensor<1xi8>) -> tensor<1x5x5x33xi32>
835+
return
836+
}
837+
838+
// -----
839+
815840
// CHECK: #[[$MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d4)>
816841
// CHECK: #[[$MAP2:.+]] = affine_map<(d0, d1, d2, d3, d4) -> (d0, d1, d2, d3, d4)>
817842

0 commit comments

Comments
 (0)