Skip to content

Commit 02bf3b5

Browse files
authored
[mlir][linalg] Add quantized conv2d operator with FCHW,NCHW order (#107740)
This patch adds a quantized version of the `linalg.conv2d_nchw_fchw` Op. This is the "channel-first" ordering typically used by PyTorch and others.
1 parent 0f0a96b commit 02bf3b5

File tree

3 files changed

+196
-0
lines changed

3 files changed

+196
-0
lines changed

mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOps.yaml

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3114,6 +3114,143 @@ structured_op: !LinalgStructuredOpConfig
31143114
- !ScalarExpression
31153115
scalar_arg: KZp
31163116
# NOTE(review): this file is generated from the opdsl Python definitions
# (core_named_ops.py: conv_2d_nchw_fchw_q) — keep the two in sync via the
# generator rather than hand-editing tokens here.
# Symbol correspondence (derived from the Python TensorDefs): s0=N, s1=C,
# s2=OH, s3=SH, s4=KH, s5=DH, s6=OW, s7=SW, s8=KW, s9=DW, s10=F.
--- !LinalgOpConfig
metadata: !LinalgOpMetadata
  name: conv_2d_nchw_fchw_q
  cpp_class_name: Conv2DNchwFchwQOp
  doc: |-
    Performs 2-D convolution with zero point offsets.

    Layout:
      * Input: NCHW.
      * Kernel: FCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
  implements:
  - LinalgConvolutionOpInterface
structured_op: !LinalgStructuredOpConfig
  args:
  - !LinalgOperandDefConfig
    name: I
    kind: input_tensor
    type_var: T1
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s1, s2 * s3 + s4 * s5, s6 * s7 + s8 * s9)>
  - !LinalgOperandDefConfig
    name: K
    kind: input_tensor
    type_var: T2
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s10,
      s1, s4, s8)>
  - !LinalgOperandDefConfig
    name: IZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: KZp
    kind: scalar
    type_var: I32
  - !LinalgOperandDefConfig
    name: O
    kind: output_tensor
    type_var: U
    shape_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] -> (s0,
      s10, s2, s6)>
  - !LinalgOperandDefConfig
    name: strides
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s3, s7)>
    default_indices:
    - 1
    - 1
  - !LinalgOperandDefConfig
    name: dilations
    kind: index_attr
    index_attr_map: affine_map<()[s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10] ->
      (s5, s9)>
    default_indices:
    - 1
    - 1
  indexing_maps: !LinalgIndexingMapsConfig
    # Iteration space (d0..d6) = (n, f, oh, ow, c, kh, kw); the first four are
    # parallel output dims, the last three are reductions (see iterator_types).
    static_indexing_maps:
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d4, d2 * s3 + d5 * s5, d3 * s7 + d6 * s9)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d1, d4, d5, d6)>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> ()>
    - affine_map<(d0, d1, d2, d3, d4, d5, d6)[s0, s1, s2, s3, s4, s5, s6, s7, s8,
      s9, s10] -> (d0, d1, d2, d3)>
  iterator_types:
  - parallel
  - parallel
  - parallel
  - parallel
  - reduction
  - reduction
  - reduction
  # O += (cast(I) - cast(IZp)) * (cast(K) - cast(KZp)): zero points are
  # subtracted after promoting both operands to the accumulator type U.
  assignments:
  - !ScalarAssign
    arg: O
    value: !ScalarExpression
      scalar_fn:
        kind: binary
        fn_name: add
        operands:
        - !ScalarExpression
          scalar_arg: O
        - !ScalarExpression
          scalar_fn:
            kind: binary
            fn_name: mul
            operands:
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: I
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: IZp
            - !ScalarExpression
              scalar_fn:
                kind: binary
                fn_name: sub
                operands:
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: K
                - !ScalarExpression
                  scalar_fn:
                    kind: type
                    fn_name: cast_signed
                    type_var: U
                    operands:
                    - !ScalarExpression
                      scalar_arg: KZp
3253+
--- !LinalgOpConfig
31173254
metadata: !LinalgOpMetadata
31183255
name: conv_2d_nchw_fchw
31193256
cpp_class_name: Conv2DNchwFchwOp

mlir/python/mlir/dialects/linalg/opdsl/ops/core_named_ops.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,35 @@ def conv_2d_nhwc_fhwc_q(
876876
) * (TypeFn.cast_signed(U, K[D.f, D.kh, D.kw, D.c]) - TypeFn.cast_signed(U, KZp))
877877

878878

879+
@linalg_structured_op
def conv_2d_nchw_fchw_q(
    # Input is NCHW; the spatial extents are expressed as OH*SH + KH*DH so the
    # generated shape_map ties input size to output size, stride and dilation.
    I=TensorDef(T1, S.N, S.C, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW),
    # Kernel is FCHW ("channel-first"), matching the layout used by PyTorch.
    K=TensorDef(T2, S.F, S.C, S.KH, S.KW),
    # Scalar zero-point offsets for the input and kernel (quantization).
    IZp=ScalarDef(I32),
    KZp=ScalarDef(I32),
    O=TensorDef(U, S.N, S.F, S.OH, S.OW, output=True),
    strides=IndexAttrDef(S.SH, S.SW, default=[1, 1]),
    dilations=IndexAttrDef(S.DH, S.DW, default=[1, 1]),
):
    """Performs 2-D convolution with zero point offsets.

    Layout:
      * Input: NCHW.
      * Kernel: FCHW.

    Numeric casting is performed on the operands to the inner multiply, promoting
    them to the same data type as the accumulator/output. This includes the zero
    point offsets common to quantized operations.
    """
    implements(ConvolutionOpInterface)
    # Domain order fixes the iterator order: (n, f, oh, ow) are parallel output
    # dims, (c, kh, kw) are the reduction dims of the generated op.
    domain(D.n, D.f, D.oh, D.ow, D.c, D.kh, D.kw)
    # Accumulate (cast(I) - IZp) * (cast(K) - KZp) into O; both operands are
    # sign-extended to the accumulator type U before the zero points are
    # subtracted, as in the other *_q named convolutions.
    O[D.n, D.f, D.oh, D.ow] += (
        TypeFn.cast_signed(
            U, I[D.n, D.c, D.oh * S.SH + D.kh * S.DH, D.ow * S.SW + D.kw * S.DW]
        )
        - TypeFn.cast_signed(U, IZp)
    ) * (TypeFn.cast_signed(U, K[D.f, D.c, D.kh, D.kw]) - TypeFn.cast_signed(U, KZp))
907+
879908
@linalg_structured_op
880909
def conv_2d_nchw_fchw(
881910
I=TensorDef(T1, S.N, S.C, S.OH * S.SH + S.KH * S.DH, S.OW * S.SW + S.KW * S.DW),

mlir/test/Dialect/Linalg/roundtrip.mlir

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -664,3 +664,33 @@ func.func @winograd_output_dyn(%arg0: tensor<6x6x?x?x?x?xf32>, %arg1: tensor<?x?
664664

665665
// CHECK-LABEL: func @winograd_output_dyn
666666
// CHECK: linalg.winograd_output_transform m(4) r(3) ins(%arg0 : tensor<6x6x?x?x?x?xf32>) outs(%arg1 : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
667+
668+
// -----

// Round-trip test: the quantized channel-first convolution must parse and
// re-print with its strides/dilations attributes and all four ins operands
// (input, kernel, and the two i32 zero points) intact.
func.func @conv2d_channel_first_q(%img: tensor<100x3x224x224xi32>, %filt: tensor<64x3x5x5xi32>, %a: i32, %b: i32) -> tensor<100x64x220x220xi32> {
  %init = arith.constant dense<0> : tensor<100x64x220x220xi32>
  %1 = linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>,
                                   strides = dense<1> : tensor<2xi64>}
         ins(%img, %filt, %a, %b : tensor<100x3x224x224xi32>, tensor<64x3x5x5xi32>, i32, i32)
         outs(%init : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>
  return %1 : tensor<100x64x220x220xi32>
}

// Fixed FileCheck capture class: "[a-zA-z0-9]" was a typo — the "A-z" range
// also matches "[", "\", "]", "^", "_" and "`".
// CHECK-LABEL: func @conv2d_channel_first_q(
// CHECK: %[[arg0:[a-zA-Z0-9]*]]: tensor<100x3x224x224xi32>, %[[arg1:[a-zA-Z0-9]*]]: tensor<64x3x5x5xi32>, %[[arg2:[a-zA-Z0-9]*]]: i32, %[[arg3:[a-zA-Z0-9]*]]: i32)
// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi32>, tensor<64x3x5x5xi32>, i32, i32) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>
683+
// -----

// Round-trip test with type promotion: i8 input/kernel/zero-points
// accumulating into an i32 output tensor.
func.func @conv2d_channel_first_q_promote(%img: tensor<100x3x224x224xi8>, %filt: tensor<64x3x5x5xi8>, %a: i8, %b: i8) -> tensor<100x64x220x220xi32> {
  %init = arith.constant dense<0> : tensor<100x64x220x220xi32>
  %1 = linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>,
                                   strides = dense<1> : tensor<2xi64>}
         ins(%img, %filt, %a, %b : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8)
         outs(%init : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>
  return %1 : tensor<100x64x220x220xi32>
}

// Fixed FileCheck capture class: "[a-zA-z0-9]" was a typo — the "A-z" range
// also matches "[", "\", "]", "^", "_" and "`".
// CHECK-LABEL: func @conv2d_channel_first_q_promote(
// CHECK: %[[arg0:[a-zA-Z0-9]*]]: tensor<100x3x224x224xi8>, %[[arg1:[a-zA-Z0-9]*]]: tensor<64x3x5x5xi8>, %[[arg2:[a-zA-Z0-9]*]]: i8, %[[arg3:[a-zA-Z0-9]*]]: i8)
// CHECK: linalg.conv_2d_nchw_fchw_q {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]] : tensor<100x3x224x224xi8>, tensor<64x3x5x5xi8>, i8, i8) outs(%{{.*}} : tensor<100x64x220x220xi32>) -> tensor<100x64x220x220xi32>

0 commit comments

Comments
 (0)