Skip to content

Commit 9325b8d

Browse files
committed
[mlir][Linalg] Add conv ops with TF definition.
The dimension order of a filter in TensorFlow is [filter_height, filter_width, in_channels, out_channels], which is different from the current definition. The current definition follows the TOSA spec. Add TF-layout versions of the conv ops to the .tc file, so that we do not have to insert a transpose op around a conv op. Reviewed By: antiagainst Differential Revision: https://reviews.llvm.org/D96038
1 parent 8334cdd commit 9325b8d

10 files changed: +1120 -0 lines changed

mlir/include/mlir/Dialect/Linalg/IR/LinalgNamedStructuredOpsSpec.tc

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,3 +124,114 @@ Note: this op only supports channel multiplier == 1.
124124
O(n, oh, ow, c) = std_addf<kh, kw>(std_mulf(
125125
I(n, oh * strides[0] + kh, ow * strides[1] + kw, c), K(kh, kw, c)));
126126
}
127+
128+
ods_def<ConvInputNWCFilterWCFOp>:
129+
def conv_1d_input_nwc_filter_wcf(I: f32(N, W, C), K: f32(KW, C, F)) -> (O: f32(N, W, F))
130+
attr(strides: 1xi64, dilations: 1xi64)
131+
""" A 1-D convolution given NWC layout input and WCF layout filter.
132+
133+
Computes a 1-D convolution given 3-D input and filter. The data layout
134+
of input is NWC and the data layout of filter is WCF.
135+
136+
The indexing maps for these three tensors contain 5 dimensions, following the
137+
order of (`N`, `W`, `F`, `KW`, `C`).
138+
"""
139+
{
140+
O(n, w, f) = std_addf<kw>(
141+
std_mulf(I(n, w * strides[0] + kw * dilations[0], c), K(kw, c, f)));
142+
}
143+
144+
ods_def<ConvInputNCWFilterWCFOp>:
145+
def conv_1d_input_ncw_filter_wcf(I: f32(N, C, W), K: f32(KW, C, F)) -> (O: f32(N, F, W))
146+
attr(strides: 1xi64, dilations: 1xi64)
147+
""" A 1-D convolution given NCW layout input and WCF layout filter.
148+
149+
Computes a 1-D convolution given 3-D input and filter. The data layout
150+
of input is NCW and the data layout of filter is WCF.
151+
152+
The indexing maps for these three tensors contain 5 dimensions, following the
153+
order of (`N`, `F`, `W`, `KW`, `C`).
154+
"""
155+
{
156+
O(n, f, w) = std_addf<kw>(
157+
std_mulf(I(n, c, w * strides[0] + kw * dilations[0]), K(kw, c, f)));
158+
}
159+
160+
ods_def<ConvInputNHWCFilterHWCFOp>:
161+
def conv_2d_input_nhwc_filter_hwcf(I: f32(N, H, W, C), K: f32(KH, KW, C, F)) -> (O: f32(N, H, W, F))
162+
attr(strides: 2xi64, dilations: 2xi64)
163+
""" A 2-D convolution given NHWC layout input and HWCF layout filter.
164+
165+
Computes a 2-D convolution given 4-D input and filter. The data layout
166+
of input is NHWC and the data layout of filter is HWCF.
167+
168+
The indexing maps for these three tensors contain 7 dimensions, following the
169+
order of (`N`, `H`, `W`, `F`, `KH`, `KW`, `C`).
170+
"""
171+
{
172+
O(n, h, w, f) =
173+
std_addf<kh, kw>(std_mulf(I(n, h * strides[0] + kh * dilations[0],
174+
w * strides[1] + kw * dilations[1], c),
175+
K(kh, kw, c, f)));
176+
}
177+
178+
ods_def<ConvInputNCHWFilterHWCFOp>:
179+
def conv_2d_input_nchw_filter_hwcf
180+
(I: f32(N, C, H, W), K: f32(KH, KW, C, F))
181+
-> (O: f32(N, F, H, W))
182+
attr(strides: 2xi64, dilations: 2xi64)
183+
""" A 2-D convolution given NCHW layout input and HWCF layout filter.
184+
185+
Computes a 2-D convolution given 4-D input and filter. The data layout
186+
of input is NCHW and the data layout of filter is HWCF.
187+
188+
The indexing maps for these three tensors contain 7 dimensions, following the
189+
order of (`N`, `F`, `H`, `W`, `KH`, `KW`, `C`).
190+
"""
191+
{
192+
O(n, f, h, w) =
193+
std_addf<kh, kw>(std_mulf(I(n, c, h * strides[0] + kh * dilations[0],
194+
w * strides[1] + kw * dilations[1]),
195+
K(kh, kw, c, f)));
196+
}
197+
198+
ods_def<ConvInputNDHWCFilterDHWCFOp>:
199+
def conv_3d_input_ndhwc_filter_dhwcf
200+
(I: f32(N, D, H, W, C), K: f32(KD, KH, KW, C, F))
201+
-> (O: f32(N, D, H, W, F))
202+
attr(strides: 3xi64, dilations: 3xi64)
203+
""" A 3-D convolution given NDHWC layout input and DHWCF layout filter.
204+
205+
Computes a 3-D convolution given 5-D input and filter. The data layout
206+
of input is NDHWC and the data layout of filter is DHWCF.
207+
208+
The indexing maps for these three tensors contain 9 dimensions, following the
209+
order of (`N`, `D`, `H`, `W`, `F`, `KD`, `KH`, `KW`, `C`).
210+
"""
211+
{
212+
O(n, d, h, w, f) =
213+
std_addf<kd, kh, kw>(std_mulf(I(n, d * strides[0] + kd * dilations[0],
214+
h * strides[1] + kh * dilations[1],
215+
w * strides[2] + kw * dilations[2], c),
216+
K(kd, kh, kw, c, f)));
217+
}
218+
219+
ods_def<ConvInputNCDHWFilterDHWCFOp>:
220+
def conv_3d_input_ncdhw_filter_dhwcf
221+
(I: f32(N, C, D, H, W), K: f32(KD, KH, KW, C, F))
222+
-> (O: f32(N, F, D, H, W))
223+
attr(strides: 3xi64, dilations: 3xi64)
224+
""" A 3-D convolution given NCDHW layout input and DHWCF layout filter.
225+
226+
Computes a 3-D convolution given 5-D input and filter. The data layout
227+
of input is NCDHW and the data layout of filter is DHWCF.
228+
229+
The indexing maps for these three tensors contain 9 dimensions, following the
230+
order of (`N`, `F`, `D`, `H`, `W`, `KD`, `KH`, `KW`, `C`).
231+
"""
232+
{
233+
O(n, f, d, h, w) = std_addf<kd, kh, kw>(std_mulf(
234+
I(n, c, d * strides[0] + kd * dilations[0],
235+
h * strides[1] + kh * dilations[1], w * strides[2] + kw * dilations[2]),
236+
K(kd, kh, kw, c, f)));
237+
}
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
2+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
3+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
4+
// RUN: | FileCheck %s
5+
6+
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" -convert-linalg-to-loops -convert-scf-to-std \
7+
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
8+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
9+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
10+
// RUN: | FileCheck %s
11+
12+
// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
13+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
14+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
15+
// RUN: | FileCheck %s
16+
17+
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=0,0,4" \
18+
// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
19+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
20+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
21+
// RUN: | FileCheck %s
22+
23+
func private @print_memref_f32(memref<*xf32>)
24+
25+
// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f.
26+
func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> {
27+
%buf = alloc(%s1, %s2, %s3) : memref<?x?x?xf32>
28+
linalg.fill(%buf, %f) : memref<?x?x?xf32>, f32
29+
return %buf : memref<?x?x?xf32>
30+
}
31+
32+
func @conv_1d_input_ncw_filter_wcf(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) {
33+
linalg.conv_1d_input_ncw_filter_wcf {dilations = dense<1> : tensor<1xi64>,
34+
strides = dense<1> : tensor<1xi64>}
35+
ins (%arg0, %arg1: memref<?x?x?xf32>, memref<?x?x?xf32>)
36+
outs (%arg2: memref<?x?x?xf32>)
37+
return
38+
}
39+
40+
func @main() {
41+
%c0 = constant 0 : index
42+
%c1 = constant 1 : index
43+
%c3 = constant 3 : index
44+
%c6 = constant 6 : index
45+
%c8 = constant 8 : index
46+
%f10 = constant 10.00000e+00 : f32
47+
%val = constant 2.00000e+00 : f32
48+
%zero = constant 0.00000e+00 : f32
49+
50+
%filter1D_ncw = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>)
51+
%in1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c8, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>)
52+
%out1D_ncw = call @alloc_3d_filled_f32(%c1, %c1, %c6, %zero) : (index, index, index, f32) -> (memref<?x?x?xf32>)
53+
54+
store %f10, %in1D_ncw[%c0, %c0, %c3] : memref<?x?x?xf32>
55+
call @conv_1d_input_ncw_filter_wcf(%in1D_ncw, %filter1D_ncw, %out1D_ncw) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> ()
56+
%out1D_ncw_ = memref_cast %out1D_ncw : memref<?x?x?xf32> to memref<*xf32>
57+
call @print_memref_f32(%out1D_ncw_): (memref<*xf32>) -> ()
58+
59+
dealloc %filter1D_ncw : memref<?x?x?xf32>
60+
dealloc %in1D_ncw : memref<?x?x?xf32>
61+
dealloc %out1D_ncw : memref<?x?x?xf32>
62+
return
63+
}
64+
65+
// CHECK: Unranked Memref {{.*}}
66+
// CHECK-NEXT: [
67+
// CHECK-SAME: [
68+
// CHECK-SAME: [12, 28, 28, 28, 12, 12]
69+
// CHECK-SAME: ]
70+
// CHECK-SAME: ]
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
2+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
3+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
4+
// RUN: | FileCheck %s
5+
6+
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" -convert-linalg-to-loops -convert-scf-to-std \
7+
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
8+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
9+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
10+
// RUN: | FileCheck %s
11+
12+
// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
13+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
14+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
15+
// RUN: | FileCheck %s
16+
17+
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,4" \
18+
// RUN: -test-conv-vectorization="tile-sizes=1,1,1,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
19+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
20+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
21+
// RUN: | FileCheck %s
22+
23+
func private @print_memref_f32(memref<*xf32>)
24+
25+
// Creates and returns a 3-D buffer of size (%s1, %s2, %s3) filled with the value %f.
26+
func @alloc_3d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %f : f32) -> memref<?x?x?xf32> {
27+
%buf = alloc(%s1, %s2, %s3) : memref<?x?x?xf32>
28+
linalg.fill(%buf, %f) : memref<?x?x?xf32>, f32
29+
return %buf : memref<?x?x?xf32>
30+
}
31+
32+
func @conv_1d_input_nwc_filter_wcf(%arg0: memref<?x?x?xf32>, %arg1: memref<?x?x?xf32>, %arg2: memref<?x?x?xf32>) {
33+
linalg.conv_1d_input_nwc_filter_wcf {dilations = dense<1> : tensor<1xi64>,
34+
strides = dense<1> : tensor<1xi64>}
35+
ins (%arg0, %arg1: memref<?x?x?xf32>, memref<?x?x?xf32>)
36+
outs (%arg2: memref<?x?x?xf32>)
37+
return
38+
}
39+
40+
func @main() {
41+
%c0 = constant 0 : index
42+
%c1 = constant 1 : index
43+
%c3 = constant 3 : index
44+
%c6 = constant 6 : index
45+
%c8 = constant 8 : index
46+
%f10 = constant 10.00000e+00 : f32
47+
%val = constant 2.00000e+00 : f32
48+
%zero = constant 0.00000e+00 : f32
49+
50+
%filter1D_nwc = call @alloc_3d_filled_f32(%c3, %c1, %c1, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>)
51+
%in1D_nwc = call @alloc_3d_filled_f32(%c3, %c8, %c1, %val) : (index, index, index, f32) -> (memref<?x?x?xf32>)
52+
%out1D_nwc = call @alloc_3d_filled_f32(%c3, %c6, %c1, %zero) : (index, index, index, f32) -> (memref<?x?x?xf32>)
53+
54+
store %f10, %in1D_nwc[%c0, %c3, %c0] : memref<?x?x?xf32>
55+
call @conv_1d_input_nwc_filter_wcf(%in1D_nwc, %filter1D_nwc, %out1D_nwc) : (memref<?x?x?xf32>, memref<?x?x?xf32>, memref<?x?x?xf32>) -> ()
56+
%out1D_nwc_ = memref_cast %out1D_nwc : memref<?x?x?xf32> to memref<*xf32>
57+
call @print_memref_f32(%out1D_nwc_): (memref<*xf32>) -> ()
58+
59+
dealloc %filter1D_nwc : memref<?x?x?xf32>
60+
dealloc %in1D_nwc : memref<?x?x?xf32>
61+
dealloc %out1D_nwc : memref<?x?x?xf32>
62+
return
63+
}
64+
65+
// CHECK: Unranked Memref {{.*}}
66+
// CHECK-NEXT: [
67+
// CHECK-SAME: [
68+
// CHECK-SAME: [12],
69+
// CHECK-COUNT-3: [28],
70+
// CHECK-NEXT: [12],
71+
// CHECK-NEXT: [12]
72+
// CHECK-SAME: ],
73+
// CHECK-NEXT: [
74+
// CHECK-COUNT-5: [12],
75+
// CHECK-NEXT: [12]
76+
// CHECK-SAME: ],
77+
// CHECK-NEXT: [
78+
// CHECK-COUNT-5: [12],
79+
// CHECK-NEXT: [12]
80+
// CHECK-SAME: ]
81+
// CHECK-SAME: ]
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
// RUN: mlir-opt %s -convert-linalg-to-loops -convert-scf-to-std -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
2+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
3+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
4+
// RUN: | FileCheck %s
5+
6+
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" -convert-linalg-to-loops -convert-scf-to-std \
7+
// RUN: -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-std-to-llvm | \
8+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
9+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
10+
// RUN: | FileCheck %s
11+
12+
// RUN: mlir-opt %s -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
13+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
14+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
15+
// RUN: | FileCheck %s
16+
17+
// RUN: mlir-opt %s -linalg-tile="linalg-tile-sizes=2,0,4,4" \
18+
// RUN: -test-conv-vectorization="tile-sizes=1,1,1,1,3,3,3" -convert-linalg-to-llvm -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-std-to-llvm | \
19+
// RUN: mlir-cpu-runner -e main -entry-point-result=void \
20+
// RUN: -shared-libs=%mlir_integration_test_dir/libmlir_runner_utils%shlibext \
21+
// RUN: | FileCheck %s
22+
23+
func private @print_memref_f32(memref<*xf32>)
24+
25+
// Creates and returns a 4-D buffer of size (%s1, %s2, %s3, %s4) filled with the value %f.
26+
func @alloc_4d_filled_f32(%s1 : index, %s2 : index, %s3 : index, %s4 : index, %f : f32) -> memref<?x?x?x?xf32> {
27+
%buf = alloc(%s1, %s2, %s3, %s4) : memref<?x?x?x?xf32>
28+
linalg.fill(%buf, %f) : memref<?x?x?x?xf32>, f32
29+
return %buf : memref<?x?x?x?xf32>
30+
}
31+
32+
func @conv_2d_input_nchw_filter_hwcf(%arg0: memref<?x?x?x?xf32>, %arg1: memref<?x?x?x?xf32>, %arg2: memref<?x?x?x?xf32>) {
33+
linalg.conv_2d_input_nchw_filter_hwcf {dilations = dense<1> : tensor<2xi64>,
34+
strides = dense<1> : tensor<2xi64>}
35+
ins (%arg0, %arg1: memref<?x?x?x?xf32>, memref<?x?x?x?xf32>)
36+
outs (%arg2: memref<?x?x?x?xf32>)
37+
return
38+
}
39+
40+
func @main() {
41+
%c0 = constant 0 : index
42+
%c1 = constant 1 : index
43+
%c3 = constant 3 : index
44+
%c6 = constant 6 : index
45+
%c8 = constant 8 : index
46+
%f10 = constant 10.00000e+00 : f32
47+
%val = constant 2.00000e+00 : f32
48+
%zero = constant 0.00000e+00 : f32
49+
50+
%filter2D_nchw = call @alloc_4d_filled_f32(%c3, %c3, %c1, %c1, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>)
51+
%in2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c8, %c8, %val) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>)
52+
%out2D_nchw = call @alloc_4d_filled_f32(%c3, %c1, %c6, %c6, %zero) : (index, index, index, index, f32) -> (memref<?x?x?x?xf32>)
53+
54+
store %f10, %in2D_nchw[%c0, %c0, %c0, %c3] : memref<?x?x?x?xf32>
55+
call @conv_2d_input_nchw_filter_hwcf(%in2D_nchw, %filter2D_nchw, %out2D_nchw) : (memref<?x?x?x?xf32>, memref<?x?x?x?xf32>, memref<?x?x?x?xf32>) -> ()
56+
%out2D_nchw_ = memref_cast %out2D_nchw : memref<?x?x?x?xf32> to memref<*xf32>
57+
call @print_memref_f32(%out2D_nchw_): (memref<*xf32>) -> ()
58+
59+
dealloc %filter2D_nchw : memref<?x?x?x?xf32>
60+
dealloc %in2D_nchw : memref<?x?x?x?xf32>
61+
dealloc %out2D_nchw : memref<?x?x?x?xf32>
62+
return
63+
}
64+
65+
// CHECK: Unranked Memref {{.*}}
66+
// CHECK-NEXT: [
67+
// CHECK-SAME: [
68+
// CHECK-SAME: [
69+
// CHECK-SAME: [36, 52, 52, 52, 36, 36],
70+
// CHECK-COUNT-5: [36, 36, 36, 36, 36, 36]
71+
// CHECK-SAME: ]
72+
// CHECK-SAME: ],
73+
// CHECK-NEXT: [
74+
// CHECK-SAME: [
75+
// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36]
76+
// CHECK-SAME: ]
77+
// CHECK-SAME: ],
78+
// CHECK-NEXT: [
79+
// CHECK-SAME: [
80+
// CHECK-COUNT-6: [36, 36, 36, 36, 36, 36]
81+
// CHECK-SAME: ]
82+
// CHECK-SAME: ]
83+
// CHECK-SAME: ]

0 commit comments

Comments
 (0)