Skip to content

Commit 987a1f9

Browse files
swolchok authored and facebook-github-bot committed
Manual LICM in op_convolution (#3274)
Summary:
Pull Request resolved: #3274

Hoisting loads of these constants manually speeds up the kernel a lot. Depending on the types for CTYPE and CTYPE_BIAS, the compiler might not be able to prove this is safe on its own.

Reviewed By: kimishpatel, manuelcandales

Differential Revision: D56503574

fbshipit-source-id: 75609f5db4ab58d0c5ca8774b1a20123fb1d1218
1 parent 32a5926 commit 987a1f9

File tree

1 file changed

+7
-6
lines changed

1 file changed

+7
-6
lines changed

kernels/portable/cpu/op_convolution.cpp

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -72,6 +72,13 @@ void conv2d_impl(
7272
exec_aten::SizesType w_coord[kTensorDimensionLimit];
7373
w_coord[0] = out_c;
7474

75+
const int64_t stride_y = val_at(stride, 0);
76+
const int64_t padding_y = val_at(padding, 0, /*default_value=*/0);
77+
const int64_t dilation_y = val_at(dilation, 0);
78+
const int64_t stride_x = val_at(stride, 1);
79+
const int64_t padding_x = val_at(padding, 1, /*default_value=*/0);
80+
const int64_t dilation_x = val_at(dilation, 1);
81+
7582
// Compute 2D output region
7683
for (size_t out_y = 0; out_y < out_H; ++out_y) {
7784
out_coord[2] = out_y;
@@ -87,19 +94,13 @@ void conv2d_impl(
8794
for (size_t w_y = 0; w_y < w_H; ++w_y) {
8895
w_coord[2] = w_y;
8996

90-
int64_t stride_y = val_at(stride, 0);
91-
int64_t padding_y = val_at(padding, 0, /*default_value=*/0);
92-
int64_t dilation_y = val_at(dilation, 0);
9397
size_t in_y = stride_y * out_y + dilation_y * w_y - padding_y;
9498
in_coord[2] = in_y;
9599
// Only proceed if input y coordinate is within bounds
96100
if (in_y >= 0 && in_y < in_H) {
97101
for (size_t w_x = 0; w_x < w_W; ++w_x) {
98102
w_coord[3] = w_x;
99103

100-
int64_t stride_x = val_at(stride, 1);
101-
int64_t padding_x = val_at(padding, 1, /*default_value=*/0);
102-
int64_t dilation_x = val_at(dilation, 1);
103104
size_t in_x = stride_x * out_x + dilation_x * w_x - padding_x;
104105
in_coord[3] = in_x;
105106

0 commit comments

Comments
 (0)