transposed convolution: add channels last tests #4208

Closed · wants to merge 1 commit
31 changes: 15 additions & 16 deletions kernels/portable/cpu/op_convolution.cpp
@@ -136,17 +136,6 @@ void conv2d_impl(
         }
       }
     } else { // transposed convolution
-      if (bias_ptr != nullptr) {
-        out_coord[2] = 0;
-        out_coord[3] = 0;
-        size_t out_c_start_idx =
-            calculate_linear_index(out_coord, out_strides.data(), 4);
-        size_t out_c_end_idx = out_c_start_idx + out_H * out_W;
-        for (size_t out_ix = out_c_start_idx; out_ix < out_c_end_idx; out_ix++) {
-          out_ptr[out_ix] = convert<CTYPE, CTYPE_BIAS>(bias_ptr[out_c]);
-        }
-      }
-
       w_coord[1] = out_c - out_c_start;
 
       for (size_t in_y = 0; in_y < in_H; ++in_y) {
@@ -295,12 +284,22 @@ void convolution_wrapper(
       bias.has_value() ? bias.value().const_data_ptr<CTYPE_BIAS>() : nullptr;
 
   size_t out_N = out.size(0);
-  size_t out_C_per_group = out.size(1) / groups;
+  size_t out_C = out.size(1);
+  size_t out_C_per_group = out_C / groups;
 
-  if (transposed && bias_ptr == nullptr) {
-    // If bias is not present, we need to initialize the output to 0
-    // before we can accumulate into it.
-    memset(out_ptr, 0, out.nbytes());
+  if (transposed) {
+    // For transposed convolution, we need to initialize the output before we
+    // can accumulate into it.
+    if (bias_ptr == nullptr) {
+      // If bias is not present, we need to initialize the output to 0
+      memset(out_ptr, 0, out.nbytes());
+    } else {
+      // If bias is present, we initialize the output to the bias value
+      for (size_t out_ix = 0; out_ix < out.numel(); ++out_ix) {
+        out_ptr[out_ix] = convert<CTYPE, CTYPE_BIAS>(
+            bias_ptr[(out_ix / out_strides[1]) % out_C]);
+      }
+    }
   }
 
   for (size_t batch = 0; batch < out_N; ++batch) {
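The new bias path relies on (out_ix / out_strides[1]) % out_C recovering the channel index in both memory formats: with contiguous (NCHW) strides, out_strides[1] is H * W, while with channels-last (NHWC) strides it is 1. A minimal standalone sketch (hypothetical, not part of this PR) that checks this identity for both layouts:

#include <cassert>
#include <cstddef>

int main() {
  // Example sizes {N, C, H, W}.
  const size_t N = 2, C = 3, H = 4, W = 5;
  // Channel stride: H * W for contiguous (NCHW), 1 for channels-last (NHWC).
  const size_t contig_stride_c = H * W;
  const size_t cl_stride_c = 1;
  for (size_t n = 0; n < N; ++n) {
    for (size_t c = 0; c < C; ++c) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          // Linear offsets of element (n, c, h, w) in each layout.
          const size_t contig_ix = ((n * C + c) * H + h) * W + w;
          const size_t cl_ix = ((n * H + h) * W + w) * C + c;
          // Both recover the channel as (ix / stride[1]) % C.
          assert((contig_ix / contig_stride_c) % C == c);
          assert((cl_ix / cl_stride_c) % C == c);
        }
      }
    }
  }
  return 0;
}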
114 changes: 113 additions & 1 deletion kernels/test/op_convolution_test.cpp
@@ -556,7 +556,7 @@ TEST_F(OpConvCorrectnessTest, TransposedNonDefaultParams) {
   Tensor input = tf.full({2, 6, 4, 5}, 2.0);
   Tensor weight = tf.full({6, 1, 2, 2}, 0.5);
   Tensor bias = tf.make({3}, {1, 2, 3});
-  Tensor out = tf.zeros({2, 3, 3, 6});
+  Tensor out = tf.full({2, 3, 3, 6}, 0.7);
   Tensor expected = tf.make(
       {2, 3, 3, 6},
       {1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 3, 2, 2, 2, 2,
@@ -587,6 +587,118 @@ TEST_F(OpConvCorrectnessTest, TransposedNonDefaultParams) {
  EXPECT_TENSOR_CLOSE(out, expected);
}

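// Reorders a contiguous (NCHW) data blob into channels-last (NHWC) order.
// For example, with sizes {N = 1, C = 2, H = 2, W = 2}, the element at
// (n = 0, c = 1, h = 0, w = 1) moves from contiguous index
// ((0 * 2 + 1) * 2 + 0) * 2 + 1 = 5 to channels-last index
// ((0 * 2 + 0) * 2 + 1) * 2 + 1 = 3.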
template <typename T>
std::vector<T> get_channels_last_data(const Tensor& t) {
  const std::vector<int32_t> sizes(t.sizes().begin(), t.sizes().end());
  std::vector<T> contiguous_data(
      t.const_data_ptr<T>(), t.const_data_ptr<T>() + t.numel());
  std::vector<T> channels_last_data(t.numel());
  int32_t N = sizes[0];
  int32_t C = sizes[1];
  int32_t H = sizes[2];
  int32_t W = sizes[3];
  for (int32_t n = 0; n < N; ++n) {
    for (int32_t c = 0; c < C; ++c) {
      for (int32_t h = 0; h < H; ++h) {
        for (int32_t w = 0; w < W; ++w) {
          // Calculate the index in the original blob
          int32_t old_index = ((n * C + c) * H + h) * W + w;
          // Calculate the index in the new blob
          int32_t new_index = ((n * H + h) * W + w) * C + c;
          // Copy the data
          channels_last_data[new_index] = contiguous_data[old_index];
        }
      }
    }
  }
  return channels_last_data;
}

TEST_F(OpConvCorrectnessTest, TransposedDefaultParamsChannelsLast) {
  TensorFactory<ScalarType::Float> tf;

  Tensor input = tf.full_channels_last({2, 4, 3, 2}, 2.0);
  Tensor weight = tf.full_channels_last({4, 1, 2, 2}, 0.5);
  optional<Tensor> bias;
  Tensor out = tf.full_channels_last({2, 2, 4, 3}, 0.7);
  Tensor expected =
      tf.make({2, 2, 4, 3}, {2, 4, 2, 4, 8, 4, 4, 8, 4, 2, 4, 2, 2, 4, 2, 4,
                             8, 4, 4, 8, 4, 2, 4, 2, 2, 4, 2, 4, 8, 4, 4, 8,
                             4, 2, 4, 2, 2, 4, 2, 4, 8, 4, 4, 8, 4, 2, 4, 2});

  const std::vector<int32_t> sizes(
      expected.sizes().begin(), expected.sizes().end());
  std::vector<float> channels_last_data =
      get_channels_last_data<float>(expected);
  Tensor expected_channels_last =
      tf.make_channels_last(sizes, channels_last_data);

  int64_t stride[1] = {1};
  int64_t padding[1] = {0};
  int64_t dilation[1] = {1};
  bool transposed = true;
  int64_t output_padding[1] = {0};
  int64_t groups = 2;

  op_convolution_out(
      input,
      weight,
      exec_aten::optional<Tensor>(bias),
      exec_aten::ArrayRef<int64_t>{stride, 1},
      exec_aten::ArrayRef<int64_t>{padding, 1},
      exec_aten::ArrayRef<int64_t>{dilation, 1},
      transposed,
      exec_aten::ArrayRef<int64_t>{output_padding, 1},
      groups,
      out);

  EXPECT_TENSOR_CLOSE(out, expected_channels_last);
}

TEST_F(OpConvCorrectnessTest, TransposedNonDefaultParamsChannelsLast) {
  TensorFactory<ScalarType::Float> tf;

  Tensor input = tf.full_channels_last({2, 6, 4, 5}, 2.0);
  Tensor weight = tf.full_channels_last({6, 1, 2, 2}, 0.5);
  Tensor bias = tf.make({3}, {1, 2, 3});
  Tensor out = tf.full_channels_last({2, 3, 3, 6}, 0.7);
  Tensor expected = tf.make(
      {2, 3, 3, 6},
      {1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 3, 2, 2, 2, 2,
       2, 2, 2, 4, 4, 2, 4, 4, 2, 4, 4, 2, 4, 4, 3, 3, 3, 3, 3, 3, 3, 5,
       5, 3, 5, 5, 3, 5, 5, 3, 5, 5, 1, 1, 1, 1, 1, 1, 1, 3, 3, 1, 3, 3,
       1, 3, 3, 1, 3, 3, 2, 2, 2, 2, 2, 2, 2, 4, 4, 2, 4, 4, 2, 4, 4, 2,
       4, 4, 3, 3, 3, 3, 3, 3, 3, 5, 5, 3, 5, 5, 3, 5, 5, 3, 5, 5});

  const std::vector<int32_t> sizes(
      expected.sizes().begin(), expected.sizes().end());
  std::vector<float> channels_last_data =
      get_channels_last_data<float>(expected);
  Tensor expected_channels_last =
      tf.make_channels_last(sizes, channels_last_data);

  int64_t stride[1] = {3};
  int64_t padding[1] = {7};
  int64_t dilation[1] = {5};
  bool transposed = true;
  int64_t output_padding[1] = {2};
  int64_t groups = 3;
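  // With these parameters, the output spatial size follows the standard
  // transposed-convolution formula
  //   out = (in - 1) * stride - 2 * padding + dilation * (kernel - 1)
  //         + output_padding + 1,
  // so H: (4 - 1) * 3 - 2 * 7 + 5 * (2 - 1) + 2 + 1 = 3 and
  // W: (5 - 1) * 3 - 2 * 7 + 5 * (2 - 1) + 2 + 1 = 6,
  // which is why input {2, 6, 4, 5} produces output {2, 3, 3, 6}.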

  op_convolution_out(
      input,
      weight,
      exec_aten::optional<Tensor>(bias),
      exec_aten::ArrayRef<int64_t>{stride, 1},
      exec_aten::ArrayRef<int64_t>{padding, 1},
      exec_aten::ArrayRef<int64_t>{dilation, 1},
      transposed,
      exec_aten::ArrayRef<int64_t>{output_padding, 1},
      groups,
      out);

  EXPECT_TENSOR_CLOSE(out, expected_channels_last);
}

TEST_F(OpConvCorrectnessTest, InvalidOutputPadding) {
  TensorFactory<ScalarType::Float> tf;
