[executorch] Ignore leading 1 dimensions when checking optimized path for op_mul (#4963)

kirklandsign · swolchok · web-flow · commit 1ae997c30334 · 2024-08-28T20:09:00.000-07:00
A 1 x 1 x ... x m x n tensor can be multiplied element-wise with an m x n tensor just fine. Pull Request resolved: #4806 Co-authored-by: Scott Wolchok <swolchok@fb.com>
diff --git a/kernels/optimized/cpu/op_mul.cpp b/kernels/optimized/cpu/op_mul.cpp
@@ -22,6 +22,25 @@ using ScalarType = exec_aten::ScalarType;
 
 namespace {
 
+bool sizes_match_ignoring_leading_1s( // true iff lhs and rhs are equal once each one's leading 1s are dropped
+    ArrayRef<Tensor::SizesType> lhs,
+    ArrayRef<Tensor::SizesType> rhs) {
+  auto lhs_begin = lhs.begin();
+  auto lhs_end = lhs.end();
+  while (lhs_begin != lhs_end && *lhs_begin == 1) { // skip lhs's leading 1s; an all-1s shape leaves an empty range
+    ++lhs_begin;
+  }
+
+  auto rhs_begin = rhs.begin();
+  auto rhs_end = rhs.end();
+  while (rhs_begin != rhs_end && *rhs_begin == 1) { // same trimming for rhs
+    ++rhs_begin;
+  }
+
+  return ((lhs_end - lhs_begin) == (rhs_end - rhs_begin)) && // lengths must match first: the 3-iterator std::equal below does not bound-check rhs
+      std::equal(lhs_begin, lhs_end, rhs_begin); // element-wise comparison of the trimmed sizes
+}
+
 // Move to generic util as this is applicable to all binary ops
 bool can_use_optimized_path(
     const Tensor& a,
@@ -38,7 +57,9 @@ bool can_use_optimized_path(
       (a_type != ScalarType::Half && b_type != ScalarType::Half);
   can_use_optimized_path = can_use_optimized_path &&
       (a.sizes().equals(b.sizes()) ||
-       (a.numel() == b.numel() && a.numel() == out.numel()));
+       (a.numel() == b.numel() &&
+        (a.numel() == out.numel() ||
+         sizes_match_ignoring_leading_1s(a.sizes(), b.sizes()))));
   return can_use_optimized_path;
 }
 
diff --git a/kernels/test/op_mul_test.cpp b/kernels/test/op_mul_test.cpp
@@ -165,6 +165,21 @@ TEST_F(OpMulOutTest, BoolTensors) {
   EXPECT_TENSOR_EQ(out, tf.make(sizes, /*data=*/{false, false, true, false}));
 }
 
+TEST_F(OpMulOutTest, OptimizedPathIgnoresLeading1Dimensions) { // inputs whose shapes differ only by leading 1s
+  TensorFactory<ScalarType::Float> tf;
+
+  const std::vector<int32_t> sizes1 = {1, 1, 2, 2}; // rank 4, two leading 1s
+  const std::vector<int32_t> sizes2 = {1, 2, 2}; // rank 3, one leading 1; same element count as sizes1
+
+  // Destination for the mul, shaped like the higher-rank input (sizes1).
+  Tensor out = tf.zeros(sizes1);
+
+  // Multiply a sizes1 tensor by all-ones of sizes2; the data should come back unchanged.
+  op_mul_out(
+      tf.make(sizes1, /*data=*/{1.1, 2.2, 4.4, 8.8}), tf.ones(sizes2), out);
+  EXPECT_TENSOR_CLOSE(out, tf.make(sizes1, /*data=*/{1.1, 2.2, 4.4, 8.8}));
+}
+
 // Mismatched shape tests.
 TEST_F(OpMulOutTest, MismatchedInputShapesDies) {
   if (SupportedFeatures::get()->is_aten) {