
Commit 0536862

Fix bug in optimized mul's broadcast handling (#11590)
Summary: When two tensors contain the same number of elements but differ in rank, the optimized path resized the output incorrectly. For example, a[6] * b[1, 1, 6] should produce out[1, 1, 6], but the existing code resized the output using a.sizes(), i.e. to [6]. The fix resizes the output to the broadcast target size of a and b instead.

Test Plan: Added tests covering the mismatched-rank broadcast cases.
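For context, the broadcast target shape is found by aligning the two shapes from their trailing dimensions, with missing or size-1 dimensions stretched to match. Below is a minimal, framework-free C++ sketch of that rule; the broadcast_shape helper and the use of std::vector are illustrative stand-ins, not ExecuTorch APIs.

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative only: compute the broadcast target shape of two dim vectors.
// This mirrors the rule the fixed kernel relies on via
// resize_to_broadcast_target_size(a, b, out).
std::vector<int64_t> broadcast_shape(
    const std::vector<int64_t>& a,
    const std::vector<int64_t>& b) {
  const size_t ndim = std::max(a.size(), b.size());
  std::vector<int64_t> out(ndim, 1);
  for (size_t i = 0; i < ndim; ++i) {
    // Align from the trailing dimension; a missing dimension counts as 1.
    const int64_t da = i < a.size() ? a[a.size() - 1 - i] : 1;
    const int64_t db = i < b.size() ? b[b.size() - 1 - i] : 1;
    // Assumes the shapes are broadcast-compatible (equal, or one of them is 1).
    out[ndim - 1 - i] = std::max(da, db);
  }
  return out;
}

int main() {
  // The failing case from the summary: a[6] * b[1, 1, 6].
  // The broadcast target is [1, 1, 6]; resizing from a.sizes() alone gave [6].
  for (int64_t d : broadcast_shape({6}, {1, 1, 6})) {
    std::cout << d << ' ';  // prints: 1 1 6
  }
  std::cout << '\n';
  return 0;
}

For the shapes in this commit, {6} and {1, 1, 6} broadcast to {1, 1, 6}, which is why resizing the output to a.sizes() was incorrect.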
1 parent 0d3e750 commit 0536862

File tree

2 files changed: +112 -6 lines changed

kernels/optimized/cpu/op_mul.cpp

Lines changed: 3 additions & 6 deletions
@@ -111,14 +111,11 @@ Tensor& opt_mul_out(
 
   auto selected_optimized_path = select_optimized_path(a, b, out);
   if (selected_optimized_path == ElementwiseOptimizedPath::kTreatAs1d) {
-    // Resize for dynamic shape
-    auto error = resize_tensor(out, a.sizes());
-    ET_KERNEL_CHECK_MSG(
+    ET_KERNEL_CHECK(
         ctx,
-        error == Error::Ok,
+        resize_to_broadcast_target_size(a, b, out) == Error::Ok,
         InvalidArgument,
-        out,
-        "Failed to resize output tensor.");
+        out);
 
     if (executorch::runtime::isComplexType(out_type)) {
       ET_KERNEL_CHECK(

kernels/test/op_mul_test.cpp

Lines changed: 109 additions & 0 deletions
@@ -794,3 +794,112 @@ TEST_F(OpMulScalarOutTest, BFloat16SanityCheck) {
   // Check that it matches the expected output.
   EXPECT_TENSOR_CLOSE(out, tf.make(sizes, {2.6, 4.2, 9.2, 16.4}));
 }
+
+// Tests for broadcast handling fix: when tensor dimensions don't match,
+// the output should be resized to match the tensor with higher dimensionality
+TEST_F(OpMulOutTest, BroadcastDimensionMismatchFix) {
+  TensorFactory<ScalarType::Float> tf;
+
+  // Test case: tensor a of size [6] and b of size [1, 1, 6]
+  // Expected output should be [1, 1, 6], not [6]
+  Tensor a = tf.make({6}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  Tensor b = tf.make({1, 1, 6}, {2.0, 2.0, 2.0, 2.0, 2.0, 2.0});
+
+  // Create output tensor with expected broadcast shape [1, 1, 6]
+  Tensor out = tf.zeros({1, 1, 6});
+
+  // Call the mul function
+  Tensor& result = op_mul_out(a, b, out);
+
+  // Verify the output shape is [1, 1, 6]
+  EXPECT_EQ(result.dim(), 3);
+  EXPECT_EQ(result.size(0), 1);
+  EXPECT_EQ(result.size(1), 1);
+  EXPECT_EQ(result.size(2), 6);
+
+  // Verify the values are correct (element-wise multiplication with
+  // broadcasting)
+  Tensor expected = tf.make({1, 1, 6}, {2.0, 4.0, 6.0, 8.0, 10.0, 12.0});
+  EXPECT_TENSOR_CLOSE(result, expected);
+}
+
+TEST_F(OpMulOutTest, BroadcastDimensionMismatchReversed) {
+  TensorFactory<ScalarType::Float> tf;
+
+  // Test case: tensor a of size [1, 1, 6] and b of size [6]
+  // Expected output should be [1, 1, 6]
+  Tensor a = tf.make({1, 1, 6}, {1.0, 2.0, 3.0, 4.0, 5.0, 6.0});
+  Tensor b = tf.make({6}, {2.0, 2.0, 2.0, 2.0, 2.0, 2.0});
+
+  // Create output tensor with expected broadcast shape [1, 1, 6]
+  Tensor out = tf.zeros({1, 1, 6});
+
+  // Call the mul function
+  Tensor& result = op_mul_out(a, b, out);
+
+  // Verify the output shape is [1, 1, 6]
+  EXPECT_EQ(result.dim(), 3);
+  EXPECT_EQ(result.size(0), 1);
+  EXPECT_EQ(result.size(1), 1);
+  EXPECT_EQ(result.size(2), 6);
+
+  // Verify the values are correct (element-wise multiplication with
+  // broadcasting)
+  Tensor expected = tf.make({1, 1, 6}, {2.0, 4.0, 6.0, 8.0, 10.0, 12.0});
+  EXPECT_TENSOR_CLOSE(result, expected);
+}
+
+TEST_F(OpMulOutTest, BroadcastDimensionMismatchWithDifferentTypes) {
+  // Test the same broadcast fix with different data types
+  TensorFactory<ScalarType::Half> tf_half;
+  TensorFactory<ScalarType::BFloat16> tf_bf16;
+  TensorFactory<ScalarType::Int> tf_int;
+
+  // Test with Half precision
+  {
+    Tensor a = tf_half.make({4}, {1.0, 2.0, 3.0, 4.0});
+    Tensor b = tf_half.make({1, 1, 4}, {2.0, 2.0, 2.0, 2.0});
+    Tensor out = tf_half.zeros({1, 1, 4});
+
+    Tensor& result = op_mul_out(a, b, out);
+    EXPECT_EQ(result.dim(), 3);
+    EXPECT_EQ(result.size(0), 1);
+    EXPECT_EQ(result.size(1), 1);
+    EXPECT_EQ(result.size(2), 4);
+
+    Tensor expected = tf_half.make({1, 1, 4}, {2.0, 4.0, 6.0, 8.0});
+    EXPECT_TENSOR_CLOSE(result, expected);
+  }
+
+  // Test with BFloat16
+  {
+    Tensor a = tf_bf16.make({4}, {1.0, 2.0, 3.0, 4.0});
+    Tensor b = tf_bf16.make({1, 1, 4}, {2.0, 2.0, 2.0, 2.0});
+    Tensor out = tf_bf16.zeros({1, 1, 4});
+
+    Tensor& result = op_mul_out(a, b, out);
+    EXPECT_EQ(result.dim(), 3);
+    EXPECT_EQ(result.size(0), 1);
+    EXPECT_EQ(result.size(1), 1);
+    EXPECT_EQ(result.size(2), 4);
+
+    Tensor expected = tf_bf16.make({1, 1, 4}, {2.0, 4.0, 6.0, 8.0});
+    EXPECT_TENSOR_CLOSE(result, expected);
+  }
+
+  // Test with Int
+  {
+    Tensor a = tf_int.make({4}, {1, 2, 3, 4});
+    Tensor b = tf_int.make({1, 1, 4}, {2, 2, 2, 2});
+    Tensor out = tf_int.zeros({1, 1, 4});
+
+    Tensor& result = op_mul_out(a, b, out);
+    EXPECT_EQ(result.dim(), 3);
+    EXPECT_EQ(result.size(0), 1);
+    EXPECT_EQ(result.size(1), 1);
+    EXPECT_EQ(result.size(2), 4);
+
+    Tensor expected = tf_int.make({1, 1, 4}, {2, 4, 6, 8});
+    EXPECT_TENSOR_EQ(result, expected);
+  }
+}
