Commit 29e64e9

Handle subscripts shared with broadcasted dimensions
1 parent fe20a66 commit 29e64e9

File tree

2 files changed: +58 -14 lines changed
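For context, a minimal sketch of the pattern the commit message refers to, using the shapes from the new test below (`pt` stands for `pytensor.tensor`; the snippet is illustrative, not part of the commit):

import pytensor.tensor as pt

a = pt.tensor("a", shape=(32, 32, 32))
b = pt.tensor("b", shape=(1000, 32))
c = pt.tensor("c", shape=(1, 32))  # leading dimension is broadcastable

# The subscript "b" labels both the length-1000 axis of `b` and the
# broadcastable length-1 axis of `c`. After this commit, pytensor squeezes
# the broadcastable axis before contracting and warns that the shared
# subscript can result in a suboptimal contraction path.
out = pt.einsum("ijk,bj,bk->i", a, b, c)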

pytensor/tensor/einsum.py

Lines changed: 19 additions & 14 deletions

@@ -1,5 +1,6 @@
 import collections
 import itertools
+import warnings
 from collections.abc import Sequence
 from functools import partial, reduce
 from itertools import pairwise
@@ -385,7 +386,6 @@ def einsum(subscripts: str, *operands: "TensorLike") -> TensorVariable:
     else:
         # Case 2: All operands have known shapes. In this case, we can use opt_einsum to compute the optimal
         # contraction order.
-        # Call _implementation to bypass dispatch
         _, contraction_list = np.einsum_path(
             subscripts,
             # Numpy einsum_path requires arrays even though only the shapes matter
@@ -428,14 +428,22 @@ def sum_repeats(
                 names = names.replace(name, "", count - 1)
         return operand, names
 
-    # def filter_singleton_dims(operand, names, other_shape, other_names):
-    #     eq = core.definitely_equal
-    #     keep = [
-    #         not eq(operand.shape[i], 1) or j == -1 or eq(other_shape[j], 1)
-    #         for i, j in enumerate(map(other_names.find, names))
-    #     ]
-    #     sqez_axes, keep_axes = partition_list(keep, list(range(operand.ndim)))
-    #     return lax.squeeze(operand, sqez_axes), "".join(names[i] for i in keep_axes)
+    def filter_singleton_dims(operand, names, other_operand, other_names):
+        op_bcast = operand.type.broadcastable
+        other_bcast = other_operand.type.broadcastable
+        keep = [
+            (not op_bcast[i]) or (j == -1) or other_bcast[j]
+            for i, j in enumerate(map(other_names.find, names))
+        ]
+        keep_axes = [i for i, keep_axis in enumerate(keep) if keep_axis]
+        squeeze_axes = [i for i, keep_axis in enumerate(keep) if not keep_axis]
+        if squeeze_axes:
+            # TODO: We could modify the subscripts to avoid the problem?
+            warnings.warn(
+                "The same einsum subscript is used for a broadcastable and non-broadcastable dimension. "
+                "This can result in a suboptimal contraction path."
+            )
+        return operand.squeeze(squeeze_axes), "".join(names[i] for i in keep_axes)
 
     einsum_operands = list(operands)  # So we can pop
     for operand_indices, contracted_names, einstr, _, _ in contraction_list:
@@ -465,13 +473,10 @@ def sum_repeats(
         lhs, rhs = map(einsum_operands.pop, operand_indices)
         lhs_names, rhs_names = input_names
 
-        # TODO: Do this as well?
         # handle cases where one side of a contracting or batch dimension is 1
         # but its counterpart is not.
-        # lhs, lhs_names = filter_singleton_dims(lhs, lhs_names, shape(rhs),
-        #                                        rhs_names)
-        # rhs, rhs_names = filter_singleton_dims(rhs, rhs_names, shape(lhs),
-        #                                        lhs_names)
+        lhs, lhs_names = filter_singleton_dims(lhs, lhs_names, rhs, rhs_names)
+        rhs, rhs_names = filter_singleton_dims(rhs, rhs_names, lhs, lhs_names)
 
         lhs_counts = collections.Counter(lhs_names)
         rhs_counts = collections.Counter(rhs_names)
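To make the new `filter_singleton_dims` logic concrete, here is a hypothetical stand-alone rendering of its keep/squeeze computation over plain broadcastable tuples (the function name and calling convention are invented for illustration; the list comprehensions mirror the diff above):

def keep_and_squeeze_axes(names, op_bcast, other_names, other_bcast):
    # An axis is kept unless it is broadcastable here while the axis with the
    # same subscript letter in the other operand is not broadcastable.
    keep = [
        (not op_bcast[i]) or (j == -1) or other_bcast[j]
        for i, j in enumerate(map(other_names.find, names))
    ]
    keep_axes = [i for i, k in enumerate(keep) if k]
    squeeze_axes = [i for i, k in enumerate(keep) if not k]
    return keep_axes, squeeze_axes

# Operand c from the test below has names "bk" and shape (1, 32); its
# counterpart b has names "bj" and shape (1000, 32):
print(keep_and_squeeze_axes("bk", (True, False), "bj", (False, False)))
# ([1], [0]) -- the broadcastable "b" axis of c is squeezed before contracting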

tests/tensor/test_einsum.py

Lines changed: 39 additions & 0 deletions

@@ -14,6 +14,9 @@
 from pytensor.tensor.shape import Reshape
 
 
+# Fail for unexpected warnings in this file
+pytestmark = pytest.mark.filterwarnings("error")
+
 floatX = pytensor.config.floatX
 ATOL = RTOL = 1e-8 if floatX == "float64" else 1e-4
 
@@ -214,3 +217,39 @@ def test_ellipsis():
     np.testing.assert_allclose(
         out.eval({x: x_test, y: y_test}), expected_out.sum((0, 1)), atol=ATOL, rtol=RTOL
     )
+
+
+def test_broadcastable_dims():
+    # Test that einsum handles broadcasting dims correctly. There are two points:
+    # 1. Numpy einsum allows the same subscript for degenerate and full dimensions
+    # There is some stale discussion on whether this should be a bug or not, but for now it is not:
+    # https://github.com/numpy/numpy/issues/11548
+
+    # 2. Using the same letter for dimensions that are and aren't broadcastable
+    # can lead to suboptimal paths. We check we issue a warning for the following example:
+    # https://github.com/dgasmith/opt_einsum/issues/220
+    rng = np.random.default_rng(222)
+    a = pt.tensor("a", shape=(32, 32, 32))
+    b = pt.tensor("b", shape=(1000, 32))
+    c = pt.tensor("c", shape=(1, 32))
+
+    a_test = rng.normal(size=a.type.shape).astype(floatX)
+    b_test = rng.normal(size=b.type.shape).astype(floatX)
+    c_test = rng.normal(size=c.type.shape).astype(floatX)
+
+    # Note b is used for both 1 and 32
+    with pytest.warns(
+        UserWarning, match="This can result in a suboptimal contraction path"
+    ):
+        suboptimal_out = pt.einsum("ijk,bj,bk->i", a, b, c)
+    assert not [set(p) for p in suboptimal_out.owner.op.path] == [{0, 2}, {0, 1}]
+
+    # If we use a distinct letter we get the optimal path
+    optimal_out = pt.einsum("ijk,bj,ck->i", a, b, c)
+    assert [set(p) for p in optimal_out.owner.op.path] == [{0, 2}, {0, 1}]
+
+    suboptimal_eval = suboptimal_out.eval({a: a_test, b: b_test, c: c_test})
+    optimal_eval = optimal_out.eval({a: a_test, b: b_test, c: c_test})
+    np_eval = np.einsum("ijk,bj,bk->i", a_test, b_test, c_test)
+    np.testing.assert_allclose(suboptimal_eval, np_eval)
+    np.testing.assert_allclose(optimal_eval, np_eval)
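Point 1 in the test's comments can be checked directly against numpy; a standalone demonstration, not part of the commit:

import numpy as np

x = np.ones((3, 4))
y = np.ones((3, 1))  # degenerate axis shares the subscript "j" with x
# numpy broadcasts the length-1 axis instead of raising (see numpy issue 11548)
print(np.einsum("ij,ij->i", x, y))  # [4. 4. 4.]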
