Commit de7c069 (parent: 2dce8e4)
Author: Ian Schweer

Do iteration instead of vmap for elemwise
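
Context for the change: torch.vmap cannot batch a function whose control flow depends on runtime values, and a ScalarLoop with an `until` condition is exactly such a function (the diff below refers to this as @1031). A minimal sketch of the failure mode outside PyTensor; `loop_until` is a hypothetical stand-in for the compiled scalar loop, and the exact error message varies by PyTorch version:

```python
import torch


def loop_until(x):
    # Data-dependent control flow: the trip count depends on x's value.
    while (x * 2 < 10).item():
        x = x * 2
    return x


print(loop_until(torch.tensor(1.0)))  # works on a single scalar: tensor(8.)

try:
    # vmap runs the function with batched tensors, so .item() (and hence
    # any value-dependent loop condition) is rejected at runtime.
    torch.vmap(loop_until)(torch.arange(1.0, 5.0))
except RuntimeError as err:
    print(f"vmap failed: {err}")
```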

File tree: 2 files changed (+47, -5 lines)

pytensor/link/pytorch/dispatch/elemwise.py
Lines changed: 32 additions & 0 deletions

@@ -1,6 +1,9 @@
+from itertools import chain
+
 import torch
 
 from pytensor.link.pytorch.dispatch.basic import pytorch_funcify
+from pytensor.scalar import ScalarLoop
 from pytensor.tensor.elemwise import DimShuffle, Elemwise
 from pytensor.tensor.math import All, Any, Max, Min, Prod, Sum
 from pytensor.tensor.special import LogSoftmax, Softmax, SoftmaxGrad
@@ -17,6 +20,34 @@ def pytorch_funcify_Elemwise(op, node, **kwargs):
         def elemwise_fn(*inputs):
             Elemwise._check_runtime_broadcast(node, inputs)
             return base_fn(*inputs)
+
+    elif isinstance(scalar_op, ScalarLoop):
+        # Note: ScalarLoop + Elemwise is too common a combination
+        # not to support, but per @1031 vmap won't allow it.
+        # Instead, successively unbind the inputs down to scalars.
+        def elemwise_fn(*inputs):
+            Elemwise._check_runtime_broadcast(node, inputs)
+            shaped_inputs = torch.broadcast_tensors(*inputs)
+            expected_size = shaped_inputs[0].numel()
+            final_inputs = [s.clone() for s in shaped_inputs]
+            for _ in range(shaped_inputs[0].dim() - 1):
+                for i, _ in enumerate(shaped_inputs):
+                    layer = chain.from_iterable([s.unbind(0) for s in final_inputs[i]])
+                    final_inputs[i] = list(layer)
+
+            # Make sure we still have the same number of input groups
+            assert len(final_inputs) == len(shaped_inputs)
+
+            # Make sure each group of elements has the expected size
+            assert all(len(x) == expected_size for x in final_inputs)
+
+            # Make sure all elements are scalars (0-d tensors)
+            assert all(len(x.shape) == 0 for tensor in final_inputs for x in tensor)
+            res = [base_fn(*args) for args in zip(*final_inputs)]
+            states = torch.stack(tuple(out[0] for out in res))
+            done = torch.stack(tuple(out[1] for out in res))
+            return states, done
+
     else:
 
         def elemwise_fn(*inputs):
@@ -26,6 +57,7 @@ def elemwise_fn(*inputs):
             for _ in range(broadcast_inputs[0].dim()):
                 ufunc = torch.vmap(ufunc)
             return ufunc(*broadcast_inputs)
+            return base_fn(*inputs)
 
     return elemwise_fn

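The ScalarLoop branch above can be read in isolation: broadcast the inputs, peel off leading dimensions with unbind until only 0-d tensors remain, run the compiled scalar function once per element, and stack the results. A simplified sketch of the same idea, with illustrative names (`iterate_elemwise` and `scalar_fn` are not part of the commit), which additionally reshapes the output back to the broadcast shape, a step the commit leaves to the caller:

```python
from itertools import chain

import torch


def iterate_elemwise(scalar_fn, *inputs):
    # Broadcast all inputs to a common shape, as the commit does.
    shaped = torch.broadcast_tensors(*inputs)
    flat = [[t] for t in shaped]
    # Each pass strips one leading dimension via unbind(0);
    # after dim() passes every element is a 0-d tensor.
    for _ in range(shaped[0].dim()):
        flat = [
            list(chain.from_iterable(t.unbind(0) for t in group))
            for group in flat
        ]
    assert all(len(group) == shaped[0].numel() for group in flat)
    # Apply the scalar function pointwise and stack the results back up.
    out = torch.stack([scalar_fn(*args) for args in zip(*flat)])
    return out.reshape(shaped[0].shape)


# Example: an elementwise add built from a scalar function.
x = torch.arange(6.0).reshape(2, 3)
y = torch.ones(3)
print(torch.equal(iterate_elemwise(torch.add, x, y), x + y))  # True
```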
tests/link/pytorch/test_basic.py
Lines changed: 15 additions & 5 deletions

@@ -4,6 +4,7 @@
 import numpy as np
 import pytest
 
+import pytensor.tensor as pt
 import pytensor.tensor.basic as ptb
 from pytensor.compile.builders import OpFromGraph
 from pytensor.compile.function import function
@@ -431,10 +432,19 @@ def test_ScalarLoop_Elemwise():
     x = x0 * 2
     until = x >= 10
 
-    op = ScalarLoop(init=[x0], update=[x], until=until)
-    fn = function([n_steps, x0], Elemwise(op)(n_steps, x0), mode=pytorch_mode)
+    scalarop = ScalarLoop(init=[x0], update=[x], until=until)
+    op = Elemwise(scalarop)
+
+    n_steps = pt.scalar("n_steps", dtype="int32")
+    x0 = pt.vector("x0", dtype="float32")
+    state, done = op(n_steps, x0)
+
+    fn = function([n_steps, x0], [state, done], mode=pytorch_mode)
+    py_fn = function([n_steps, x0], [state, done])
 
-    states, dones = fn(10, np.array(range(5)))
+    args = [np.array(10).astype("int32"), np.arange(0, 5).astype("float32")]
+    torch_states, torch_dones = fn(*args)
+    py_states, py_dones = py_fn(*args)
 
-    np.testing.assert_allclose(states, [0, 4, 8, 12, 16])
-    np.testing.assert_allclose(dones, [False, False, False, True, True])
+    np.testing.assert_allclose(torch_states, py_states)
+    np.testing.assert_allclose(torch_dones, py_dones)
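
The rewritten test compares the torch backend against the default Python backend rather than hard-coded expected values. A self-contained version can be run outside the test suite; a sketch, assuming the torch backend is installed and that the mode string "PYTORCH" resolves to the same mode as the test's `pytorch_mode` (the `*_t` variable names are mine):

```python
import numpy as np

import pytensor.tensor as pt
from pytensor.compile.function import function
from pytensor.scalar import ScalarLoop, float32
from pytensor.tensor.elemwise import Elemwise

# Scalar loop: double x each step, stopping once x >= 10.
x0 = float32("x0")
x = x0 * 2
until = x >= 10

scalarop = ScalarLoop(init=[x0], update=[x], until=until)
op = Elemwise(scalarop)

n_steps_t = pt.scalar("n_steps", dtype="int32")
x0_t = pt.vector("x0", dtype="float32")
state, done = op(n_steps_t, x0_t)

torch_fn = function([n_steps_t, x0_t], [state, done], mode="PYTORCH")
py_fn = function([n_steps_t, x0_t], [state, done])

args = [np.array(10, dtype="int32"), np.arange(5, dtype="float32")]
for torch_out, py_out in zip(torch_fn(*args), py_fn(*args)):
    np.testing.assert_allclose(torch_out, py_out)
```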
