First pass on docstrings

jessegrabowski · jessegrabowski · commit 5ba3cc7da814 · 2024-07-31T20:26:44.000+08:00
diff --git a/pytensor/tensor/einsum.py b/pytensor/tensor/einsum.py
@@ -61,14 +61,117 @@ def __init__(self, *args, subscripts: str, path: PATH, optimized: bool, **kwargs
 
 
 def _iota(shape: TensorVariable, axis: int) -> TensorVariable:
+    """
+    Create an array with values increasing along the specified axis.
+
+    Iota is a multidimensional generalization of the `arange` function. The returned array is filled with whole numbers
+    increasing along the specified axis.
+
+    Parameters
+    ----------
+    shape: TensorVariable
+        The shape of the array to be created.
+    axis: int
+        The axis along which to fill the array with increasing values.
+
+    Returns
+    -------
+    TensorVariable
+        An array with values increasing along the specified axis.
+
+    Examples
+    --------
+    In the simplest case where ``shape`` is 1d, the output will be equivalent to ``pt.arange``:
+
+    .. testcode::
+
+        import pytensor.tensor as pt
+        from pytensor.tensor.einsum import _iota
+
+        shape = pt.as_tensor((5,))
+        print(_iota(shape, 0).eval())
+
+    .. testoutput::
+
+        [0 1 2 3 4]
+
+    In higher dimensions, it will look like many concatenated `pt.arange`:
+
+    .. testcode::
+
+        import pytensor.tensor as pt
+        from pytensor.tensor.einsum import _iota
+
+        shape = pt.as_tensor((5, 5))
+        print(_iota(shape, 1).eval())
+
+    .. testoutput::
+
+        [[0 1 2 3 4]
+         [0 1 2 3 4]
+         [0 1 2 3 4]
+         [0 1 2 3 4]
+         [0 1 2 3 4]]
+
+    Setting ``axis=0`` above would result in the transpose of the output.
+    """
     len_shape = get_vector_length(shape)
     axis = normalize_axis_index(axis, len_shape)
     values = arange(shape[axis])
     return broadcast_to(shape_padright(values, len_shape - axis - 1), shape)
 
 
-def _delta(shape, axes: Sequence[int]) -> TensorVariable:
-    """This utility function exists for creating Kronecker delta arrays."""
+def _delta(shape: TensorVariable, axes: Sequence[int]) -> TensorVariable:
+    """
+    Create a Kronecker delta tensor.
+
+    The Kronecker delta function is defined:
+
+    .. math::
+
+        \\delta(i, j) = \\begin{cases} 1 & \\text{if} \\quad i = j \\\\ 0 & \\text{otherwise} \\end{cases}
+
+    To create a Kronecker tensor, the delta function is applied elementwise to the axes specified. The result is a
+    tensor of booleans, with ``True`` where the axis indices coincide, and ``False`` otherwise. See below for examples.
+
+    Parameters
+    ----------
+    shape: TensorVariable
+        The shape of the tensor to be created. Note that `_delta` is not defined for 1d tensors, because there is no
+        second axis against which to compare.
+    axes: sequence of int
+        Axes whose indices should be compared. Note that `_delta` is not defined for a single axis, because there is no
+        second axis against which to compare.
+
+    Examples
+    --------
+    An easy case to understand is when the shape is square and the number of axes is equal to the number of dimensions.
+    This will result in a generalized identity tensor, with ``True`` along the main diagonal:
+
+    .. testcode::
+
+        from pytensor.tensor.einsum import _delta
+        print(_delta((5, 5), (0, 1)).eval())
+
+    .. testoutput::
+
+        [[ True False False False False]
+         [False  True False False False]
+         [False False  True False False]
+         [False False False  True False]
+         [False False False False  True]]
+
+    In the case where the shape is not square, the result will be a tensor with ``True`` along the main diagonal and
+    ``False`` elsewhere:
+
+    .. testcode::
+
+        from pytensor.tensor.einsum import _delta
+        print(_delta((3, 2), (0, 1)).eval())
+
+    .. testoutput::
+
+        [[ True False]
+         [False  True]
+         [False False]]
+    """
+    if len(axes) == 1:
+        raise ValueError("Need at least two axes to create a delta tensor")
     base_shape = stack([shape[axis] for axis in axes])
     iotas = [_iota(base_shape, i) for i in range(len(axes))]
     eyes = [eq(i1, i2) for i1, i2 in pairwise(iotas)]
@@ -81,6 +184,46 @@ def _general_dot(
     axes: Sequence[Sequence[int]],  # Should be length 2,
     batch_axes: Sequence[Sequence[int]],  # Should be length 2,
 ) -> TensorVariable:
+    """
+    Generalized dot product between two tensors.
+
+    Ultimately ``_general_dot`` is a call to `tensordot`, performing a multiply-and-sum ("dot") operation between two
+    tensors, along the requested axes. This function further generalizes this operation by allowing arbitrary
+    batch dimensions to be specified for each tensor.
+
+
+    Parameters
+    ----------
+    vars: tuple[TensorVariable, TensorVariable]
+        The two tensors to contract with ``tensordot``.
+    axes: Sequence[Sequence[int]]
+        The axes along which to perform the dot product. Should be a sequence of two sequences, one for each tensor.
+    batch_axes: Sequence[Sequence[int]]
+        The batch axes for each tensor. Should be a sequence of two sequences, one for each tensor.
+
+    Returns
+    -------
+    TensorVariable
+        The result of the ``tensordot`` product.
+
+    Examples
+    --------
+    Perform a batched dot product between two 3d tensors:
+
+    .. testcode::
+
+        import pytensor.tensor as pt
+        from pytensor.tensor.einsum import _general_dot
+        A = pt.tensor(shape = (3, 4, 5))
+        B = pt.tensor(shape = (3, 5, 2))
+
+        result = _general_dot((A, B), axes=[[2], [1]], batch_axes=[[0], [0]])
+        print(result.type.shape)
+
+    .. testoutput::
+
+        (3, 4, 2)
+    """
     # Shortcut for non batched case
     if not batch_axes[0] and not batch_axes[1]:
         return tensordot(*vars, axes=axes)