
Commit fd26b74

Split block_diag into sparse and dense version
Closely follow scipy function signature for `block_diag`
1 parent d809f1c commit fd26b74
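
In short, the old `block_diagonal(matrices, sparse=..., format=...)` Op is split into a dense `block_diag` in `pytensor.tensor.slinalg` and a sparse `block_diag` in `pytensor.sparse`, both taking the matrices as positional arguments as SciPy does. A minimal usage sketch based on the docstrings and tests in this diff (the 2x2 values are purely illustrative, and the top-level import paths are assumed from those docstrings):

    import numpy as np
    import pytensor.tensor as pt
    from pytensor.tensor.slinalg import block_diag                 # dense version
    from pytensor.sparse import block_diag as sparse_block_diag    # sparse version

    A = pt.as_tensor_variable(np.array([[1.0, 2.0], [3.0, 4.0]]))
    B = pt.as_tensor_variable(np.array([[5.0, 6.0], [7.0, 8.0]]))

    dense = block_diag(A, B, name="dense_bd")                        # symbolic dense matrix
    sparse = sparse_block_diag(A, B, format="csr", name="sparse_bd") # symbolic CSR matrix

    print(dense.eval())             # 4x4 NumPy array with A and B on the diagonal
    print(sparse.eval().toarray())  # same values, returned as a scipy.sparse CSR matrix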

File tree

4 files changed: +141, -78 lines changed


pytensor/sparse/basic.py

Lines changed: 78 additions & 9 deletions
@@ -7,6 +7,7 @@
 TODO: Automatic methods for determining best sparse format?
 
 """
+from typing import Literal
 from warnings import warn
 
 import numpy as np
@@ -47,6 +48,7 @@
     trunc,
 )
 from pytensor.tensor.shape import shape, specify_broadcastable
+from pytensor.tensor.slinalg import BaseBlockDiagonal, largest_common_dtype
 from pytensor.tensor.type import TensorType
 from pytensor.tensor.type import continuous_dtypes as tensor_continuous_dtypes
 from pytensor.tensor.type import discrete_dtypes as tensor_discrete_dtypes
@@ -60,7 +62,6 @@
 
 sparse_formats = ["csc", "csr"]
 
-
 """
 Types of sparse matrices to use for testing.
 
@@ -183,7 +184,6 @@ def as_sparse_variable(x, name=None, ndim=None, **kwargs):
 
 as_sparse = as_sparse_variable
 
-
 as_sparse_or_tensor_variable = as_symbolic
 
 
@@ -1800,7 +1800,7 @@ def infer_shape(self, fgraph, node, shapes):
         return r
 
     def __str__(self):
-        return f"{self.__class__.__name__ }{{axis={self.axis}}}"
+        return f"{self.__class__.__name__}{{axis={self.axis}}}"
 
 
 def sp_sum(x, axis=None, sparse_grad=False):
@@ -2775,19 +2775,14 @@ def comparison(self, x, y):
 
 greater_equal_s_d = GreaterEqualSD()
 
-
 eq = __ComparisonSwitch(equal_s_s, equal_s_d, equal_s_d)
 
-
 neq = __ComparisonSwitch(not_equal_s_s, not_equal_s_d, not_equal_s_d)
 
-
 lt = __ComparisonSwitch(less_than_s_s, less_than_s_d, greater_than_s_d)
 
-
 gt = __ComparisonSwitch(greater_than_s_s, greater_than_s_d, less_than_s_d)
 
-
 le = __ComparisonSwitch(less_equal_s_s, less_equal_s_d, greater_equal_s_d)
 
 ge = __ComparisonSwitch(greater_equal_s_s, greater_equal_s_d, less_equal_s_d)
@@ -2992,7 +2987,7 @@ def __str__(self):
         l = []
         if self.inplace:
             l.append("inplace")
-        return f"{self.__class__.__name__ }{{{', '.join(l)}}}"
+        return f"{self.__class__.__name__}{{{', '.join(l)}}}"
 
     def make_node(self, x):
         """
@@ -3291,6 +3286,7 @@ class TrueDot(Op):
     # Simplify code by splitting into DotSS and DotSD.
 
     __props__ = ()
+
     # The grad_preserves_dense attribute doesn't change the
     # execution behavior. To let the optimizer merge nodes with
     # different values of this attribute we shouldn't compare it
@@ -4260,3 +4256,76 @@ def grad(self, inputs, grads):
 
 
 construct_sparse_from_list = ConstructSparseFromList()
+
+
+class SparseBlockDiagonalMatrix(BaseBlockDiagonal):
+    def make_node(self, *matrices, format: Literal["csc", "csr"] = "csc", name=None):
+        if not matrices:
+            raise ValueError("no matrices to allocate")
+        dtype = largest_common_dtype(matrices)
+        matrices = list(map(pytensor.tensor.as_tensor, matrices))
+
+        if any(mat.type.ndim != 2 for mat in matrices):
+            raise TypeError("all data arguments must be matrices")
+
+        out_type = matrix(format=format, dtype=dtype, name=name)
+        return Apply(self, matrices, [out_type])
+
+    def perform(self, node, inputs, output_storage, params=None):
+        format = node.outputs[0].type.format
+        dtype = largest_common_dtype(inputs)
+        output_storage[0][0] = scipy.sparse.block_diag(inputs, format=format).astype(
+            dtype
+        )
+
+
+_sparse_block_diagonal = SparseBlockDiagonalMatrix()
+
+
+def block_diag(
+    *matrices: TensorVariable, format: Literal["csc", "csr"] = "csc", name=None
+):
+    r"""
+    Construct a block diagonal matrix from a sequence of input matrices.
+
+    Given the inputs `A`, `B` and `C`, the output will have these arrays arranged on the diagonal:
+
+    [[A, 0, 0],
+     [0, B, 0],
+     [0, 0, C]]
+
+    Parameters
+    ----------
+    A, B, C ... : tensors
+        Input sparse matrices to form the block diagonal matrix. Each matrix should have the same number of dimensions,
+        and the block diagonal matrix will be formed using the right-most two dimensions of each input matrix.
+    format: str, optional
+        The format of the output sparse matrix. One of 'csr' or 'csc'. Default is 'csc'.
+    name: str, optional
+        Name of the output tensor.
+
+    Returns
+    -------
+    out: sparse matrix tensor
+        Symbolic sparse matrix in the specified format.
+
+    Examples
+    --------
+    Create a sparse block diagonal matrix from two sparse 2x2 matrices:
+
+    .. code-block:: python
+        import numpy as np
+        from pytensor.sparse import block_diag
+        from scipy.sparse import csr_matrix
+
+        A = csr_matrix([[1, 2], [3, 4]])
+        B = csr_matrix([[5, 6], [7, 8]])
+        result_sparse = block_diag(A, B, format='csr', name='X')
+        print(result_sparse.eval())
+
+    The resulting sparse block diagonal matrix `result_sparse` is in CSR format.
+    """
+    if len(matrices) == 1:
+        return matrices
+
+    return _sparse_block_diagonal(*matrices, format=format, name=name)
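
For reference, the new sparse Op's `perform` defers entirely to `scipy.sparse.block_diag`. The following standalone SciPy/NumPy sketch (no PyTensor involved, values purely illustrative) shows the numerical result the Op is expected to produce:

    import numpy as np
    import scipy.sparse

    A = np.array([[1.0, 2.0], [3.0, 4.0]])
    B = np.array([[5.0, 6.0], [7.0, 8.0]])

    # Same call the Op makes in perform(): stack the blocks on the diagonal
    # and return them in the requested sparse format ("csc" by default).
    out = scipy.sparse.block_diag([A, B], format="csc")

    print(out.toarray())
    # [[1. 2. 0. 0.]
    #  [3. 4. 0. 0.]
    #  [0. 0. 5. 6.]
    #  [0. 0. 7. 8.]]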

pytensor/tensor/slinalg.py

Lines changed: 48 additions & 62 deletions
@@ -912,77 +912,72 @@ def largest_common_dtype(tensors: typing.Sequence[TensorVariable]) -> np.dtype:
     return ft.reduce(lambda l, r: np.promote_types(l, r), [x.dtype for x in tensors])
 
 
-class BlockDiagonalMatrix(Op):
-    __props__ = ("sparse", "format")
+def block_diag_grad(inputs, gout):
+    shapes = pt.stack([i.shape for i in inputs])
+    index_end = shapes.cumsum(0)
+    index_begin = index_end - shapes
+    slices = [
+        ptb.ix_(
+            pt.arange(index_begin[i, 0], index_end[i, 0]),
+            pt.arange(index_begin[i, 1], index_end[i, 1]),
+        )
+        for i in range(len(inputs))
+    ]
+    return [gout[0][slc] for slc in slices]
+
+
+class BaseBlockDiagonal(Op):
+    def grad(self, inputs, gout):
+        return block_diag_grad(inputs, gout)
+
+    def infer_shape(self, fgraph, nodes, shapes):
+        first, second = zip(*shapes)
+        return [(pt.add(*first), pt.add(*second))]
 
-    def __init__(self, sparse=False, format="csr"):
-        if format not in ("csr", "csc"):
-            raise ValueError(f"format must be one of: 'csr', 'csc', got {format}")
-        self.sparse = sparse
-        self.format = format
 
-    def make_node(self, *matrices):
+class BlockDiagonalMatrix(BaseBlockDiagonal):
+    def make_node(self, *matrices, name=None):
         if not matrices:
             raise ValueError("no matrices to allocate")
         dtype = largest_common_dtype(matrices)
         matrices = list(map(pt.as_tensor, matrices))
 
         if any(mat.type.ndim != 2 for mat in matrices):
             raise TypeError("all data arguments must be matrices")
-        if self.sparse:
-            out_type = pytensor.sparse.matrix(self.format, dtype=dtype)
-        else:
-            out_type = pytensor.tensor.matrix(dtype=dtype)
+
+        out_type = pytensor.tensor.matrix(dtype=dtype, name=name)
         return Apply(self, matrices, [out_type])
 
     def perform(self, node, inputs, output_storage, params=None):
         dtype = largest_common_dtype(inputs)
-        if self.sparse:
-            output_storage[0][0] = scipy.sparse.block_diag(inputs, self.format, dtype)
-        else:
-            output_storage[0][0] = scipy.linalg.block_diag(*inputs).astype(dtype)
+        output_storage[0][0] = scipy.linalg.block_diag(*inputs).astype(dtype)
 
-    def grad(self, inputs, gout):
-        shapes = pt.stack([i.shape for i in inputs])
-        index_end = shapes.cumsum(0)
-        index_begin = index_end - shapes
-        slices = [
-            ptb.ix_(
-                pt.arange(index_begin[i, 0], index_end[i, 0]),
-                pt.arange(index_begin[i, 1], index_end[i, 1]),
-            )
-            for i in range(len(inputs))
-        ]
-        return [gout[0][slc] for slc in slices]
 
-    def infer_shape(self, fgraph, nodes, shapes):
-        first, second = zip(*shapes)
-        return [(pt.add(*first), pt.add(*second))]
+_block_diagonal_matrix = BlockDiagonalMatrix()
 
 
-def block_diagonal(
-    matrices: typing.Sequence[TensorVariable],
-    sparse: bool = False,
-    format: Literal["csr", "csc"] = "csr",
-):
+def block_diag(*matrices: TensorVariable, name=None):
     """
-    Construct a block diagonal matrix from a sequence of input matrices.
+    Construct a block diagonal matrix from a sequence of input tensors.
+
+    Given the inputs `A`, `B` and `C`, the output will have these arrays arranged on the diagonal:
+
+    [[A, 0, 0],
+     [0, B, 0],
+     [0, 0, C]]
 
     Parameters
     ----------
-    matrices: sequence of tensors
+    A, B, C ... : tensors
         Input matrices to form the block diagonal matrix. Each matrix should have the same number of dimensions, and the
-        block diagonal matrix will be formed along the first axis of the matrices.
-    sparse : bool, optional
-        If True, the function returns a sparse matrix in the specified format. Default is True.
-    format: str, optional
-        The format of the output sparse matrix. One of 'csr' or 'csc'. Default is 'csr'. Ignored if sparse=False.
+        block diagonal matrix will be formed using the right-most two dimensions of each input matrix.
+    name: str, optional
+        Name of the block diagonal matrix.
 
     Returns
     -------
-    out: tensor or sparse matrix tensor
-        The block diagonal matrix formed from the input matrices. If `sparse` is True, the output will be a symbolic
-        sparse matrix in the specified format. Otherwise, a symbolic tensor will be returned.
+    out: tensor
+        The block diagonal matrix formed from the input matrices.
 
     Examples
     --------
@@ -991,30 +986,21 @@ def block_diagonal(
     ..code-block:: python
 
         import numpy as np
-        from pytensor.tensor.slinalg import block_diagonal
+        from pytensor.tensor.slinalg import block_diag
 
-        matrices = [np.array([[1, 2], [3, 4]]), np.array([[5, 6], [7, 8]])]
-        matrices = [pt.as_tensor_variable(mat) for mat in matrices]
-        result = block_diagonal(matrices)
+        A = pt.as_tensor_variable(np.array([[1, 2], [3, 4]]))
+        B = pt.as_tensor_variable(np.array([[5, 6], [7, 8]]))
 
+        result = block_diag(A, B, name='X')
         print(result.eval())
         >>> Out: array([[1, 2, 0, 0],
         >>> [3, 4, 0, 0],
         >>> [0, 0, 5, 6],
         >>> [0, 0, 7, 8]])
-
-    Create a sparse block diagonal matrix from two sparse 2x2 matrices:
-
-    ..code-block:: python
-
-        matrices_sparse = [csr_matrix([[1, 2], [3, 4]]), csr_matrix([[5, 6], [7, 8]])]
-        result_sparse = block_diagonal(matrices_sparse, sparse=True)
-
-    The resulting sparse block diagonal matrix `result_sparse` is in CSR format.
     """
     if len(matrices) == 1:  # graph optimization
-        return matrices[0]
-    return BlockDiagonalMatrix(sparse=sparse, format=format)(*matrices)
+        return matrices
+    return _block_diagonal_matrix(*matrices, name=name)
 
 
 __all__ = [
@@ -1027,5 +1013,5 @@ def block_diagonal(
     "solve_continuous_lyapunov",
     "solve_discrete_are",
     "solve_triangular",
-    "block_diagonal",
+    "block_diag",
 ]
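
The gradient shared through `BaseBlockDiagonal.grad` cumulatively sums the input shapes and slices the output gradient with `ix_`, so each input receives exactly its own block back. A plain-NumPy sketch of the same indexing (illustrative shapes only, not the symbolic PyTensor version):

    import numpy as np

    # Pretend upstream gradient for block_diag(A, B), with A 2x2 and B 3x3.
    g_out = np.arange(25.0).reshape(5, 5)
    shapes = np.array([[2, 2], [3, 3]])

    index_end = shapes.cumsum(0)       # row/col where each block ends
    index_begin = index_end - shapes   # row/col where each block starts

    grads = [
        g_out[np.ix_(np.arange(b[0], e[0]), np.arange(b[1], e[1]))]
        for b, e in zip(index_begin, index_end)
    ]

    print(grads[0].shape, grads[1].shape)  # (2, 2) and (3, 3): one block per input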

tests/sparse/test_basic.py

Lines changed: 13 additions & 0 deletions
@@ -51,6 +51,7 @@
     add_s_s_data,
     as_sparse_or_tensor_variable,
     as_sparse_variable,
+    block_diag,
     cast,
     clean,
     construct_sparse_from_list,
@@ -3389,3 +3390,15 @@ def _helper(x, y):
 )
 class TestSharedOptions:
     pass
+
+
+@pytest.mark.parametrize("format", ["csc", "csr"], ids=["csc", "csr"])
+def test_block_diagonal(format):
+    from scipy.sparse import block_diag as scipy_block_diag
+
+    matrices = [np.array([[1.0, 2.0], [3.0, 4.0]]), np.array([[5.0, 6.0], [7.0, 8.0]])]
+    result = block_diag(*matrices, format=format, name="X")
+    sp_result = scipy_block_diag(matrices, format=format)
+
+    assert isinstance(result.eval(), type(sp_result))
+    np.testing.assert_allclose(result.eval().toarray(), sp_result.toarray())

tests/tensor/test_slinalg.py

Lines changed: 2 additions & 7 deletions
@@ -15,7 +15,7 @@
     Solve,
     SolveBase,
     SolveTriangular,
-    block_diagonal,
+    block_diag,
     cho_solve,
     cholesky,
     eigvalsh,
@@ -666,10 +666,5 @@ def test_solve_discrete_are_grad():
 
 def test_block_diagonal():
     matrices = [np.array([[1.0, 2.0], [3.0, 4.0]]), np.array([[5.0, 6.0], [7.0, 8.0]])]
-    result = block_diagonal(matrices)
+    result = block_diag(*matrices)
     np.testing.assert_allclose(result.eval(), scipy.linalg.block_diag(*matrices))
-
-    result = block_diagonal(matrices, format="csr", sparse=True)
-    sp_result = scipy.sparse.block_diag(matrices, format="csr")
-    assert isinstance(result.eval(), type(sp_result))
-    np.testing.assert_allclose(result.eval().toarray(), sp_result.toarray())
