Fix failing tests and add Cholesky rewrite for pt.diag inputs

tanish1729 · jessegrabowski · commit 7074e50044e6 · 2024-10-08T22:12:19.000+08:00
diff --git a/pytensor/tensor/rewriting/linalg.py b/pytensor/tensor/rewriting/linalg.py
@@ -2,6 +2,7 @@
 from collections.abc import Callable
 from typing import cast
 
+import pytensor.tensor as pt
 from pytensor import Variable
 from pytensor import tensor as pt
 from pytensor.graph import Apply, FunctionGraph
@@ -859,13 +860,24 @@ def rewrite_cholesky_eye_to_eye(fgraph, node):
 @register_canonicalize
 @register_stabilize
 @node_rewriter([Blockwise])
-def rewrite_cholesky_diag_from_eye_mul(fgraph, node):
+def rewrite_cholesky_diag_to_sqrt_diag(fgraph, node):
     # Find whether cholesky op is being applied
     if not isinstance(node.op.core_op, Cholesky):
         return None
 
-    # Check whether input is diagonal from multiplcation of identity matrix with a tensor
     inputs = node.inputs[0]
+    # Check for use of pt.diag first
+    if (
+        inputs.owner
+        and isinstance(inputs.owner.op, AllocDiag)
+        and AllocDiag.is_offset_zero(inputs.owner)
+    ):
+        cholesky_input = inputs.owner.inputs[0]
+        if cholesky_input.type.ndim == 1:
+            cholesky_val = pt.diag(cholesky_input**0.5)
+            return [cholesky_val]
+
+    # Check if the input is an elemwise multiply with identity matrix -- this also results in a diagonal matrix
     inputs_or_none = _find_diag_from_eye_mul(inputs)
     if inputs_or_none is None:
         return None
@@ -876,6 +888,13 @@ def rewrite_cholesky_diag_from_eye_mul(fgraph, node):
     if len(non_eye_inputs) != 1:
         return None
 
-    eye_input, non_eye_input = eye_input[0], non_eye_inputs[0]
+    non_eye_input = non_eye_inputs[0]
 
-    return [eye_input * (non_eye_input**0.5)]
+    # Return a diagonal matrix holding the square roots of the original diagonal elements.
+    # For a matrix input, only its diagonal survives the multiplication with eye, so extract it first.
+    if non_eye_input.type.broadcastable[-2:] == (False, False):
+        # For Matrix
+        return [eye_input * (non_eye_input.diagonal(axis1=-1, axis2=-2) ** 0.5)]
+    else:
+        # For Vector or Scalar
+        return [eye_input * (non_eye_input**0.5)]
diff --git a/tests/tensor/rewriting/test_linalg.py b/tests/tensor/rewriting/test_linalg.py
@@ -782,8 +782,8 @@ def test_cholesky_eye_rewrite():
 
 @pytest.mark.parametrize(
     "shape",
-    [(), (7,), (1, 7), (7, 1), (7, 7), (3, 7, 7)],
-    ids=["scalar", "vector", "row_vec", "col_vec", "matrix", "batched_input"],
+    [(), (7,), (7, 7)],
+    ids=["scalar", "vector", "matrix"],
 )
 def test_cholesky_diag_from_eye_mul(shape):
     # Initializing x based on scalar/vector/matrix
@@ -814,3 +814,28 @@ def test_cholesky_diag_from_eye_mul(shape):
         atol=1e-3 if config.floatX == "float32" else 1e-8,
         rtol=1e-3 if config.floatX == "float32" else 1e-8,
     )
+
+
+def test_cholesky_diag_from_diag():
+    x = pt.dvector("x")
+    x_diag = pt.diag(x)
+    x_cholesky = pt.linalg.cholesky(x_diag)
+
+    # Rewrite check: after FAST_RUN compilation no Cholesky node may remain in the graph
+    f_rewritten = function([x], x_cholesky, mode="FAST_RUN")
+    nodes = f_rewritten.maker.fgraph.apply_nodes
+
+    assert not any(isinstance(node.op, Cholesky) for node in nodes)
+
+    # Numeric check: compare against NumPy's Cholesky of the same diagonal matrix
+    x_test = np.random.rand(10)
+    x_test_matrix = np.eye(10) * x_test  # dense 10x10 matrix with x_test on its diagonal
+    cholesky_val = np.linalg.cholesky(x_test_matrix)
+    rewritten_cholesky = f_rewritten(x_test)
+
+    assert_allclose(
+        cholesky_val,
+        rewritten_cholesky,
+        atol=1e-3 if config.floatX == "float32" else 1e-8,
+        rtol=1e-3 if config.floatX == "float32" else 1e-8,
+    )