2
2
from collections .abc import Callable
3
3
from typing import cast
4
4
5
+ import pytensor .tensor as pt
5
6
from pytensor import Variable
6
7
from pytensor import tensor as pt
7
8
from pytensor .graph import Apply , FunctionGraph
@@ -928,13 +929,24 @@ def rewrite_cholesky_eye_to_eye(fgraph, node):
928
929
@register_canonicalize
929
930
@register_stabilize
930
931
@node_rewriter ([Blockwise ])
931
- def rewrite_cholesky_diag_from_eye_mul (fgraph , node ):
932
+ def rewrite_cholesky_diag_to_sqrt_diag (fgraph , node ):
932
933
# Find whether cholesky op is being applied
933
934
if not isinstance (node .op .core_op , Cholesky ):
934
935
return None
935
936
936
- # Check whether input is diagonal from multiplication of identity matrix with a tensor
937
937
inputs = node .inputs [0 ]
938
+ # Check for use of pt.diag first
939
+ if (
940
+ inputs .owner
941
+ and isinstance (inputs .owner .op , AllocDiag )
942
+ and AllocDiag .is_offset_zero (inputs .owner )
943
+ ):
944
+ cholesky_input = inputs .owner .inputs [0 ]
945
+ if cholesky_input .type .ndim == 1 :
946
+ cholesky_val = pt .diag (cholesky_input ** 0.5 )
947
+ return [cholesky_val ]
948
+
949
+ # Check if the input is an elementwise multiplication with an identity matrix -- this also results in a diagonal matrix
938
950
inputs_or_none = _find_diag_from_eye_mul (inputs )
939
951
if inputs_or_none is None :
940
952
return None
@@ -945,6 +957,13 @@ def rewrite_cholesky_diag_from_eye_mul(fgraph, node):
945
957
if len (non_eye_inputs ) != 1 :
946
958
return None
947
959
948
- eye_input , non_eye_input = eye_input [ 0 ], non_eye_inputs [0 ]
960
+ non_eye_input = non_eye_inputs [0 ]
949
961
950
- return [eye_input * (non_eye_input ** 0.5 )]
962
+ # Now, we can simply return the matrix consisting of sqrt values of the original diagonal elements
963
+ # For a matrix, we have to first extract the diagonal (non-zero values) and then only use those
964
+ if non_eye_input .type .broadcastable [- 2 :] == (False , False ):
965
+ # For Matrix
966
+ return [eye_input * (non_eye_input .diagonal (axis1 = - 1 , axis2 = - 2 ) ** 0.5 )]
967
+ else :
968
+ # For Vector or Scalar
969
+ return [eye_input * (non_eye_input ** 0.5 )]
0 commit comments