2
2
from collections .abc import Callable
3
3
from typing import cast
4
4
5
+ import pytensor .tensor as pt
5
6
from pytensor import Variable
6
7
from pytensor import tensor as pt
7
8
from pytensor .graph import Apply , FunctionGraph
@@ -859,13 +860,24 @@ def rewrite_cholesky_eye_to_eye(fgraph, node):
859
860
@register_canonicalize
860
861
@register_stabilize
861
862
@node_rewriter ([Blockwise ])
862
- def rewrite_cholesky_diag_from_eye_mul (fgraph , node ):
863
+ def rewrite_cholesky_diag_to_sqrt_diag (fgraph , node ):
863
864
# Find whether cholesky op is being applied
864
865
if not isinstance (node .op .core_op , Cholesky ):
865
866
return None
866
867
867
- # Check whether input is diagonal from multiplication of identity matrix with a tensor
868
868
inputs = node .inputs [0 ]
869
+ # Check for use of pt.diag first
870
+ if (
871
+ inputs .owner
872
+ and isinstance (inputs .owner .op , AllocDiag )
873
+ and AllocDiag .is_offset_zero (inputs .owner )
874
+ ):
875
+ cholesky_input = inputs .owner .inputs [0 ]
876
+ if cholesky_input .type .ndim == 1 :
877
+ cholesky_val = pt .diag (cholesky_input ** 0.5 )
878
+ return [cholesky_val ]
879
+
880
+ # Check if the input is an elemwise multiply with identity matrix -- this also results in a diagonal matrix
869
881
inputs_or_none = _find_diag_from_eye_mul (inputs )
870
882
if inputs_or_none is None :
871
883
return None
@@ -876,6 +888,13 @@ def rewrite_cholesky_diag_from_eye_mul(fgraph, node):
876
888
if len (non_eye_inputs ) != 1 :
877
889
return None
878
890
879
- eye_input , non_eye_input = eye_input [ 0 ], non_eye_inputs [0 ]
891
+ non_eye_input = non_eye_inputs [0 ]
880
892
881
- return [eye_input * (non_eye_input ** 0.5 )]
893
+ # Now, we can simply return the matrix consisting of sqrt values of the original diagonal elements
894
+ # For a matrix input, first extract its diagonal (the only entries that survive the multiplication with the identity matrix) and take the square root of just those values
895
+ if non_eye_input .type .broadcastable [- 2 :] == (False , False ):
896
+ # For Matrix
897
+ return [eye_input * (non_eye_input .diagonal (axis1 = - 1 , axis2 = - 2 ) ** 0.5 )]
898
+ else :
899
+ # For Vector or Scalar
900
+ return [eye_input * (non_eye_input ** 0.5 )]
0 commit comments