Fix bug in rewrite_det_diag_to_prod_diag where batch case was incorrectly passing

jessegrabowski · jessegrabowski · commit dbfe92c04361 · 2024-07-11T00:20:18.000+08:00
diff --git a/pytensor/tensor/rewriting/linalg.py b/pytensor/tensor/rewriting/linalg.py
@@ -404,19 +404,44 @@ def _find_diag_from_eye_mul(potential_mul_input):
     eye_input = [
         mul_input
         for mul_input in inputs_to_mul
-        if mul_input.owner and isinstance(mul_input.owner.op, Eye)
+        if mul_input.owner
+        and (
+            isinstance(mul_input.owner.op, Eye)
+            or
+            # This whole condition checks if there is an Eye hiding inside a DimShuffle.
+            # This arises from batched elementwise multiplication between a tensor and an eye, e.g.:
+            # tensor(shape=(None, 3, 3)) * eye(3). This is still potentially valid for diag rewrites.
+            (
+                isinstance(mul_input.owner.op, DimShuffle)
+                and mul_input.owner.inputs[0].owner is not None
+                and isinstance(mul_input.owner.inputs[0].owner.op, Eye)
+            )
+        )
     ]
-
-    # Check if 1's are being put on the main diagonal only (k = 0)
-    if eye_input and getattr(eye_input[0].owner.inputs[-1], "data", -1).item() != 0:
+    if not eye_input:
         return None
 
-    # If the broadcast pattern of eye_input is not (False, False), we do not get a diagonal matrix and thus, dont need to apply the rewrite
-    if eye_input and eye_input[0].broadcastable[-2:] != (False, False):
+    eye_input = eye_input[0]
+
+    # If this multiplication came from a batched operation, the Eye will be wrapped in a DimShuffle
+    if isinstance(eye_input.owner.op, DimShuffle):
+        inner_eye = eye_input.owner.inputs[0]
+        if not isinstance(inner_eye.owner.op, Eye):
+            return None
+        # Bail out unless the 1's are on the main diagonal (k = 0) and the
+        # identity matrix is not degenerate (i.e. not a column or row matrix)
+        if getattr(
+            inner_eye.owner.inputs[-1], "data", -1
+        ).item() != 0 or inner_eye.broadcastable[-2:] != (False, False):
+            return None
+
+    elif getattr(
+        eye_input.owner.inputs[-1], "data", -1
+    ).item() != 0 or eye_input.broadcastable[-2:] != (False, False):
         return None
 
     # Get all non Eye inputs (scalars/matrices/vectors)
-    non_eye_inputs = list(set(inputs_to_mul) - set(eye_input))
+    non_eye_inputs = list(set(inputs_to_mul) - {eye_input})
     return eye_input, non_eye_inputs
 
 
@@ -448,15 +473,22 @@ def rewrite_det_diag_to_prod_diag(fgraph, node):
     inputs = node.inputs[0]
 
     # Check for use of pt.diag first
-    if inputs.owner and isinstance(inputs.owner.op, AllocDiag2):
+    if (
+        inputs.owner
+        and isinstance(inputs.owner.op, AllocDiag2)
+        and inputs.owner.op.offset == 0
+    ):
         diag_input = inputs.owner.inputs[0]
+        diag_input.dprint()
         det_val = diag_input.prod(axis=-1)
         return [det_val]
 
     # Check if the input is an elemwise multiply with identity matrix -- this also results in a diagonal matrix
     inputs_or_none = _find_diag_from_eye_mul(inputs)
+
     if inputs_or_none is None:
         return None
+
     eye_input, non_eye_inputs = inputs_or_none
 
     # Dealing with only one other input
diff --git a/tests/tensor/rewriting/test_linalg.py b/tests/tensor/rewriting/test_linalg.py
@@ -396,20 +396,26 @@ def test_local_lift_through_linalg(constructor, f_op, f, g_op, g):
 
 @pytest.mark.parametrize(
     "shape",
-    [(), (7,), (1, 7), (7, 1), (7, 7), (3, 7, 7)],
+    [(), (7,), (1, 7), (7, 1), (7, 7), pytest.param((3, 7, 7))],
     ids=["scalar", "vector", "row_vec", "col_vec", "matrix", "batched_input"],
 )
 def test_det_diag_from_eye_mul(shape):
     # Initializing x based on scalar/vector/matrix
     x = pt.tensor("x", shape=shape)
     y = pt.eye(7) * x
+
     # Calculating determinant value using pt.linalg.det
     z_det = pt.linalg.det(y)
 
     # REWRITE TEST
-    f_rewritten = function([x], z_det, mode="FAST_RUN")
+    with pytensor.config.change_flags(optimizer_verbose=True):
+        f_rewritten = function([x], z_det, mode="FAST_RUN")
     nodes = f_rewritten.maker.fgraph.apply_nodes
-    assert not any(isinstance(node.op, Det) for node in nodes)
+
+    assert not any(
+        isinstance(node.op, Det) or isinstance(getattr(node.op, "core_op", None), Det)
+        for node in nodes
+    )
 
     # NUMERIC VALUE TEST
     if len(shape) == 0:
@@ -418,6 +424,7 @@ def test_det_diag_from_eye_mul(shape):
         x_test = np.random.rand(*shape).astype(config.floatX)
     else:
         x_test = np.random.rand(*shape).astype(config.floatX)
+
     x_test_matrix = np.eye(7) * x_test
     det_val = np.linalg.det(x_test_matrix)
     rewritten_val = f_rewritten(x_test)