@@ -1675,6 +1675,7 @@ def verify_grad(
1675
1675
mode : Optional [Union ["Mode" , str ]] = None ,
1676
1676
cast_to_output_type : bool = False ,
1677
1677
no_debug_ref : bool = True ,
1678
+ sum_outputs : bool = False ,
1678
1679
):
1679
1680
"""Test a gradient by Finite Difference Method. Raise error on failure.
1680
1681
@@ -1722,7 +1723,9 @@ def verify_grad(
1722
1723
float16 is not handled here.
1723
1724
no_debug_ref
1724
1725
Don't use `DebugMode` for the numerical gradient function.
1725
-
1726
+ sum_outputs: bool, default False
1727
+ If True, the gradient of the sum of all outputs is verified. If False, an error is raised if the function has
1728
+ multiple outputs.
1726
1729
Notes
1727
1730
-----
1728
1731
By default this function does not support multiple outputs (see ``sum_outputs``). In `tests.scan.test_basic`
@@ -1782,7 +1785,7 @@ def verify_grad(
1782
1785
# fun can be either a function or an actual Op instance
1783
1786
o_output = fun (* tensor_pt )
1784
1787
1785
- if isinstance (o_output , list ):
1788
+ if isinstance (o_output , list ) and not sum_outputs :
1786
1789
raise NotImplementedError (
1787
1790
"Can't (yet) auto-test the gradient of a function with multiple outputs"
1788
1791
)
@@ -1793,7 +1796,7 @@ def verify_grad(
1793
1796
o_fn = fn_maker (tensor_pt , o_output , name = "gradient.py fwd" )
1794
1797
o_fn_out = o_fn (* [p .copy () for p in pt ])
1795
1798
1796
- if isinstance (o_fn_out , tuple ) or isinstance (o_fn_out , list ):
1799
+ if isinstance (o_fn_out , (tuple , list )) and not sum_outputs :
1797
1800
raise TypeError (
1798
1801
"It seems like you are trying to use verify_grad "
1799
1802
"on an Op or a function which outputs a list: there should"
@@ -1802,33 +1805,40 @@ def verify_grad(
1802
1805
1803
1806
# random_projection should not have elements too small,
1804
1807
# otherwise too much precision is lost in numerical gradient
1805
- def random_projection ():
1806
- plain = rng .random (o_fn_out . shape ) + 0.5
1807
- if cast_to_output_type and o_output . dtype == "float32" :
1808
- return np .array (plain , o_output . dtype )
1808
+ def random_projection (shape , dtype ):
1809
+ plain = rng .random (shape ) + 0.5
1810
+ if cast_to_output_type and dtype == "float32" :
1811
+ return np .array (plain , dtype )
1809
1812
return plain
1810
1813
1811
- t_r = shared (random_projection (), borrow = True )
1812
- t_r .name = "random_projection"
1813
-
1814
1814
# random projection of o onto t_r
1815
1815
# This sum() is defined above, it's not the builtin sum.
1816
- cost = pytensor .tensor .sum (t_r * o_output )
1816
+ if sum_outputs :
1817
+ t_rs = [
1818
+ shared (random_projection (o .shape , o .dtype ), borrow = True ) for o in o_fn_out
1819
+ ]
1820
+ for i , x in enumerate (t_rs ):
1821
+ x .name = f"random_projection_{i}"
1822
+ cost = pytensor .tensor .sum (
1823
+ [pytensor .tensor .sum (x * y ) for x , y in zip (t_rs , o_output )]
1824
+ )
1825
+ else :
1826
+ t_r = shared (random_projection (o_fn_out .shape , o_fn_out .dtype ), borrow = True )
1827
+ t_r .name = "random_projection"
1828
+
1829
+ cost = pytensor .tensor .sum (t_r * o_output )
1817
1830
1818
1831
if no_debug_ref :
1819
1832
mode_for_cost = mode_not_slow (mode )
1820
1833
else :
1821
1834
mode_for_cost = mode
1822
1835
1823
1836
cost_fn = fn_maker (tensor_pt , cost , name = "gradient.py cost" , mode = mode_for_cost )
1824
-
1825
1837
symbolic_grad = grad (cost , tensor_pt , disconnected_inputs = "ignore" )
1826
-
1827
1838
grad_fn = fn_maker (tensor_pt , symbolic_grad , name = "gradient.py symbolic grad" )
1828
1839
1829
1840
for test_num in range (n_tests ):
1830
1841
num_grad = numeric_grad (cost_fn , [p .copy () for p in pt ], eps , out_type )
1831
-
1832
1842
analytic_grad = grad_fn (* [p .copy () for p in pt ])
1833
1843
1834
1844
# Since `tensor_pt` is a list, `analytic_grad` should be one too.
@@ -1853,7 +1863,16 @@ def random_projection():
1853
1863
1854
1864
# get new random projection for next test
1855
1865
if test_num < n_tests - 1 :
1856
- t_r .set_value (random_projection (), borrow = True )
1866
+ if sum_outputs :
1867
+ for r in t_rs :
1868
+ r .set_value (
1869
+ random_projection (r .get_value ().shape , r .get_value ().dtype ), borrow = True
1870
+ )
1871
+ else :
1872
+ t_r .set_value (
1873
+ random_projection (t_r .get_value ().shape , t_r .get_value ().dtype ),
1874
+ borrow = True ,
1875
+ )
1857
1876
1858
1877
1859
1878
class GradientError (Exception ):
0 commit comments