Respond to PR feedback

twiecki · Claude · twiecki · commit d3018d2c0f81 · 2025-03-06T13:34:01.000+08:00
- Update type annotations to remove unnecessary quotes
- Improve docstrings with concrete shape examples
- Use NumPy equivalents (vecdot, matvec, vecmat) in docstrings
- Simplify function implementations by removing redundant checks
- Substantially simplify tests to use a single test with proper dimensions
- Use proper 'int32' dtype test for better coverage
- Update test to handle both NumPy&lt;2.0 and NumPy&gt;=2.0

🤖 Generated with Claude Code
Co-Authored-By: Claude &lt;noreply@anthropic.com&gt;
diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py
@@ -2842,10 +2842,10 @@ def matmul(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None
 
 
 def vecdot(
-    x1: "TensorLike",
-    x2: "TensorLike",
+    x1: TensorLike,
+    x2: TensorLike,
     dtype: Optional["DTypeLike"] = None,
-) -> "TensorVariable":
+) -> TensorVariable:
     """Compute the vector dot product of two arrays.
 
     Parameters
@@ -2872,21 +2872,20 @@ def vecdot(
     --------
     >>> import pytensor.tensor as pt
     >>> # Vector dot product with shape (5,) inputs
-    >>> x = pt.vector("x")  # shape (5,)
-    >>> y = pt.vector("y")  # shape (5,)
+    >>> x = pt.vector("x", shape=(5,))  # shape (5,)
+    >>> y = pt.vector("y", shape=(5,))  # shape (5,)
     >>> z = pt.vecdot(x, y)  # scalar output
-    >>> # Equivalent to numpy.vecdot(x, y) or numpy.sum(x * y)
+    >>> # Equivalent to numpy.vecdot(x, y)
     >>>
     >>> # With batched inputs of shape (3, 5)
-    >>> x_batch = pt.matrix("x")  # shape (3, 5)
-    >>> y_batch = pt.matrix("y")  # shape (3, 5)
+    >>> x_batch = pt.matrix("x", shape=(3, 5))  # shape (3, 5)
+    >>> y_batch = pt.matrix("y", shape=(3, 5))  # shape (3, 5)
     >>> z_batch = pt.vecdot(x_batch, y_batch)  # shape (3,)
-    >>> # Equivalent to numpy.sum(x_batch * y_batch, axis=-1)
+    >>> # Equivalent to numpy.vecdot(x_batch, y_batch)
     """
     x1 = as_tensor_variable(x1)
     x2 = as_tensor_variable(x2)
 
-    # Use the inner product operation along the last axis
     out = _inner_prod(x1, x2)
 
     if dtype is not None:
@@ -2896,8 +2895,8 @@ def vecdot(
 
 
 def matvec(
-    x1: "TensorLike", x2: "TensorLike", dtype: Optional["DTypeLike"] = None
-) -> "TensorVariable":
+    x1: TensorLike, x2: TensorLike, dtype: Optional["DTypeLike"] = None
+) -> TensorVariable:
     """Compute the matrix-vector product.
 
     Parameters
@@ -2918,23 +2917,23 @@ def matvec(
 
     Notes
     -----
-    This is equivalent to `numpy.matmul` where the second argument is a vector,
-    but with more intuitive broadcasting rules. Broadcasting happens over all but
-    the last two dimensions of x1 and all dimensions of x2 except the last.
+    This is equivalent to `numpy.matvec` and computes the matrix-vector product
+    with broadcasting over batch dimensions.
 
     Examples
     --------
     >>> import pytensor.tensor as pt
     >>> # Matrix-vector product
-    >>> A = pt.matrix("A")  # shape (3, 4)
-    >>> v = pt.vector("v")  # shape (4,)
+    >>> A = pt.matrix("A", shape=(3, 4))  # shape (3, 4)
+    >>> v = pt.vector("v", shape=(4,))  # shape (4,)
     >>> result = pt.matvec(A, v)  # shape (3,)
-    >>> # Equivalent to numpy.matmul(A, v)
+    >>> # Equivalent to numpy.matvec(A, v)
     >>>
     >>> # Batched matrix-vector product
-    >>> batched_A = pt.tensor3("A")  # shape (2, 3, 4)
-    >>> batched_v = pt.matrix("v")  # shape (2, 4)
+    >>> batched_A = pt.tensor3("A", shape=(2, 3, 4))  # shape (2, 3, 4)
+    >>> batched_v = pt.matrix("v", shape=(2, 4))  # shape (2, 4)
     >>> result = pt.matvec(batched_A, batched_v)  # shape (2, 3)
+    >>> # Equivalent to numpy.matvec(batched_A, batched_v)
     """
     x1 = as_tensor_variable(x1)
     x2 = as_tensor_variable(x2)
@@ -2948,8 +2947,8 @@ def matvec(
 
 
 def vecmat(
-    x1: "TensorLike", x2: "TensorLike", dtype: Optional["DTypeLike"] = None
-) -> "TensorVariable":
+    x1: TensorLike, x2: TensorLike, dtype: Optional["DTypeLike"] = None
+) -> TensorVariable:
     """Compute the vector-matrix product.
 
     Parameters
@@ -2970,23 +2969,23 @@ def vecmat(
 
     Notes
     -----
-    This is equivalent to `numpy.matmul` where the first argument is a vector,
-    but with more intuitive broadcasting rules. Broadcasting happens over all but
-    the last dimension of x1 and all but the last two dimensions of x2.
+    This is equivalent to `numpy.vecmat` and computes the vector-matrix product
+    with broadcasting over batch dimensions.
 
     Examples
     --------
     >>> import pytensor.tensor as pt
     >>> # Vector-matrix product
-    >>> v = pt.vector("v")  # shape (3,)
-    >>> A = pt.matrix("A")  # shape (3, 4)
+    >>> v = pt.vector("v", shape=(3,))  # shape (3,)
+    >>> A = pt.matrix("A", shape=(3, 4))  # shape (3, 4)
     >>> result = pt.vecmat(v, A)  # shape (4,)
-    >>> # Equivalent to numpy.matmul(v, A)
+    >>> # Equivalent to numpy.vecmat(v, A)
     >>>
     >>> # Batched vector-matrix product
-    >>> batched_v = pt.matrix("v")  # shape (2, 3)
-    >>> batched_A = pt.tensor3("A")  # shape (2, 3, 4)
+    >>> batched_v = pt.matrix("v", shape=(2, 3))  # shape (2, 3)
+    >>> batched_A = pt.tensor3("A", shape=(2, 3, 4))  # shape (2, 3, 4)
     >>> result = pt.vecmat(batched_v, batched_A)  # shape (2, 4)
+    >>> # Equivalent to numpy.vecmat(batched_v, batched_A)
     """
     x1 = as_tensor_variable(x1)
     x2 = as_tensor_variable(x2)
diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py
@@ -2082,69 +2082,65 @@ def is_super_shape(var1, var2):
 class TestMatrixVectorOps:
     """Test vecdot, matvec, and vecmat helper functions."""
 
-    @pytest.mark.parametrize(
-        "func,x_shape,y_shape,np_func,batch_axis",
-        [
-            # vecdot
-            (vecdot, (5,), (5,), lambda x, y: np.dot(x, y), None),
-            (vecdot, (3, 5), (3, 5), lambda x, y: np.sum(x * y, axis=-1), -1),
-            # matvec
-            (matvec, (3, 4), (4,), lambda x, y: np.dot(x, y), None),
-            (
-                matvec,
-                (2, 3, 4),
-                (2, 4),
-                lambda x, y: np.array([np.dot(x[i], y[i]) for i in range(len(x))]),
-                0,
-            ),
-            # vecmat
-            (vecmat, (3,), (3, 4), lambda x, y: np.dot(x, y), None),
-            (
-                vecmat,
-                (2, 3),
-                (2, 3, 4),
-                lambda x, y: np.array([np.dot(x[i], y[i]) for i in range(len(x))]),
-                0,
-            ),
-        ],
-    )
-    def test_matrix_vector_ops(self, func, x_shape, y_shape, np_func, batch_axis):
-        """Test all matrix-vector helper functions."""
+    def test_matrix_vector_ops(self):
+        """Test all matrix vector operations with batched inputs."""
         rng = np.random.default_rng(seed=utt.fetch_seed())
 
-        # Create PyTensor variables with appropriate dimensions
-        if len(x_shape) == 1:
-            x = vector()
-        elif len(x_shape) == 2:
-            x = matrix()
-        else:
-            x = tensor3()
-
-        if len(y_shape) == 1:
-            y = vector()
-        elif len(y_shape) == 2:
-            y = matrix()
-        else:
-            y = tensor3()
-
-        # Test basic functionality
-        z = func(x, y)
-        f = function([x, y], z)
-
-        x_val = random(*x_shape, rng=rng).astype(config.floatX)
-        y_val = random(*y_shape, rng=rng).astype(config.floatX)
-
-        expected = np_func(x_val, y_val)
-        np.testing.assert_allclose(f(x_val, y_val), expected)
-
-        # Test with dtype parameter (to improve code coverage)
-        # Use float64 to ensure we can detect the difference
-        z_dtype = func(x, y, dtype="float64")
-        f_dtype = function([x, y], z_dtype)
-
-        result = f_dtype(x_val, y_val)
-        assert result.dtype == np.float64
-        np.testing.assert_allclose(result, expected)
+        # Create test data with batch dimension (2)
+        batch_size = 2
+        dim_k = 4  # Common dimension
+        dim_m = 3  # Matrix rows
+        dim_n = 5  # Matrix columns
+
+        # Create input tensors with appropriate shapes
+        # For matvec: x1(b,m,k) @ x2(b,k) -> out(b,m)
+        # For vecmat: x1(b,k) @ x2(b,k,n) -> out(b,n)
+
+        # Create tensor variables
+        mat_mk = tensor(name="mat_mk", shape=(batch_size, dim_m, dim_k))
+        mat_kn = tensor(name="mat_kn", shape=(batch_size, dim_k, dim_n))
+        vec_k = tensor(name="vec_k", shape=(batch_size, dim_k))
+
+        # Create test values
+        mat_mk_val = random(batch_size, dim_m, dim_k, rng=rng).astype("float64")
+        mat_kn_val = random(batch_size, dim_k, dim_n, rng=rng).astype("float64")
+        vec_k_val = random(batch_size, dim_k, rng=rng).astype("float64")
+
+        # Test 1: vecdot with matching dimensions
+        vecdot_out = vecdot(vec_k, vec_k, dtype="int32")
+        vecdot_fn = function([vec_k], vecdot_out)
+        result = vecdot_fn(vec_k_val)
+
+        # Check dtype
+        assert result.dtype == np.int32
+
+        # Calculate expected manually
+        expected_vecdot = np.zeros((batch_size,), dtype=np.int32)
+        for i in range(batch_size):
+            expected_vecdot[i] = np.sum(vec_k_val[i] * vec_k_val[i])
+        np.testing.assert_allclose(result, expected_vecdot)
+
+        # Test 2: matvec - matrix-vector product
+        matvec_out = matvec(mat_mk, vec_k)
+        matvec_fn = function([mat_mk, vec_k], matvec_out)
+        result_matvec = matvec_fn(mat_mk_val, vec_k_val)
+
+        # Calculate expected manually
+        expected_matvec = np.zeros((batch_size, dim_m), dtype=np.float64)
+        for i in range(batch_size):
+            expected_matvec[i] = np.dot(mat_mk_val[i], vec_k_val[i])
+        np.testing.assert_allclose(result_matvec, expected_matvec)
+
+        # Test 3: vecmat - vector-matrix product
+        vecmat_out = vecmat(vec_k, mat_kn)
+        vecmat_fn = function([vec_k, mat_kn], vecmat_out)
+        result_vecmat = vecmat_fn(vec_k_val, mat_kn_val)
+
+        # Calculate expected manually
+        expected_vecmat = np.zeros((batch_size, dim_n), dtype=np.float64)
+        for i in range(batch_size):
+            expected_vecmat[i] = np.dot(vec_k_val[i], mat_kn_val[i])
+        np.testing.assert_allclose(result_vecmat, expected_vecmat)
 
 
 class TestTensordot: