Address PR feedback

twiecki · claude · twiecki · commit 6f0f14c67ac9 · 2025-03-06T13:35:20.000+08:00
- Remove axis parameter from vecdot (no longer needed)
- Update type annotations to use TensorLike
- Add proper return type annotations
- Improve docstrings with examples
- Simplify test implementation and use pytest.parametrize
- Use simpler implementation for batched operations

🤖 Generated with Claude Code
Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/pytensor/tensor/math.py b/pytensor/tensor/math.py
@@ -4123,66 +4123,55 @@ def matmul(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None
 
 
 def vecdot(
-    x1: "ArrayLike",
-    x2: "ArrayLike",
-    axis: int = -1,
+    x1: "TensorLike",
+    x2: "TensorLike",
     dtype: Optional["DTypeLike"] = None,
-):
-    """Compute the dot product of two vectors along specified dimensions.
+) -> "TensorVariable":
+    """Compute the vector dot product of two arrays.
 
     Parameters
     ----------
     x1, x2
-        Input arrays, scalars not allowed.
-    axis
-        The axis along which to compute the dot product. By default, the last
-        axes of the inputs are used.
+        Input arrays with the same shape.
     dtype
-        The desired data-type for the array. If not given, then the type will
+        The desired data-type for the result. If not given, then the type will
         be determined as the minimum type required to hold the objects in the
         sequence.
 
     Returns
     -------
-    out : ndarray
-        The vector dot product of the inputs computed along the specified axes.
+    TensorVariable
+        The vector dot product of the inputs.
 
     Notes
     -----
-    This is similar to `dot` but with broadcasting. It computes the dot product
-    along the specified axes, treating these as vectors, and broadcasts across
-    the remaining axes.
+    This is similar to `np.vecdot` and computes the dot product of
+    vectors along the last axis of both inputs. Broadcasting is supported
+    across all other dimensions.
+
+    Examples
+    --------
+    >>> import pytensor.tensor as pt
+    >>> x = pt.matrix("x")
+    >>> y = pt.matrix("y")
+    >>> z = pt.vecdot(x, y)
+    >>> # Equivalent to np.sum(x * y, axis=-1)
     """
     x1 = as_tensor_variable(x1)
     x2 = as_tensor_variable(x2)
 
-    # Handle negative axis
-    if axis < 0:
-        x1_axis = axis % x1.type.ndim
-        x2_axis = axis % x2.type.ndim
-    else:
-        x1_axis = axis
-        x2_axis = axis
-
-    # Move the axes to the end for dot product calculation
-    x1_perm = list(range(x1.type.ndim))
-    x1_perm.append(x1_perm.pop(x1_axis))
-    x1_transposed = x1.transpose(x1_perm)
-
-    x2_perm = list(range(x2.type.ndim))
-    x2_perm.append(x2_perm.pop(x2_axis))
-    x2_transposed = x2.transpose(x2_perm)
-
-    # Use the inner product operation
-    out = _inner_prod(x1_transposed, x2_transposed)
+    # Use the inner product operation along the last axis
+    out = _inner_prod(x1, x2)
 
     if dtype is not None:
         out = out.astype(dtype)
 
     return out
 
 
-def matvec(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None):
+def matvec(
+    x1: "TensorLike", x2: "TensorLike", dtype: Optional["DTypeLike"] = None
+) -> "TensorVariable":
     """Compute the matrix-vector product.
 
     Parameters
@@ -4192,20 +4181,35 @@ def matvec(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None
     x2
         Input array for the vector with shape (..., K).
     dtype
-        The desired data-type for the array. If not given, then the type will
+        The desired data-type for the result. If not given, then the type will
         be determined as the minimum type required to hold the objects in the
         sequence.
 
     Returns
     -------
-    out : ndarray
+    TensorVariable
         The matrix-vector product with shape (..., M).
 
     Notes
     -----
-    This is similar to `matmul` where the second argument is a vector,
-    but with different broadcasting rules. Broadcasting happens over all but
-    the last dimension of x1 and all dimensions of x2 except the last.
+    This is equivalent to `numpy.matmul` where the second argument is a vector,
+    but with more intuitive broadcasting rules. Broadcasting happens over all but
+    the last two dimensions of x1 and all dimensions of x2 except the last.
+
+    Examples
+    --------
+    >>> import pytensor.tensor as pt
+    >>> import numpy as np
+    >>> # Matrix-vector product
+    >>> A = pt.matrix("A")  # shape (M, K)
+    >>> v = pt.vector("v")  # shape (K,)
+    >>> result = pt.matvec(A, v)  # shape (M,)
+    >>> # Equivalent to np.matmul(A, v)
+    >>>
+    >>> # Batched matrix-vector product
+    >>> batched_A = pt.tensor3("A")  # shape (B, M, K)
+    >>> batched_v = pt.matrix("v")  # shape (B, K)
+    >>> result = pt.matvec(batched_A, batched_v)  # shape (B, M)
     """
     x1 = as_tensor_variable(x1)
     x2 = as_tensor_variable(x2)
@@ -4218,7 +4222,9 @@ def matvec(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None
     return out
 
 
-def vecmat(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None):
+def vecmat(
+    x1: "TensorLike", x2: "TensorLike", dtype: Optional["DTypeLike"] = None
+) -> "TensorVariable":
     """Compute the vector-matrix product.
 
     Parameters
@@ -4228,20 +4234,35 @@ def vecmat(x1: "ArrayLike", x2: "ArrayLike", dtype: Optional["DTypeLike"] = None
     x2
         Input array for the matrix with shape (..., K, N).
     dtype
-        The desired data-type for the array. If not given, then the type will
+        The desired data-type for the result. If not given, then the type will
         be determined as the minimum type required to hold the objects in the
         sequence.
 
     Returns
     -------
-    out : ndarray
+    TensorVariable
         The vector-matrix product with shape (..., N).
 
     Notes
     -----
-    This is similar to `matmul` where the first argument is a vector,
-    but with different broadcasting rules. Broadcasting happens over all but
+    This is equivalent to `numpy.matmul` where the first argument is a vector,
+    but with more intuitive broadcasting rules. Broadcasting happens over all but
     the last dimension of x1 and all but the last two dimensions of x2.
+
+    Examples
+    --------
+    >>> import pytensor.tensor as pt
+    >>> import numpy as np
+    >>> # Vector-matrix product
+    >>> v = pt.vector("v")  # shape (K,)
+    >>> A = pt.matrix("A")  # shape (K, N)
+    >>> result = pt.vecmat(v, A)  # shape (N,)
+    >>> # Equivalent to np.matmul(v, A)
+    >>>
+    >>> # Batched vector-matrix product
+    >>> batched_v = pt.matrix("v")  # shape (B, K)
+    >>> batched_A = pt.tensor3("A")  # shape (B, K, N)
+    >>> result = pt.vecmat(batched_v, batched_A)  # shape (B, N)
     """
     x1 = as_tensor_variable(x1)
     x2 = as_tensor_variable(x2)
diff --git a/tests/tensor/test_math.py b/tests/tensor/test_math.py
@@ -2081,7 +2081,7 @@ def is_super_shape(var1, var2):
 
 class TestMatrixVectorOps:
     def test_vecdot(self):
-        """Test vecdot function with various input shapes and axis."""
+        """Test vecdot function with various input shapes."""
         rng = np.random.default_rng(seed=utt.fetch_seed())
 
         # Test vector-vector
@@ -2093,77 +2093,69 @@ def test_vecdot(self):
         y_val = random(5, rng=rng).astype(config.floatX)
         np.testing.assert_allclose(f(x_val, y_val), np.dot(x_val, y_val))
 
-        # Test with axis parameter
-        x = matrix()
-        y = matrix()
-        z0 = vecdot(x, y, axis=0)
-        z1 = vecdot(x, y, axis=1)
-        f0 = function([x, y], z0)
-        f1 = function([x, y], z1)
-
-        x_val = random(3, 4, rng=rng).astype(config.floatX)
-        y_val = random(3, 4, rng=rng).astype(config.floatX)
-        np.testing.assert_allclose(f0(x_val, y_val), np.sum(x_val * y_val, axis=0))
-        np.testing.assert_allclose(f1(x_val, y_val), np.sum(x_val * y_val, axis=1))
-
         # Test batched vectors
         x = tensor3()
         y = tensor3()
-        z = vecdot(x, y, axis=2)
+        z = vecdot(x, y)
         f = function([x, y], z)
 
         x_val = random(2, 3, 4, rng=rng).astype(config.floatX)
         y_val = random(2, 3, 4, rng=rng).astype(config.floatX)
-        np.testing.assert_allclose(f(x_val, y_val), np.sum(x_val * y_val, axis=2))
-
-    def test_matvec(self):
-        """Test matvec function with various input shapes."""
-        rng = np.random.default_rng(seed=utt.fetch_seed())
-
-        # Test matrix-vector
-        x = matrix()
-        y = vector()
-        z = matvec(x, y)
-        f = function([x, y], z)
-
-        x_val = random(3, 4, rng=rng).astype(config.floatX)
-        y_val = random(4, rng=rng).astype(config.floatX)
-        np.testing.assert_allclose(f(x_val, y_val), np.dot(x_val, y_val))
-
-        # Test batched
-        x = tensor3()
-        y = matrix()
-        z = matvec(x, y)
-        f = function([x, y], z)
-
-        x_val = random(2, 3, 4, rng=rng).astype(config.floatX)
-        y_val = random(2, 4, rng=rng).astype(config.floatX)
-        expected = np.array([np.dot(x_val[i], y_val[i]) for i in range(2)])
+        expected = np.sum(x_val * y_val, axis=-1)
         np.testing.assert_allclose(f(x_val, y_val), expected)
 
-    def test_vecmat(self):
-        """Test vecmat function with various input shapes."""
+    @pytest.mark.parametrize(
+        "func,x_shape,y_shape,make_expected",
+        [
+            # matvec tests - Matrix(M,K) @ Vector(K) -> Vector(M)
+            (matvec, (3, 4), (4,), lambda x, y: np.dot(x, y)),
+            # matvec batch tests - Tensor3(B,M,K) @ Matrix(B,K) -> Matrix(B,M)
+            (
+                matvec,
+                (2, 3, 4),
+                (2, 4),
+                lambda x, y: np.array([np.dot(x[i], y[i]) for i in range(len(x))]),
+            ),
+            # vecmat tests - Vector(K) @ Matrix(K,N) -> Vector(N)
+            (vecmat, (3,), (3, 4), lambda x, y: np.dot(x, y)),
+            # vecmat batch tests - Matrix(B,K) @ Tensor3(B,K,N) -> Matrix(B,N)
+            (
+                vecmat,
+                (2, 3),
+                (2, 3, 4),
+                lambda x, y: np.array([np.dot(x[i], y[i]) for i in range(len(x))]),
+            ),
+        ],
+    )
+    def test_mat_vec_ops(self, func, x_shape, y_shape, make_expected):
+        """Parametrized test for matvec and vecmat functions."""
         rng = np.random.default_rng(seed=utt.fetch_seed())
 
-        # Test vector-matrix
-        x = vector()
-        y = matrix()
-        z = vecmat(x, y)
-        f = function([x, y], z)
+        # Create PyTensor variables with appropriate dimensions
+        if len(x_shape) == 1:
+            x = vector()
+        elif len(x_shape) == 2:
+            x = matrix()
+        else:
+            x = tensor3()
 
-        x_val = random(3, rng=rng).astype(config.floatX)
-        y_val = random(3, 4, rng=rng).astype(config.floatX)
-        np.testing.assert_allclose(f(x_val, y_val), np.dot(x_val, y_val))
+        if len(y_shape) == 1:
+            y = vector()
+        elif len(y_shape) == 2:
+            y = matrix()
+        else:
+            y = tensor3()
 
-        # Test batched
-        x = matrix()
-        y = tensor3()
-        z = vecmat(x, y)
+        # Apply the function
+        z = func(x, y)
         f = function([x, y], z)
 
-        x_val = random(2, 3, rng=rng).astype(config.floatX)
-        y_val = random(2, 3, 4, rng=rng).astype(config.floatX)
-        expected = np.array([np.dot(x_val[i], y_val[i]) for i in range(2)])
+        # Create random values
+        x_val = random(*x_shape, rng=rng).astype(config.floatX)
+        y_val = random(*y_shape, rng=rng).astype(config.floatX)
+
+        # Compare with the expected result
+        expected = make_expected(x_val, y_val)
         np.testing.assert_allclose(f(x_val, y_val), expected)
 
     def test_matmul(self):