API: Remove broadcasting ambiguity from np.linalg.solve

asmeurer · asmeurer · commit adb3f18dec37 · 2024-03-01T15:51:41.000-07:00
Previously the np.linalg.solve documentation stated:

    a : (..., M, M) array_like
        Coefficient matrix.
    b : {(..., M,), (..., M, K)}, array_like

however, this is inherently ambiguous. For example, if a has shape (2, 2, 2) and b has shape (2, 2), b could be treated as a (2,) stack of (2,) column vectors, in which case the result should have shape (2, 2), or as a single 2x2 matrix, in which case the result should have shape (2, 2, 2).

NumPy resolved this ambiguity in a confusing way, which was to treat b as (..., M) whenever b.ndim == a.ndim - 1, and as (..., M, K) otherwise.

A much more consistent way to handle this ambiguity is to treat b as a single vector if and only if it is 1-dimensional, i.e., use

    b : {(M,), (..., M, K)}, array_like

This is consistent with, for instance, the matmul operator, which only uses the special 1-D vector logic if an operand is exactly 1-dimensional, and treats the operands as (stacks of) 2-D matrices otherwise.

This updates np.linalg.solve() to use this behavior.

This is a backwards compatibility break, as any instance where the b array has more than one dimension and exactly one fewer dimension than the a array will now use the matrix logic, potentially returning a different result with a different shape. The previous behavior can be manually emulated with something like

    np.linalg.solve(a, b[..., None])[..., 0]

since b as a (M,) vector is equivalent to b as a (M, 1) matrix (or the user could manually import and use the internal solve1() gufunc, which implements the b-as-vector logic).

This change aligns the solve() function with the array API, which resolves this broadcasting ambiguity in this way. See https://data-apis.org/array-api/latest/extensions/generated/array_api.linalg.solve.html#array_api.linalg.solve and data-apis/array-api#285.

Fixes numpy#15349
Fixes numpy#25583
diff --git a/numpy/linalg/_linalg.py b/numpy/linalg/_linalg.py
@@ -327,7 +327,7 @@ def solve(a, b):
     ----------
     a : (..., M, M) array_like
         Coefficient matrix.
-    b : {(..., M,), (..., M, K)}, array_like
+    b : {(M,), (..., M, K)}, array_like
         Ordinate or "dependent variable" values.
 
     Returns
@@ -359,6 +359,13 @@ def solve(a, b):
     `lstsq` for the least-squares best "solution" of the
     system/equation.
 
+    .. versionchanged:: 2.0
+
+       The b array is only treated as a shape (M,) column vector if it is
+       exactly 1-dimensional. In all other instances it is treated as a stack
+       of (M, K) matrices. Previously b would be treated as a stack of (M,)
+       vectors if b.ndim was equal to a.ndim - 1.
+
     References
     ----------
     .. [1] G. Strang, *Linear Algebra and Its Applications*, 2nd Ed., Orlando,
@@ -390,7 +397,7 @@ def solve(a, b):
 
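-    # We use the b = (..., M,) logic, only if the number of extra dimensions
-    # match exactly
+    # We use the b = (M,) vector logic (solve1) only if b is exactly
+    # 1-dimensional; otherwise b is treated as a stack of (M, K) matrices.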
-    if b.ndim == a.ndim - 1:
+    if b.ndim == 1:
         gufunc = _umath_linalg.solve1
     else:
         gufunc = _umath_linalg.solve
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
@@ -475,6 +475,23 @@ def test_types(self, dtype):
         x = np.array([[1, 0.5], [0.5, 1]], dtype=dtype)
         assert_equal(linalg.solve(x, x).dtype, dtype)
 
+    def test_1_d(self):
+        class ArraySubclass(np.ndarray):
+            pass
+        a = np.arange(8).reshape(2, 2, 2)
+        b = np.arange(2).view(ArraySubclass)
+        result = linalg.solve(a, b)
+        assert result.shape == (2, 2)
+
+        # If b is anything other than 1-D it should be treated as a stack of
+        # matrices
+        b = np.arange(4).reshape(2, 2).view(ArraySubclass)
+        result = linalg.solve(a, b)
+        assert result.shape == (2, 2, 2)
+
+        b = np.arange(2).reshape(1, 2).view(ArraySubclass)
+        assert_raises(ValueError, linalg.solve, a, b)
+
     def test_0_size(self):
         class ArraySubclass(np.ndarray):
             pass
@@ -497,9 +514,9 @@ class ArraySubclass(np.ndarray):
         assert_raises(ValueError, linalg.solve, a[0:0], b[0:0])
 
         # Test zero "single equations" with 0x0 matrices.
-        b = np.arange(2).reshape(1, 2).view(ArraySubclass)
+        b = np.arange(2).view(ArraySubclass)
         expected = linalg.solve(a, b)[:, 0:0]
-        result = linalg.solve(a[:, 0:0, 0:0], b[:, 0:0])
+        result = linalg.solve(a[:, 0:0, 0:0], b[0:0])
         assert_array_equal(result, expected)
         assert_(isinstance(result, ArraySubclass))