diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index dc807f467f65e..0d53ac3a5a56c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -66,10 +66,6 @@ def _cat_compare_op(op): @unpack_zerodim_and_defer(opname) def func(self, other): - if is_list_like(other) and len(other) != len(self): - # TODO: Could this fail if the categories are listlike objects? - raise ValueError("Lengths must match.") - if not self.ordered: if opname in ["__lt__", "__gt__", "__le__", "__ge__"]: raise TypeError( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 9ee150447eb5f..973fdbdc4014c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -85,9 +85,6 @@ def _validate_comparison_value(self, other): elif not is_list_like(other): raise InvalidComparison(other) - elif len(other) != len(self): - raise ValueError("Lengths must match") - else: try: other = self._validate_listlike(other, opname, allow_object=True) @@ -1230,9 +1227,6 @@ def _add_timedelta_arraylike(self, other): """ # overridden by PeriodArray - if len(self) != len(other): - raise ValueError("cannot add indices of unequal length") - if isinstance(other, np.ndarray): # ndarray[timedelta64]; wrap in TimedeltaIndex for op from pandas.core.arrays import TimedeltaArray diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 8a1cacfe304ca..4306ebc1ccc18 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -657,9 +657,6 @@ def _assert_tzawareness_compat(self, other): def _sub_datetime_arraylike(self, other): """subtract DatetimeArray/Index or ndarray[datetime64]""" - if len(self) != len(other): - raise ValueError("cannot add indices of unequal length") - if isinstance(other, np.ndarray): assert is_datetime64_dtype(other) other = type(self)(other) diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 5a90ea4a36a21..3ca7e028913c6 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -517,8 +517,6 @@ def cmp_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) - if len(self) != len(other): - raise ValueError("Lengths must match to compare") if other is libmissing.NA: # numpy does not handle pd.NA well as "other" scalar (it returns @@ -622,8 +620,6 @@ def integer_arithmetic_method(self, other): raise NotImplementedError( "can only perform ops with 1-d structures" ) - if len(self) != len(other): - raise ValueError("Lengths must match") if not (is_float_dtype(other) or is_integer_dtype(other)): raise TypeError("can only perform ops with numeric values") diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index a62f94b1a3665..99a00837ab407 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -465,10 +465,6 @@ def __mul__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self) and not is_timedelta64_dtype(other): - # Exclude timedelta64 here so we correctly raise TypeError - # for that instead of ValueError - raise ValueError("Cannot multiply with unequal lengths") if is_object_dtype(other.dtype): # this multiplication will succeed only if all elements of other @@ -512,10 +508,7 @@ def __truediv__(self, other): # e.g. list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): # let numpy handle it return self._data / other @@ -565,10 +558,7 @@ def __rtruediv__(self, other): # e.g. list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide vectors with unequal lengths") - - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): # let numpy handle it return other / self._data @@ -617,10 +607,8 @@ def __floordiv__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate @@ -671,10 +659,8 @@ def __rfloordiv__(self, other): if not hasattr(other, "dtype"): # list, tuple other = np.array(other) - if len(other) != len(self): - raise ValueError("Cannot divide with unequal lengths") - elif is_timedelta64_dtype(other.dtype): + if is_timedelta64_dtype(other.dtype): other = type(self)(other) # numpy timedelta64 does not natively support floordiv, so operate diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cf17ce9db6b1a..191fe5bee7530 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -75,6 +75,7 @@ from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.ops import get_op_result_name +from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ensure_key_mapped from pandas.core.strings import StringMethods @@ -108,10 +109,8 @@ def _make_comparison_op(op, cls): + @unpack_zerodim_and_defer(op.__name__) def cmp_method(self, other): - if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)): - if other.ndim > 0 and len(self) != len(other): - raise ValueError("Lengths must match to compare") if is_object_dtype(self.dtype) and isinstance(other, ABCCategorical): left = type(other)(self._values, dtype=other.dtype) diff --git a/pandas/core/ops/common.py b/pandas/core/ops/common.py index 515a0a5198d74..1fb9398083884 100644 --- a/pandas/core/ops/common.py +++ b/pandas/core/ops/common.py @@ -1,10 +1,13 @@ """ Boilerplate functions used in defining binary operations. """ +from collections import UserDict from functools import wraps from typing import Callable -from pandas._libs.lib import item_from_zerodim +import numpy as np + +from pandas._libs.lib import is_list_like, item_from_zerodim from pandas._typing import F from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries @@ -62,6 +65,25 @@ def new_method(self, other): other = item_from_zerodim(other) + if isinstance(self, (ABCSeries, ABCDataFrame)) and isinstance( + other, (ABCSeries, ABCDataFrame) + ): + # we dont require length matches + pass + elif is_list_like(other, allow_sets=False) and not isinstance( + other, (dict, UserDict) + ): + if len(other) != len(self): + if len(other) == 1 and not hasattr(other, "dtype"): + # i.e. unpack scalar list, but leave e.g. Categorical, + # for which the scalar behavior doesnt match the + # array behavior + other = other[0] + else: + raise ValueError( + "Lengths must match", self.shape, np.shape(other), type(other) + ) + return method(self, other) return new_method diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 8c480faa4ee81..0fb3cb1025639 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -2206,7 +2206,7 @@ def test_sub_dti_dti(self): # different length raises ValueError dti1 = date_range("20130101", periods=3) dti2 = date_range("20130101", periods=4) - msg = "cannot add indices of unequal length" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): dti1 - dti2 diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index b085ee968dadb..269235b943e46 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -647,7 +647,7 @@ def test_mul_datelike_raises(self, numeric_idx): def test_mul_size_mismatch_raises(self, numeric_idx): idx = numeric_idx - msg = "operands could not be broadcast together" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): idx * idx[0:3] with pytest.raises(ValueError, match=msg): diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index e3eec1f781948..d540ff923c929 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -451,7 +451,7 @@ def test_addition_ops(self): tm.assert_index_equal(result, expected) # unequal length - msg = "cannot add indices of unequal length" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): tdi + dti[0:1] with pytest.raises(ValueError, match=msg): @@ -1730,7 +1730,7 @@ def test_tdarr_div_length_mismatch(self, box_with_array): mismatched = [1, 2, 3, 4] rng = tm.box_expected(rng, box_with_array) - msg = "Cannot divide vectors|Unable to coerce to Series" + msg = "Lengths must match|Unable to coerce to Series" for obj in [mismatched, mismatched[:2]]: # one shorter, one longer for other in [obj, np.array(obj), pd.Index(obj)]: @@ -1912,12 +1912,15 @@ def test_td64arr_mul_tdscalar_invalid(self, box_with_array, scalar_td): def test_td64arr_mul_too_short_raises(self, box_with_array): idx = TimedeltaIndex(np.arange(5, dtype="int64")) idx = tm.box_expected(idx, box_with_array) - msg = ( - "cannot use operands with types dtype|" - "Cannot multiply with unequal lengths|" - "Unable to coerce to Series" + msg = "|".join( + [ + "Lengths must match", # <- EA, Index, Series + "cannot use operands with types dtype", # <- DataFrame + "Unable to coerce to Series", # <- Series + ] ) - with pytest.raises(TypeError, match=msg): + with pytest.raises((ValueError, TypeError), match=msg): + # length check before dtype check idx * idx[:3] with pytest.raises(ValueError, match=msg): idx * np.array([1, 2]) diff --git a/pandas/tests/arrays/boolean/test_logical.py b/pandas/tests/arrays/boolean/test_logical.py index bf4775bbd7b32..a61746d46daeb 100644 --- a/pandas/tests/arrays/boolean/test_logical.py +++ b/pandas/tests/arrays/boolean/test_logical.py @@ -46,7 +46,7 @@ def test_empty_ok(self, all_logical_operators): def test_logical_length_mismatch_raises(self, all_logical_operators): op_name = all_logical_operators a = pd.array([True, False, None], dtype="boolean") - msg = "Lengths must match to compare" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): getattr(a, op_name)([True, False]) diff --git a/pandas/tests/arrays/integer/test_arithmetic.py b/pandas/tests/arrays/integer/test_arithmetic.py index 18f1dac3c13b2..b7fdd8581101b 100644 --- a/pandas/tests/arrays/integer/test_arithmetic.py +++ b/pandas/tests/arrays/integer/test_arithmetic.py @@ -232,8 +232,9 @@ def test_error(self, data, all_arithmetic_operators): result = opa(pd.DataFrame({"A": s})) assert result is NotImplemented - msg = r"can only perform ops with 1-d structures" - with pytest.raises(NotImplementedError, match=msg): + # msg = r"can only perform ops with 1-d structures" + msg = "Lengths must match" + with pytest.raises(ValueError, match=msg): opa(np.arange(len(s)).reshape(-1, len(s))) @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)]) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index b681abf03a2b3..0894408b79c6c 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -98,7 +98,7 @@ def test_add_2d(): a + b s = pd.Series(a) - with pytest.raises(ValueError, match="3 != 1"): + with pytest.raises(ValueError, match="Lengths must match"): s + b diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 1b2bfa8573c21..b4df7337fd99a 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -574,7 +574,7 @@ def test_comparison(self): msg = r"unorderable types: Interval\(\) > int\(\)" with pytest.raises(TypeError, match=msg): self.index > np.arange(2) - msg = "Lengths must match to compare" + msg = "Lengths must match" with pytest.raises(ValueError, match=msg): self.index > np.arange(3)