From 99e3418d8b2a6db5fdf1b27fdf0c81fb678686e3 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 3 Jan 2020 09:02:55 -0800 Subject: [PATCH 1/5] REF: standardize listlike coercion --- pandas/core/arrays/datetimes.py | 20 +++++---------- pandas/core/arrays/period.py | 43 ++++++++++++++++++++++++-------- pandas/core/arrays/timedeltas.py | 28 +++++++++++++++------ 3 files changed, 60 insertions(+), 31 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index dcdde4d7fb13a..cc54fb5e5af13 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -161,11 +161,9 @@ def wrapper(self, other): raise ValueError("Lengths must match") else: if isinstance(other, list): - try: - other = type(self)._from_sequence(other) - except ValueError: - other = np.array(other, dtype=np.object_) - elif not isinstance(other, (np.ndarray, DatetimeArray)): + other = np.array(other) + + if not isinstance(other, (np.ndarray, cls)): # Following Timestamp convention, __eq__ is all-False # and __ne__ is all True, others raise TypeError. return invalid_comparison(self, other, op) @@ -179,20 +177,14 @@ def wrapper(self, other): op, self.astype(object), other ) o_mask = isna(other) + elif not (is_datetime64_dtype(other) or is_datetime64tz_dtype(other)): # e.g. is_timedelta64_dtype(other) return invalid_comparison(self, other, op) + else: self._assert_tzawareness_compat(other) - - if ( - is_datetime64_dtype(other) - and not is_datetime64_ns_dtype(other) - or not hasattr(other, "asi8") - ): - # e.g. 
other.dtype == 'datetime64[s]' - # or an object-dtype ndarray - other = type(self)._from_sequence(other) + other = type(self)._from_sequence(other) result = op(self.view("i8"), other.view("i8")) o_mask = other._isnan diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 056c80717e54f..cd4cc922da5d0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -29,6 +29,7 @@ is_datetime64_dtype, is_float_dtype, is_list_like, + is_object_dtype, is_period_dtype, pandas_dtype, ) @@ -41,6 +42,7 @@ ) from pandas.core.dtypes.missing import isna, notna +from pandas.core import ops import pandas.core.algorithms as algos from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com @@ -92,22 +94,43 @@ def wrapper(self, other): self._check_compatible_with(other) result = ordinal_op(other.ordinal) - elif isinstance(other, cls): - self._check_compatible_with(other) - - result = ordinal_op(other.asi8) - - mask = self._isnan | other._isnan - if mask.any(): - result[mask] = nat_result - return result elif other is NaT: result = np.empty(len(self.asi8), dtype=bool) result.fill(nat_result) - else: + + elif not is_list_like(other): return invalid_comparison(self, other, op) + else: + if isinstance(other, list): + other = np.array(other) + + if not isinstance(other, (np.ndarray, cls)): + return invalid_comparison(self, other, op) + + if is_object_dtype(other): + with np.errstate(all="ignore"): + result = ops.comp_method_OBJECT_ARRAY( + op, self.astype(object), other + ) + o_mask = isna(other) + + elif not is_period_dtype(other): + # e.g. 
is_timedelta64_dtype(other) + return invalid_comparison(self, other, op) + + else: + assert isinstance(other, cls), type(other) + + self._check_compatible_with(other) + + result = ordinal_op(other.asi8) + o_mask = other._isnan + + if o_mask.any(): + result[o_mask] = nat_result + if self._hasnans: result[self._isnan] = nat_result diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 098ad268784ed..1874517f0f2e4 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -38,7 +38,7 @@ ) from pandas.core.dtypes.missing import isna -from pandas.core import nanops +from pandas.core import nanops, ops from pandas.core.algorithms import checked_add_with_arr import pandas.core.common as com from pandas.core.ops.common import unpack_zerodim_and_defer @@ -103,15 +103,29 @@ def wrapper(self, other): raise ValueError("Lengths must match") else: - try: - other = type(self)._from_sequence(other)._data - except (ValueError, TypeError): + if isinstance(other, list): + other = np.array(other) + + if not isinstance(other, (np.ndarray, cls)): + return invalid_comparison(self, other, op) + + if is_object_dtype(other): + with np.errstate(all="ignore"): + result = ops.comp_method_OBJECT_ARRAY( + op, self.astype(object), other + ) + o_mask = isna(other) + + elif not is_timedelta64_dtype(other): + # e.g. 
other is datetimearray return invalid_comparison(self, other, op) - result = op(self.view("i8"), other.view("i8")) - result = com.values_from_object(result) + else: + other = type(self)._from_sequence(other) + + result = op(self.view("i8"), other.view("i8")) + o_mask = other._isnan - o_mask = np.array(isna(other)) if o_mask.any(): result[o_mask] = nat_result From b0291dc01375fbad505fbb608377310662bd8654 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 3 Jan 2020 10:42:23 -0800 Subject: [PATCH 2/5] TST: tests for periodarray comparisons --- pandas/tests/arithmetic/test_period.py | 73 ++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/pandas/tests/arithmetic/test_period.py b/pandas/tests/arithmetic/test_period.py index 3ad7a6d8e465c..6eef99a124b1a 100644 --- a/pandas/tests/arithmetic/test_period.py +++ b/pandas/tests/arithmetic/test_period.py @@ -50,6 +50,79 @@ def test_compare_invalid_scalar(self, box_with_array, scalar): parr = tm.box_expected(pi, box_with_array) assert_invalid_comparison(parr, scalar, box_with_array) + @pytest.mark.parametrize( + "other", + [ + pd.date_range("2000", periods=4).array, + pd.timedelta_range("1D", periods=4).array, + np.arange(4), + np.arange(4).astype(np.float64), + list(range(4)), + ], + ) + def test_compare_invalid_listlike(self, box_with_array, other): + pi = pd.period_range("2000", periods=4) + parr = tm.box_expected(pi, box_with_array) + assert_invalid_comparison(parr, other, box_with_array) + + @pytest.mark.parametrize("other_box", [list, np.array, lambda x: x.astype(object)]) + def test_compare_object_dtype(self, box_with_array, other_box): + pi = pd.period_range("2000", periods=5) + parr = tm.box_expected(pi, box_with_array) + + xbox = np.ndarray if box_with_array is pd.Index else box_with_array + + other = other_box(pi) + + expected = np.array([True, True, True, True, True]) + expected = tm.box_expected(expected, xbox) + + result = parr == other + tm.assert_equal(result, expected) + result = parr 
<= other + tm.assert_equal(result, expected) + result = parr >= other + tm.assert_equal(result, expected) + + result = parr != other + tm.assert_equal(result, ~expected) + result = parr < other + tm.assert_equal(result, ~expected) + result = parr > other + tm.assert_equal(result, ~expected) + + other = other_box(pi[::-1]) + + expected = np.array([False, False, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr == other + tm.assert_equal(result, expected) + + expected = np.array([True, True, True, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr <= other + tm.assert_equal(result, expected) + + expected = np.array([False, False, True, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr >= other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr != other + tm.assert_equal(result, expected) + + expected = np.array([True, True, False, False, False]) + expected = tm.box_expected(expected, xbox) + result = parr < other + tm.assert_equal(result, expected) + + expected = np.array([False, False, False, True, True]) + expected = tm.box_expected(expected, xbox) + result = parr > other + tm.assert_equal(result, expected) + class TestPeriodIndexComparisons: # TODO: parameterize over boxes From 5cef6bac319138ac04e096f5544292db9209be10 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 4 Jan 2020 17:28:57 -0800 Subject: [PATCH 3/5] TST: tests for fixed DTA cases --- pandas/tests/arithmetic/common.py | 2 +- pandas/tests/arithmetic/test_datetime64.py | 23 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arithmetic/common.py b/pandas/tests/arithmetic/common.py index 7c3ceb3dba2b6..83d19b8a20ac3 100644 --- a/pandas/tests/arithmetic/common.py +++ b/pandas/tests/arithmetic/common.py @@ -70,7 +70,7 @@ def assert_invalid_comparison(left, right, box): result 
= right != left tm.assert_equal(result, ~expected) - msg = "Invalid comparison between" + msg = "Invalid comparison between|Cannot compare type|not supported between" with pytest.raises(TypeError, match=msg): left < right with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 20ea8d31ebbe2..82709ebad813b 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -85,6 +85,29 @@ def test_dt64arr_cmp_scalar_invalid(self, other, tz_naive_fixture, box_with_arra dtarr = tm.box_expected(rng, box_with_array) assert_invalid_comparison(dtarr, other, box_with_array) + @pytest.mark.parametrize( + "other", + [ + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.timedelta_range("1ns", periods=10).array, + np.array(pd.timedelta_range("1ns", periods=10)), + list(pd.timedelta_range("1ns", periods=10)), + pd.timedelta_range("1 Day", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_dt64arr_cmp_arraylike_invalid(self, other, tz_naive_fixture): + # We don't parametrize this over box_with_array because listlike + # other plays poorly with assert_invalid_comparison reversed checks + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="ns", periods=10, tz=tz)._data + assert_invalid_comparison(dta, other, tm.to_array) + def test_dt64arr_nat_comparison(self, tz_naive_fixture, box_with_array): # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly tz = tz_naive_fixture From f51ed016baabb84d0cc4abeca2942ea574b52c42 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 4 Jan 2020 17:36:30 -0800 Subject: [PATCH 4/5] test for mixed-validity --- pandas/tests/arithmetic/test_datetime64.py | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff 
--git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 82709ebad813b..1dfd95551f68d 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -108,6 +108,29 @@ def test_dt64arr_cmp_arraylike_invalid(self, other, tz_naive_fixture): dta = date_range("1970-01-01", freq="ns", periods=10, tz=tz)._data assert_invalid_comparison(dta, other, tm.to_array) + def test_dt64arr_cmp_mixed_invalid(self, tz_naive_fixture): + tz = tz_naive_fixture + + dta = date_range("1970-01-01", freq="h", periods=5, tz=tz)._data + + other = np.array([0, 1, 2, dta[3], pd.Timedelta(days=1)]) + result = dta == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = dta != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + dta < other + with pytest.raises(TypeError, match=msg): + dta > other + with pytest.raises(TypeError, match=msg): + dta <= other + with pytest.raises(TypeError, match=msg): + dta >= other + def test_dt64arr_nat_comparison(self, tz_naive_fixture, box_with_array): # GH#22242, GH#22163 DataFrame considered NaT == ts incorrectly tz = tz_naive_fixture From eb0289207513086b9ee13b111cdbd734eeb5fff1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 4 Jan 2020 17:42:26 -0800 Subject: [PATCH 5/5] TST: timedelta tests --- pandas/tests/arithmetic/test_timedelta64.py | 43 +++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 9b0d3712e9bea..158da37aa7239 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -76,6 +76,49 @@ def test_td64_comparisons_invalid(self, box_with_array, invalid): assert_invalid_comparison(obj, invalid, box) + 
@pytest.mark.parametrize( + "other", + [ + list(range(10)), + np.arange(10), + np.arange(10).astype(np.float32), + np.arange(10).astype(object), + pd.date_range("1970-01-01", periods=10, tz="UTC").array, + np.array(pd.date_range("1970-01-01", periods=10)), + list(pd.date_range("1970-01-01", periods=10)), + pd.date_range("1970-01-01", periods=10).astype(object), + pd.period_range("1971-01-01", freq="D", periods=10).array, + pd.period_range("1971-01-01", freq="D", periods=10).astype(object), + ], + ) + def test_td64arr_cmp_arraylike_invalid(self, other): + # We don't parametrize this over box_with_array because listlike + # other plays poorly with assert_invalid_comparison reversed checks + + rng = timedelta_range("1 days", periods=10)._data + assert_invalid_comparison(rng, other, tm.to_array) + + def test_td64arr_cmp_mixed_invalid(self): + rng = timedelta_range("1 days", periods=5)._data + + other = np.array([0, 1, 2, rng[3], pd.Timestamp.now()]) + result = rng == other + expected = np.array([False, False, False, True, False]) + tm.assert_numpy_array_equal(result, expected) + + result = rng != other + tm.assert_numpy_array_equal(result, ~expected) + + msg = "Invalid comparison between|Cannot compare type|not supported between" + with pytest.raises(TypeError, match=msg): + rng < other + with pytest.raises(TypeError, match=msg): + rng > other + with pytest.raises(TypeError, match=msg): + rng <= other + with pytest.raises(TypeError, match=msg): + rng >= other + class TestTimedelta64ArrayComparisons: # TODO: All of these need to be parametrized over box