From df4e465090f23bc2e45f565eb52be35cca5a1aae Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 12:47:53 -0700 Subject: [PATCH 1/6] BUG: Timedelta == ndarray[td64] --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/_libs/tslibs/timedeltas.pyx | 52 +++++++++---------- pandas/core/internals/managers.py | 1 + pandas/tests/arithmetic/test_datetime64.py | 2 +- .../tests/scalar/timedelta/test_arithmetic.py | 22 +++++++- 5 files changed, 47 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5c39377899a20..83920eabc3970 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -395,6 +395,7 @@ Timedelta - Bug in dividing ``np.nan`` or ``None`` by :class:`Timedelta`` incorrectly returning ``NaT`` (:issue:`31869`) - Timedeltas now understand ``µs`` as identifier for microsecond (:issue:`32899`) - :class:`Timedelta` string representation now includes nanoseconds, when nanoseconds are non-zero (:issue:`9309`) +- Bug in comparing a :class:`Timedelta`` object against a ``np.ndarray`` with ``timedelta64`` dtype incorrectly viewing all entries as unequal (:issue:`????`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3af2279e2440f..c5092c8630f06 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -778,36 +778,32 @@ cdef class _Timedelta(timedelta): if isinstance(other, _Timedelta): ots = other - elif PyDelta_Check(other) or isinstance(other, Tick): + elif (is_timedelta64_object(other) or PyDelta_Check(other) + or isinstance(other, Tick)): ots = Timedelta(other) - else: - ndim = getattr(other, "ndim", -1) + # TODO: watch out for overflows - if ndim != -1: - if ndim == 0: - if is_timedelta64_object(other): - other = Timedelta(other) - else: - if op == Py_EQ: - return False - elif op == Py_NE: - return True - # only allow ==, != ops - raise TypeError(f'Cannot compare type ' - 
f'{type(self).__name__} with ' - f'type {type(other).__name__}') - if util.is_array(other): - return PyObject_RichCompare(np.array([self]), other, op) - return PyObject_RichCompare(other, self, reverse_ops[op]) - else: - if other is NaT: - return PyObject_RichCompare(other, self, reverse_ops[op]) - elif op == Py_EQ: - return False - elif op == Py_NE: - return True - raise TypeError(f'Cannot compare type {type(self).__name__} with ' - f'type {type(other).__name__}') + elif other is NaT: + return op == Py_NE + + elif util.is_array(other): + # TODO: watch out for zero-dim + if other.dtype.kind == "m": + return PyObject_RichCompare(self.asm8, other, op) + elif other.dtype.kind == "O": + # operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) + if op == Py_EQ: + return np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + return np.ones(other.shape, dtype=bool) + return NotImplemented # let other raise TypeError + + else: + return NotImplemented return cmp_scalar(self.value, ots.value, op) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bfb16b48d832c..a4f2daac65211 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -880,6 +880,7 @@ def to_dict(self, copy: bool = True): for b in self.blocks: bd.setdefault(str(b.dtype), []).append(b) + # TODO(EA2D): the combine will be unnecessary with 2D EAs return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} def fast_xs(self, loc: int) -> ArrayLike: diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 9a6ae76658949..56c5647d865d3 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -734,7 +734,7 @@ def test_dti_cmp_object_dtype(self): result = dti == other expected = np.array([True] * 5 + [False] * 5) tm.assert_numpy_array_equal(result, expected) - msg = "Cannot 
compare type" + msg = ">=' not supported between instances of 'Timestamp' and 'Timedelta'" with pytest.raises(TypeError, match=msg): dti >= other diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 12572648fca9e..fb22b2f52f727 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -904,6 +904,24 @@ def test_compare_timedelta_ndarray(self): expected = np.array([False, False]) tm.assert_numpy_array_equal(result, expected) + def test_compare_td64_ndarray(self): + arr = np.arange(5).astype("timedelta64[ns]") + td = pd.Timedelta(arr[1]) + + expected = np.array([False, True, False, False, False], dtype=bool) + + result = td == arr + tm.assert_numpy_array_equal(result, expected) + + result = arr == td + tm.assert_numpy_array_equal(result, expected) + + result = td != arr + tm.assert_numpy_array_equal(result, ~expected) + + result = arr != td + tm.assert_numpy_array_equal(result, ~expected) + @pytest.mark.skip(reason="GH#20829 is reverted until after 0.24.0") def test_compare_custom_object(self): """ @@ -943,7 +961,7 @@ def __gt__(self, other): def test_compare_unknown_type(self, val): # GH#20829 t = Timedelta("1s") - msg = "Cannot compare type Timedelta with type (int|str)" + msg = "not supported between instances of 'Timedelta' and '(int|str)'" with pytest.raises(TypeError, match=msg): t >= val with pytest.raises(TypeError, match=msg): @@ -984,7 +1002,7 @@ def test_ops_error_str(): with pytest.raises(TypeError, match=msg): left + right - msg = "Cannot compare type" + msg = "not supported between instances of" with pytest.raises(TypeError, match=msg): left > right From 7637501e84607f7ca52fbd0f47e7d218bad43093 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 12:50:16 -0700 Subject: [PATCH 2/6] GH ref --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/tests/scalar/timedelta/test_arithmetic.py | 1 + 2 files changed, 2 
insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 83920eabc3970..1161ac2d19049 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -395,7 +395,7 @@ Timedelta - Bug in dividing ``np.nan`` or ``None`` by :class:`Timedelta`` incorrectly returning ``NaT`` (:issue:`31869`) - Timedeltas now understand ``µs`` as identifier for microsecond (:issue:`32899`) - :class:`Timedelta` string representation now includes nanoseconds, when nanoseconds are non-zero (:issue:`9309`) -- Bug in comparing a :class:`Timedelta`` object against a ``np.ndarray`` with ``timedelta64`` dtype incorrectly viewing all entries as unequal (:issue:`????`) +- Bug in comparing a :class:`Timedelta` object against a ``np.ndarray`` with ``timedelta64`` dtype incorrectly viewing all entries as unequal (:issue:`33441`) Timezones ^^^^^^^^^ diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index fb22b2f52f727..7baeb8f5673bc 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -905,6 +905,7 @@ def test_compare_timedelta_ndarray(self): tm.assert_numpy_array_equal(result, expected) def test_compare_td64_ndarray(self): + # GH#33441 arr = np.arange(5).astype("timedelta64[ns]") td = pd.Timedelta(arr[1]) From 0ddd104a2886ab955766872e6862921a010909b9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 6 Apr 2020 16:04:04 -0700 Subject: [PATCH 3/6] BUG: Timestamp cmp ndarray[dt64] --- pandas/_libs/tslibs/c_timestamp.pyx | 12 +++++ .../scalar/timestamp/test_comparisons.py | 52 +++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 04fadf220388f..68987030e8b4e 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -114,6 +114,18 @@ cdef class _Timestamp(datetime): 
return NotImplemented elif is_array(other): # avoid recursion error GH#15183 + if other.dtype.kind == "M": + if self.tz is None: + return PyObject_RichCompare(self.asm8, other, op) + raise TypeError( + "Cannot compare tz-naive and tz-aware timestamps" + ) + if other.dtype.kind == "O": + # Operate element-wise + return np.array( + [PyObject_RichCompare(self, x, op) for x in other], + dtype=bool, + ) return PyObject_RichCompare(np.array([self]), other, op) return PyObject_RichCompare(other, self, reverse_ops[op]) else: diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index 4581e736b2ea1..27aef8c4a9eb7 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -5,9 +5,61 @@ import pytest from pandas import Timestamp +import pandas._testing as tm class TestTimestampComparison: + def test_comparison_dt64_ndarray(self): + ts = Timestamp.now() + ts2 = Timestamp("2019-04-05") + arr = np.array([[ts.asm8, ts2.asm8]], dtype="M8[ns]") + + result = ts == arr + expected = np.array([[True, False]], dtype=bool) + tm.assert_numpy_array_equal(result, expected) + + result = arr == ts + tm.assert_numpy_array_equal(result, expected) + + result = ts != arr + tm.assert_numpy_array_equal(result, ~expected) + + result = arr != ts + tm.assert_numpy_array_equal(result, ~expected) + + result = ts2 < arr + tm.assert_numpy_array_equal(result, expected) + + result = arr < ts2 + tm.assert_numpy_array_equal(result, np.array([[False, False]], dtype=bool)) + + result = ts2 <= arr + tm.assert_numpy_array_equal(result, np.array([[True, True]], dtype=bool)) + + result = arr <= ts2 + tm.assert_numpy_array_equal(result, ~expected) + + result = ts >= arr + tm.assert_numpy_array_equal(result, np.array([[True, True]], dtype=bool)) + + result = arr >= ts + tm.assert_numpy_array_equal(result, np.array([[True, False]], dtype=bool)) + + @pytest.mark.parametrize("reverse", [True, 
False]) + def test_comparison_dt64_ndarray_tzaware(self, reverse, all_compare_operators): + op = getattr(operator, all_compare_operators.strip("__")) + + ts = Timestamp.now("UTC") + arr = np.array([ts.asm8, ts.asm8], dtype="M8[ns]") + + left, right = ts, arr + if reverse: + left, right = arr, ts + + msg = "Cannot compare tz-naive and tz-aware timestamps" + with pytest.raises(TypeError, match=msg): + op(left, right) + def test_comparison_object_array(self): # GH#15183 ts = Timestamp("2011-01-03 00:00:00-0500", tz="US/Eastern") From bbf3e50c89eaf653094d70cc3ea42acf620ddf6b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 16:39:10 -0700 Subject: [PATCH 4/6] REF: remove td/ts kludge in replace_list --- pandas/core/internals/managers.py | 9 +++------ pandas/tests/series/methods/test_replace.py | 10 ++++++++++ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a4f2daac65211..e0dfe26e93434 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -7,14 +7,13 @@ import numpy as np -from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib +from pandas._libs import internals as libinternals, lib from pandas._typing import ArrayLike, DtypeObj, Label from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( find_common_type, infer_dtype_from_scalar, - maybe_convert_objects, maybe_promote, ) from pandas.core.dtypes.common import ( @@ -33,6 +32,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays.sparse import SparseDtype from pandas.core.base import PandasObject +import pandas.core.common as com from pandas.core.construction import extract_array from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import Index, ensure_index @@ -626,11 +626,8 @@ def comp(s, regex=False): """ if isna(s): return isna(values) - if isinstance(s, (Timedelta, 
Timestamp)) and getattr(s, "tz", None) is None: - return _compare_or_regex_search( - maybe_convert_objects(values), s.asm8, regex - ) + s = com.maybe_box_datetimelike(s) return _compare_or_regex_search(values, s, regex) masks = [comp(s, regex) for s in src_list] diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index bea8cb8b105e7..3fa3aa7fdca06 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -108,6 +108,16 @@ def test_replace_gh5319(self): expected = pd.Series([pd.Timestamp.min, ts], dtype=object) tm.assert_series_equal(expected, result) + def test_replace_timedelta_td64(self): + tdi = pd.timedelta_range(0, periods=5) + ser = pd.Series(tdi) + + # Using a single dict argument means we go through replace_list + result = ser.replace({ser[1]: ser[3]}) + + expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]]) + tm.assert_series_equal(result, expected) + def test_replace_with_single_list(self): ser = pd.Series([0, 1, 2, 3, 4]) result = ser.replace([1, 2, 3]) From f4a1deb74a6f584319314e4b398cf075f08db290 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 18:41:02 -0700 Subject: [PATCH 5/6] comments --- pandas/core/internals/managers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index e0dfe26e93434..4e4ca85340a3e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -640,11 +640,10 @@ def comp(s, regex=False): # replace ALWAYS will return a list rb = [blk if inplace else blk.copy()] for i, (s, d) in enumerate(zip(src_list, dest_list)): - # TODO: assert/validate that `d` is always a scalar? 
new_rb: List[Block] = [] for b in rb: m = masks[i][b.mgr_locs.indexer] - convert = i == src_len + convert = i == src_len # only convert once at the end result = b._replace_coerce( mask=m, to_replace=s, From d006ed38533aa401516690f797b2d679da83e6dd Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 18:42:26 -0700 Subject: [PATCH 6/6] comment --- pandas/core/internals/managers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 4e4ca85340a3e..2c4e22565105e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -876,7 +876,6 @@ def to_dict(self, copy: bool = True): for b in self.blocks: bd.setdefault(str(b.dtype), []).append(b) - # TODO(EA2D): the combine will be unnecessary with 2D EAs return {dtype: self._combine(blocks, copy=copy) for dtype, blocks in bd.items()} def fast_xs(self, loc: int) -> ArrayLike: