From 71c683507c09fb69ee94d6bc27e872e043f22498 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 27 Dec 2017 13:57:06 -0800 Subject: [PATCH 01/10] Fix Timedelta.__floordiv__, __rfloordiv__ --- pandas/_libs/tslibs/timedeltas.pyx | 70 ++++++++++++++++++--- pandas/tests/scalar/test_timedelta.py | 87 +++++++++++++++++++++++++++ 2 files changed, 148 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b37e5dc620260..7646eb5612ec6 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -975,13 +975,40 @@ class Timedelta(_Timedelta): __rdiv__ = __rtruediv__ def __floordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if hasattr(other, '_typ'): + # Series, DataFrame, ... + return NotImplemented + if hasattr(other, 'dtype'): - # work with i8 - other = other.astype('m8[ns]').astype('i8') - return self.value // other + if other.dtype.kind == 'm': + # also timedelta-like + # We need to watch out for np.timedelta64('NaT') + if np.ndim(other) == 0: + if np.isnat(other): + return np.nan + else: + other = other.astype('m8[ns]').astype('i8') + return self.value // other + else: + mask = np.isnat(other) + res = self.value // other.astype('m8[ns]').astype('i8') + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res + elif other.dtype.kind in ['i', 'u', 'f']: + if np.ndim(other) == 0: + return Timedelta(self.value // other) + else: + return self.to_timedelta64() // other + else: + raise TypeError('Invalid dtype {dtype} for ' + '{op}'.format(dtype=other.dtype, + op='__floordiv__')) - elif is_integer_object(other): - # integers only + elif is_integer_object(other) or is_float_object(other): return Timedelta(self.value // other, unit='ns') elif not _validate_ops_compat(other): @@ -993,17 +1020,42 @@ class Timedelta(_Timedelta): return self.value // other.value def __rfloordiv__(self, other): + # numpy does not implement floordiv for timedelta64 dtype, so we cannot + # just defer + if hasattr(other, '_typ'): + # Series, DataFrame, ... + return NotImplemented + if hasattr(other, 'dtype'): - # work with i8 - other = other.astype('m8[ns]').astype('i8') - return other // self.value + if other.dtype.kind == 'm': + # also timedelta-like + # We need to watch out for np.timedelta64('NaT') + if np.ndim(other) == 0: + if np.isnat(other): + return np.nan + else: + other = other.astype('m8[ns]').astype('i8') + return other // self.value + else: + mask = np.isnat(other) + res = other.astype('m8[ns]').astype('i8') // self.value + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res + else: + raise TypeError('Invalid dtype {dtype} for ' + '{op}'.format(dtype=other.dtype, + op='__floordiv__')) + if isinstance(other, float) and np.isnan(other): + return NotImplemented elif not _validate_ops_compat(other): return NotImplemented other = Timedelta(other) if other is NaT: - return NaT + return np.nan return other.value // self.value diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 001f6c1fdbef4..45c10d7ad4466 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -162,6 +162,93 @@ def test_binary_ops_with_timedelta(self): # invalid multiply with another timedelta pytest.raises(TypeError, lambda: td * td) + def test_floordiv(self): + # GH#18846 + td = Timedelta(hours=3, minutes=4) + scalar = Timedelta(hours=3, minutes=3) + + # scalar others + assert td // scalar == 1 + assert -td // scalar.to_pytimedelta() == -2 + assert (2 * td) // scalar.to_timedelta64() == 2 + + assert td // np.nan is pd.NaT + assert np.isnan(td // pd.NaT) + assert np.isnan(td // np.timedelta64('NaT')) + + with pytest.raises(TypeError): + td // np.datetime64('2016-01-01', dtype='datetime64[us]') + + expected = Timedelta(hours=1, minutes=32) + assert td // 2 == expected + assert td // 2.0 == expected + assert td // np.float64(2.0) == expected + assert td // np.int32(2.0) == expected + assert td // np.uint8(2.0) == expected + + # Array-like others + assert td // np.array(scalar.to_timedelta64()) == 1 + + res = (3 * td) // np.array([scalar.to_timedelta64()]) + expected = np.array([3]) + np.testing.assert_array_equal(res, expected) + + res = (10 * td) // np.array([scalar.to_timedelta64(), + np.timedelta64('NaT')]) + expected = np.array([10, np.nan]) + np.testing.assert_array_equal(res, expected) + + ser = pd.Series([1], dtype=np.int64) + res = td // ser + assert res.dtype.kind == 'm' + + def test_rfloordiv(self): + # GH#18846 + td = Timedelta(hours=3, minutes=3) + scalar = Timedelta(hours=3, minutes=4) + + # scalar others + assert td.__rfloordiv__(scalar) == 1 + assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2 + assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0 + + assert np.isnan(td.__rfloordiv__(pd.NaT)) + assert np.isnan(td.__rfloordiv__(np.timedelta64('NaT'))) + + dt64 = np.datetime64('2016-01-01', dtype='datetime64[us]') + with pytest.raises(TypeError): + td.__rfloordiv__(dt64) + + assert td.__rfloordiv__(np.nan) is NotImplemented + assert td.__rfloordiv__(3.5) is NotImplemented + assert td.__rfloordiv__(2) is NotImplemented + + with pytest.raises(TypeError): + td.__rfloordiv__(np.float64(2.0)) + with pytest.raises(TypeError): + td.__rfloordiv__(np.int32(2.0)) + with pytest.raises(TypeError): + td.__rfloordiv__(np.uint8(9)) + + # Array-like others + assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1 + + res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) + expected = np.array([3]) + np.testing.assert_array_equal(res, expected) + + arr = np.array([(10 * scalar).to_timedelta64(), + np.timedelta64('NaT')]) + res = td.__rfloordiv__(arr) + expected = np.array([10, np.nan]) + np.testing.assert_array_equal(res, expected) + + ser = pd.Series([1], dtype=np.int64) + res = td.__rfloordiv__(ser) + assert res is NotImplemented + with pytest.raises(TypeError): + ser // td + class TestTimedeltas(object): _multiprocess_can_split_ = True From c0318643c21ea9b6f06f1d5fa24614306934278e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Dec 2017 11:45:22 -0800 Subject: [PATCH 02/10] Whatsnew note, DRY off a bit --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/_libs/tslibs/timedeltas.pyx | 79 +++++++++++++++++---------- pandas/tests/scalar/test_timedelta.py | 1 + 3 files changed, 52 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 709009542e160..acacee141192f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -359,7 +359,7 @@ Numeric ^^^^^^^ - Bug in :func:`Series.__sub__` subtracting a non-nanosecond ``np.datetime64`` object from a ``Series`` gave incorrect results (:issue:`7996`) -- +- Bug in :func:`Timedelta.__floordiv__` and :func:`Timedelta.__rfloordiv__` dividing by many incompatible numpy objects was incorrectly allowed (:issue:`18846`) - Categorical diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 7646eb5612ec6..5a87d707ebeab 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -984,25 +984,13 @@ class Timedelta(_Timedelta): if hasattr(other, 'dtype'): if other.dtype.kind == 'm': # also timedelta-like - # We need to watch out for np.timedelta64('NaT') - if np.ndim(other) == 0: - if np.isnat(other): - return np.nan - else: - other = other.astype('m8[ns]').astype('i8') - return self.value // other - else: - mask = np.isnat(other) - res = self.value // other.astype('m8[ns]').astype('i8') - if mask.any(): - res = res.astype('f8') - res[mask] = np.nan - return res + return _broadcast_floordiv_td64(self.value, other, _floordiv) elif other.dtype.kind in ['i', 'u', 'f']: if np.ndim(other) == 0: return Timedelta(self.value // other) else: return self.to_timedelta64() // other + else: raise TypeError('Invalid dtype {dtype} for ' '{op}'.format(dtype=other.dtype, @@ -1029,20 +1017,7 @@ class Timedelta(_Timedelta): if hasattr(other, 'dtype'): if other.dtype.kind == 'm': # also timedelta-like - # We need to watch out for np.timedelta64('NaT') - if np.ndim(other) == 0: - if np.isnat(other): - return np.nan - else: - other = other.astype('m8[ns]').astype('i8') - return other // self.value - else: - mask = np.isnat(other) - res = other.astype('m8[ns]').astype('i8') // self.value - if mask.any(): - res = res.astype('f8') - res[mask] = np.nan - return res + return _broadcast_floordiv_td64(self.value, other, _rfloordiv) else: raise TypeError('Invalid dtype {dtype} for ' '{op}'.format(dtype=other.dtype, @@ -1059,6 +1034,52 @@ class Timedelta(_Timedelta): return other.value // self.value +cdef _floordiv(int64_t value, right): + return value // right + + +cdef _rfloordiv(int64_t value, right): + # analogous to referencing operator.div, but there is no operator.rfloordiv + return right // value + + +cdef _broadcast_floordiv_td64(int64_t value, object other, + object (*operation)(int64_t value, + object right)): + """Boilerplate code shared by Timedelta.__floordiv__ and + Timedelta.__rfloordiv__ because np.timedelta64 does not implement these. + + Parameters + ---------- + value : int64_t; `self.value` from a Timedelta object + other : object + operation : function, either _floordiv or _rfloordiv + + Returns + ------- + result : varies based on `other` + """ + # assumes other.dtype.kind == 'm', i.e. other is timedelta-like + cdef: + int ndim = getattr(other, 'ndim', -1) + + # We need to watch out for np.timedelta64('NaT') + if ndim == 0: + if np.isnat(other): + return np.nan + else: + return operation(value, other.astype('m8[ns]').astype('i8')) + + else: + mask = np.isnat(other) + res = operation(value, other.astype('m8[ns]').astype('i8')) + + if mask.any(): + res = res.astype('f8') + res[mask] = np.nan + return res + + # resolution in ns -Timedelta.min = Timedelta(np.iinfo(np.int64).min +1) +Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) Timedelta.max = Timedelta(np.iinfo(np.int64).max) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 2a63305ce99b5..66fbdc7e3563e 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -136,6 +136,7 @@ def test_binary_ops_nat(self): assert (td * pd.NaT) is pd.NaT assert (td / pd.NaT) is np.nan assert (td // pd.NaT) is np.nan + assert (td // np.timedelta64('NaT')) is np.nan def test_binary_ops_integers(self): td = Timedelta(10, unit='d') From 09903b462f9b6bce23d06afac353fffc40c148ab Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Dec 2017 20:34:54 -0800 Subject: [PATCH 03/10] lint pickiness --- pandas/tests/scalar/test_timedelta.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 66fbdc7e3563e..146b15a75ba27 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -192,12 +192,12 @@ def test_floordiv(self): res = (3 * td) // np.array([scalar.to_timedelta64()]) expected = np.array([3]) - np.testing.assert_array_equal(res, expected) + assert (res == expected).all() res = (10 * td) // np.array([scalar.to_timedelta64(), np.timedelta64('NaT')]) expected = np.array([10, np.nan]) - np.testing.assert_array_equal(res, expected) + assert (res == expected).all() ser = pd.Series([1], dtype=np.int64) res = td // ser @@ -236,13 +236,13 @@ def test_rfloordiv(self): res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) expected = np.array([3]) - np.testing.assert_array_equal(res, expected) + assert (res == expected).all() arr = np.array([(10 * scalar).to_timedelta64(), np.timedelta64('NaT')]) res = td.__rfloordiv__(arr) expected = np.array([10, np.nan]) - np.testing.assert_array_equal(res, expected) + assert (res == expected).all() ser = pd.Series([1], dtype=np.int64) res = td.__rfloordiv__(ser) From 54cc35f778ef3388dea8a17b1c2e140c242e68bc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 Dec 2017 10:40:31 -0800 Subject: [PATCH 04/10] use tm.assert_numpy_array_equal --- pandas/tests/scalar/test_timedelta.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 146b15a75ba27..07ab06c0e9fd6 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -192,12 +192,12 @@ def test_floordiv(self): res = (3 * td) // np.array([scalar.to_timedelta64()]) expected = np.array([3]) - assert (res == expected).all() + tm.assert_numpy_array_equal(res, expected) res = (10 * td) // np.array([scalar.to_timedelta64(), np.timedelta64('NaT')]) expected = np.array([10, np.nan]) - assert (res == expected).all() + tm.assert_numpy_array_equal(res, expected) ser = pd.Series([1], dtype=np.int64) res = td // ser @@ -236,13 +236,13 @@ def test_rfloordiv(self): res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) expected = np.array([3]) - assert (res == expected).all() + tm.assert_numpy_array_equal(res, expected) arr = np.array([(10 * scalar).to_timedelta64(), np.timedelta64('NaT')]) res = td.__rfloordiv__(arr) expected = np.array([10, np.nan]) - assert (res == expected).all() + tm.assert_numpy_array_equal(res, expected) ser = pd.Series([1], dtype=np.int64) res = td.__rfloordiv__(ser) From 0e532fc17da49e617614f5ef37feeda590417375 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Jan 2018 10:10:44 -0800 Subject: [PATCH 05/10] avoid np.isnat --- pandas/_libs/tslibs/timedeltas.pyx | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 5a87d707ebeab..6969cd371bd1c 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1063,15 +1063,17 @@ cdef _broadcast_floordiv_td64(int64_t value, object other, cdef: int ndim = getattr(other, 'ndim', -1) - # We need to watch out for np.timedelta64('NaT') + # We need to watch out for np.timedelta64('NaT'). + mask = other.view('i8') == NPY_NAT + # equiv np.isnat, which does not exist in some supported np versions. + if ndim == 0: - if np.isnat(other): + if mask: return np.nan else: return operation(value, other.astype('m8[ns]').astype('i8')) else: - mask = np.isnat(other) res = operation(value, other.astype('m8[ns]').astype('i8')) if mask.any(): From 79c3a8e5f8a09ec42ecf41c63e82e84efb443b41 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 4 Jan 2018 17:55:46 -0800 Subject: [PATCH 06/10] make dtype explcit to fix appveyor test error --- pandas/tests/scalar/test_timedelta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 07ab06c0e9fd6..e427c7a8057dd 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -191,7 +191,7 @@ def test_floordiv(self): assert td // np.array(scalar.to_timedelta64()) == 1 res = (3 * td) // np.array([scalar.to_timedelta64()]) - expected = np.array([3]) + expected = np.array([3], dtype=np.int64) tm.assert_numpy_array_equal(res, expected) res = (10 * td) // np.array([scalar.to_timedelta64(), From a0029ad7670176d06b34868b7e4f4c376d460967 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Jan 2018 08:46:46 -0800 Subject: [PATCH 07/10] fix appveyor int32 --- pandas/tests/scalar/test_timedelta.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index e427c7a8057dd..fe76a4abfc780 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -235,7 +235,7 @@ def test_rfloordiv(self): assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1 res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()])) - expected = np.array([3]) + expected = np.array([3], dtype=np.int64) tm.assert_numpy_array_equal(res, expected) arr = np.array([(10 * scalar).to_timedelta64(), From 3c38971e1e2d88e225ab14644da21c78885bccd2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Jan 2018 13:16:19 -0800 Subject: [PATCH 08/10] requested edits --- pandas/_libs/tslibs/timedeltas.pyx | 24 +++++++++++------------- pandas/tests/scalar/test_nat.py | 10 ++++++++++ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 44423564a1fad..8dba8c15f0b81 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1042,15 +1042,14 @@ class Timedelta(_Timedelta): # also timedelta-like return _broadcast_floordiv_td64(self.value, other, _floordiv) elif other.dtype.kind in ['i', 'u', 'f']: - if np.ndim(other) == 0: + if other.ndim == 0: return Timedelta(self.value // other) else: return self.to_timedelta64() // other - else: - raise TypeError('Invalid dtype {dtype} for ' - '{op}'.format(dtype=other.dtype, - op='__floordiv__')) + raise TypeError('Invalid dtype {dtype} for ' + '{op}'.format(dtype=other.dtype, + op='__floordiv__')) elif is_integer_object(other) or is_float_object(other): return Timedelta(self.value // other, unit='ns') @@ -1074,12 +1073,12 @@ class Timedelta(_Timedelta): if other.dtype.kind == 'm': # also timedelta-like return _broadcast_floordiv_td64(self.value, other, _rfloordiv) - else: - raise TypeError('Invalid dtype {dtype} for ' - '{op}'.format(dtype=other.dtype, - op='__floordiv__')) + raise TypeError('Invalid dtype {dtype} for ' + '{op}'.format(dtype=other.dtype, + op='__floordiv__')) - if isinstance(other, float) and np.isnan(other): + if is_float_object(other) and util._checknull(other): + # i.e. np.nan return NotImplemented elif not _validate_ops_compat(other): return NotImplemented @@ -1121,13 +1120,12 @@ cdef _broadcast_floordiv_td64(int64_t value, object other, # We need to watch out for np.timedelta64('NaT'). mask = other.view('i8') == NPY_NAT - # equiv np.isnat, which does not exist in some supported np versions. if ndim == 0: if mask: return np.nan - else: - return operation(value, other.astype('m8[ns]').astype('i8')) + + return operation(value, other.astype('m8[ns]').astype('i8')) else: res = operation(value, other.astype('m8[ns]').astype('i8')) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 69ce7a42851a1..46061367be607 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -273,6 +273,16 @@ def test_nat_arithmetic(): assert right - left is NaT +def test_nat_rfloordiv_timedelta(self): + # GH#18846 + # See also test_timedelta.TestTimedeltaArithmetic.test_floordiv + td = Timedelta(hours=3, minutes=4) + + assert td // np.nan is NaT + assert np.isnan(td // NaT) + assert np.isnan(td // np.timedelta64('NaT')) + + def test_nat_arithmetic_index(): # GH 11718 From 3501956f434600a883315122e8031255816fc4fc Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Jan 2018 09:36:10 -0800 Subject: [PATCH 09/10] fix copy/paste screwup --- pandas/tests/scalar/test_nat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 46061367be607..d0d204253e3f1 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -273,7 +273,7 @@ def test_nat_arithmetic(): assert right - left is NaT -def test_nat_rfloordiv_timedelta(self): +def test_nat_rfloordiv_timedelta(): # GH#18846 # See also test_timedelta.TestTimedeltaArithmetic.test_floordiv td = Timedelta(hours=3, minutes=4) From f101378da9f156f8d563d0566d087a9bdd71dc14 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 6 Jan 2018 09:58:29 -0800 Subject: [PATCH 10/10] requested comments --- pandas/tests/scalar/test_timedelta.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index fdebdc31bf408..8c574d8f8873b 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -209,6 +209,11 @@ def test_rfloordiv(self): scalar = Timedelta(hours=3, minutes=4) # scalar others + # x // Timedelta is defined only for timedelta-like x. int-like, + # float-like, and date-like, in particular, should all either + # a) raise TypeError directly or + # b) return NotImplemented, following which the reversed + # operation will raise TypeError. assert td.__rfloordiv__(scalar) == 1 assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2 assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0