diff --git a/doc/source/release.rst b/doc/source/release.rst index 3b5bb04344d25..74e54526cfe9a 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -487,6 +487,8 @@ Bug Fixes - Fix repr for DateOffset. No longer show duplicate entries in kwds. Removed unused offset fields. (:issue:`4638`) - Fixed wrong index name during read_csv if using usecols. Applies to c parser only. (:issue:`4201`) + - ``Timestamp`` objects can now appear in the left hand side of a comparison + operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`). pandas 0.12.0 ------------- diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 82be82ea57dae..a6f806d5ce097 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -4335,6 +4335,31 @@ def check(df,df2): df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))}) check(df,df2) + def test_timestamp_compare(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + df = DataFrame({'dates1': date_range('20010101', periods=10), + 'dates2': date_range('20010102', periods=10), + 'intcol': np.random.randint(1000000000, size=10), + 'floatcol': np.random.randn(10), + 'stringcol': list(tm.rands(10))}) + df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(df, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), df) + tm.assert_frame_equal(result, expected) + + # nats + expected = left_f(df, Timestamp('nat')) + result = right_f(Timestamp('nat'), df) + tm.assert_frame_equal(result, expected) + def test_modulo(self): # GH3590, modulo as ints diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 51a010f9d4ead..0e5e3d1922ec4 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -3,6 +3,7 @@ import sys import os import unittest +import operator import nose @@ -2010,6 +2011,7 @@ def test_join_self(self): joined = index.join(index, how=kind) self.assert_(index is joined) + class TestDatetime64(unittest.TestCase): """ Also test supoprt for datetime64[ns] in Series / DataFrame @@ -2507,6 +2509,74 @@ def test_hash_equivalent(self): stamp = Timestamp(datetime(2011, 1, 1)) self.assertEquals(d[stamp], 5) + def test_timestamp_compare_scalars(self): + # case where ndim == 0 + lhs = np.datetime64(datetime(2013, 12, 6)) + rhs = Timestamp('now') + nat = Timestamp('nat') + + ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq', + 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + if pd._np_version_under1p7: + # you have to convert to timestamp for this to work with numpy + # scalars + expected = left_f(Timestamp(lhs), rhs) + + # otherwise a TypeError is thrown + if left not in ('eq', 'ne'): + with tm.assertRaises(TypeError): + left_f(lhs, rhs) + else: + expected = left_f(lhs, rhs) + + result = right_f(rhs, lhs) + self.assertEqual(result, expected) + + expected = left_f(rhs, nat) + result = right_f(nat, rhs) + self.assertEqual(result, expected) + + def test_timestamp_compare_series(self): + # make sure we can compare Timestamps on the right AND left hand side + # GH4982 + s = Series(date_range('20010101', periods=10), name='dates') + s_nat = s.copy(deep=True) + + s[0] = pd.Timestamp('nat') + s[3] = pd.Timestamp('nat') + + ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'} + + for left, right in ops.items(): + left_f = getattr(operator, left) + right_f = getattr(operator, right) + + # no nats + expected = left_f(s, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s) + tm.assert_series_equal(result, expected) + + # nats + expected = left_f(s, Timestamp('nat')) + result = right_f(Timestamp('nat'), s) + tm.assert_series_equal(result, expected) + + # compare to timestamp with series containing nats + expected = left_f(s_nat, Timestamp('20010109')) + result = right_f(Timestamp('20010109'), s_nat) + tm.assert_series_equal(result, expected) + + # compare to nat with series containing nats + expected = left_f(s_nat, Timestamp('nat')) + result = right_f(Timestamp('nat'), s_nat) + tm.assert_series_equal(result, expected) + + class TestSlicing(unittest.TestCase): def test_slice_year(self): @@ -2775,6 +2845,7 @@ def test_frame_apply_dont_convert_datetime64(self): self.assertTrue(df.x1.dtype == 'M8[ns]') + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 075102dd63100..99b09446be232 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -9,12 +9,15 @@ from cpython cimport ( PyTypeObject, PyFloat_Check, PyObject_RichCompareBool, - PyString_Check + PyObject_RichCompare, + PyString_Check, + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE ) # Cython < 0.17 doesn't have this in cpython cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) + int PySlice_Check(object) from libc.stdlib cimport free @@ -30,9 +33,6 @@ from datetime import timedelta, datetime from datetime import time as datetime_time from pandas.compat import parse_date -cdef extern from "Python.h": - int PySlice_Check(object) - # initialize numpy import_array() #import_ufunc() @@ -350,6 +350,11 @@ NaT = NaTType() iNaT = util.get_nat() + +cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1: + return _nat_scalar_rules[op] + + cdef _tz_format(object obj, object zone): try: return obj.strftime(' %%Z, tz=%s' % zone) @@ -437,9 +442,35 @@ def apply_offset(ndarray[object] values, object offset): result = np.empty(n, dtype='M8[ns]') new_values = result.view('i8') - pass +cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: + if op == Py_EQ: + return lhs == rhs + elif op == Py_NE: + return lhs != rhs + elif op == Py_LT: + return lhs < rhs + elif op == Py_LE: + return lhs <= rhs + elif op == Py_GT: + return lhs > rhs + elif op == Py_GE: + return lhs >= rhs + + +cdef int _reverse_ops[6] + +_reverse_ops[Py_LT] = Py_GT +_reverse_ops[Py_LE] = Py_GE +_reverse_ops[Py_EQ] = Py_EQ +_reverse_ops[Py_NE] = Py_NE +_reverse_ops[Py_GT] = Py_LT +_reverse_ops[Py_GE] = Py_LE + + +cdef str _NDIM_STRING = "ndim" + # This is PITA. Because we inherit from datetime, which has very specific # construction requirements, we need to do object instantiation in python # (see Timestamp class above). This will serve as a C extension type that @@ -449,18 +480,21 @@ cdef class _Timestamp(datetime): int64_t value, nanosecond object offset # frequency reference - def __hash__(self): + def __hash__(_Timestamp self): if self.nanosecond: return hash(self.value) - else: - return datetime.__hash__(self) + return datetime.__hash__(self) def __richcmp__(_Timestamp self, object other, int op): - cdef _Timestamp ots + cdef: + _Timestamp ots + int ndim if isinstance(other, _Timestamp): + if isinstance(other, _NaT): + return _cmp_nat_dt(other, self, _reverse_ops[op]) ots = other - elif type(other) is datetime: + elif isinstance(other, datetime): if self.nanosecond == 0: val = self.to_datetime() return PyObject_RichCompareBool(val, other, op) @@ -470,70 +504,60 @@ cdef class _Timestamp(datetime): except ValueError: return self._compare_outside_nanorange(other, op) else: - if op == 2: - return False - elif op == 3: - return True + ndim = getattr(other, _NDIM_STRING, -1) + + if ndim != -1: + if ndim == 0: + if isinstance(other, np.datetime64): + other = Timestamp(other) + else: + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, + type(other).__name__)) + return PyObject_RichCompare(other, self, _reverse_ops[op]) else: - raise TypeError('Cannot compare Timestamp with ' - '{0!r}'.format(other.__class__.__name__)) + if op == Py_EQ: + return False + elif op == Py_NE: + return True + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, type(other).__name__)) self._assert_tzawareness_compat(other) + return _cmp_scalar(self.value, ots.value, op) - if op == 2: # == - return self.value == ots.value - elif op == 3: # != - return self.value != ots.value - elif op == 0: # < - return self.value < ots.value - elif op == 1: # <= - return self.value <= ots.value - elif op == 4: # > - return self.value > ots.value - elif op == 5: # >= - return self.value >= ots.value - - cdef _compare_outside_nanorange(self, object other, int op): - dtval = self.to_datetime() + cdef bint _compare_outside_nanorange(_Timestamp self, datetime other, + int op) except -1: + cdef datetime dtval = self.to_datetime() self._assert_tzawareness_compat(other) if self.nanosecond == 0: - if op == 2: # == - return dtval == other - elif op == 3: # != - return dtval != other - elif op == 0: # < - return dtval < other - elif op == 1: # <= - return dtval <= other - elif op == 4: # > - return dtval > other - elif op == 5: # >= - return dtval >= other + return PyObject_RichCompareBool(dtval, other, op) else: - if op == 2: # == + if op == Py_EQ: return False - elif op == 3: # != + elif op == Py_NE: return True - elif op == 0: # < + elif op == Py_LT: return dtval < other - elif op == 1: # <= + elif op == Py_LE: return dtval < other - elif op == 4: # > + elif op == Py_GT: return dtval >= other - elif op == 5: # >= + elif op == Py_GE: return dtval >= other - cdef _assert_tzawareness_compat(self, object other): + cdef int _assert_tzawareness_compat(_Timestamp self, + object other) except -1: if self.tzinfo is None: if other.tzinfo is not None: - raise Exception('Cannot compare tz-naive and ' - 'tz-aware timestamps') + raise ValueError('Cannot compare tz-naive and tz-aware ' + 'timestamps') elif other.tzinfo is None: - raise Exception('Cannot compare tz-naive and tz-aware timestamps') + raise ValueError('Cannot compare tz-naive and tz-aware timestamps') - cpdef to_datetime(self): + cpdef datetime to_datetime(_Timestamp self): cdef: pandas_datetimestruct dts _TSObject ts @@ -580,6 +604,16 @@ cdef inline bint is_timestamp(object o): return Py_TYPE(o) == ts_type # isinstance(o, Timestamp) +cdef bint _nat_scalar_rules[6] + +_nat_scalar_rules[Py_EQ] = False +_nat_scalar_rules[Py_NE] = True +_nat_scalar_rules[Py_LT] = False +_nat_scalar_rules[Py_LE] = False +_nat_scalar_rules[Py_GT] = False +_nat_scalar_rules[Py_GE] = False + + cdef class _NaT(_Timestamp): def __hash__(_NaT self): @@ -587,23 +621,18 @@ cdef class _NaT(_Timestamp): return hash(self.value) def __richcmp__(_NaT self, object other, int op): - # if not isinstance(other, (_NaT, _Timestamp)): - # raise TypeError('Cannot compare %s with NaT' % type(other)) - - if op == 2: # == - return False - elif op == 3: # != - return True - elif op == 0: # < - return False - elif op == 1: # <= - return False - elif op == 4: # > - return False - elif op == 5: # >= - return False + cdef int ndim = getattr(other, 'ndim', -1) + if ndim == -1: + return _nat_scalar_rules[op] + if ndim == 0: + if isinstance(other, np.datetime64): + other = Timestamp(other) + else: + raise TypeError('Cannot compare type %r with type %r' % + (type(self).__name__, type(other).__name__)) + return PyObject_RichCompare(other, self, _reverse_ops[op]) def _delta_to_nanoseconds(delta): diff --git a/vb_suite/binary_ops.py b/vb_suite/binary_ops.py index 3f076f9f922a3..8293f650425e3 100644 --- a/vb_suite/binary_ops.py +++ b/vb_suite/binary_ops.py @@ -102,3 +102,15 @@ frame_multi_and_no_ne = \ Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_no_ne',cleanup="expr.set_use_numexpr(True)", start_date=datetime(2013, 2, 26)) + +setup = common_setup + """ +N = 1000000 +halfway = N // 2 - 1 +s = Series(date_range('20010101', periods=N, freq='D')) +ts = s[halfway] +""" + +timestamp_series_compare = Benchmark("ts >= s", setup, + start_date=datetime(2013, 9, 27)) +series_timestamp_compare = Benchmark("s <= ts", setup, + start_date=datetime(2012, 2, 21)) diff --git a/vb_suite/index_object.py b/vb_suite/index_object.py index cf87a9af500fb..8b348ddc6e6cc 100644 --- a/vb_suite/index_object.py +++ b/vb_suite/index_object.py @@ -22,6 +22,16 @@ index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup) index_datetime_union = Benchmark("rng.union(rng2)", setup) +setup = common_setup + """ +rng = date_range('1/1/2000', periods=10000, freq='T') +rng2 = rng[:-1] +""" + +datetime_index_intersection = Benchmark("rng.intersection(rng2)", setup, + start_date=datetime(2013, 9, 27)) +datetime_index_union = Benchmark("rng.union(rng2)", setup, + start_date=datetime(2013, 9, 27)) + # integers setup = common_setup + """ N = 1000000