Skip to content

Commit 8a9a4f2

Browse files
committed
Merge pull request #4983 from cpcloud/series-timestamp-compare
BUG: allow Timestamp comparisons on the left
2 parents 21364c7 + d591a80 commit 8a9a4f2

File tree

6 files changed

+218
-69
lines changed

6 files changed

+218
-69
lines changed

doc/source/release.rst

+2
Original file line number | Diff line number | Diff line change
@@ -487,6 +487,8 @@ Bug Fixes
487487
- Fix repr for DateOffset. No longer show duplicate entries in kwds.
488488
Removed unused offset fields. (:issue:`4638`)
489489
- Fixed wrong index name during read_csv if using usecols. Applies to the C parser only. (:issue:`4201`)
490+
- ``Timestamp`` objects can now appear on the left-hand side of a comparison
491+
operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`).
490492

491493
pandas 0.12.0
492494
-------------

pandas/tests/test_frame.py

+25
Original file line number | Diff line number | Diff line change
@@ -4335,6 +4335,31 @@ def check(df,df2):
43354335
df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))})
43364336
check(df,df2)
43374337

4338+
def test_timestamp_compare(self):
4339+
# make sure we can compare Timestamps on the right AND left hand side
4340+
# GH4982
4341+
df = DataFrame({'dates1': date_range('20010101', periods=10),
4342+
'dates2': date_range('20010102', periods=10),
4343+
'intcol': np.random.randint(1000000000, size=10),
4344+
'floatcol': np.random.randn(10),
4345+
'stringcol': list(tm.rands(10))})
4346+
df.loc[np.random.rand(len(df)) > 0.5, 'dates2'] = pd.NaT
4347+
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
4348+
'ne': 'ne'}
4349+
for left, right in ops.items():
4350+
left_f = getattr(operator, left)
4351+
right_f = getattr(operator, right)
4352+
4353+
# no nats
4354+
expected = left_f(df, Timestamp('20010109'))
4355+
result = right_f(Timestamp('20010109'), df)
4356+
tm.assert_frame_equal(result, expected)
4357+
4358+
# nats
4359+
expected = left_f(df, Timestamp('nat'))
4360+
result = right_f(Timestamp('nat'), df)
4361+
tm.assert_frame_equal(result, expected)
4362+
43384363
def test_modulo(self):
43394364

43404365
# GH3590, modulo as ints

pandas/tseries/tests/test_timeseries.py

+71
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
import sys
44
import os
55
import unittest
6+
import operator
67

78
import nose
89

@@ -2010,6 +2011,7 @@ def test_join_self(self):
20102011
joined = index.join(index, how=kind)
20112012
self.assert_(index is joined)
20122013

2014+
20132015
class TestDatetime64(unittest.TestCase):
20142016
"""
20152017
Also test support for datetime64[ns] in Series / DataFrame
@@ -2507,6 +2509,74 @@ def test_hash_equivalent(self):
25072509
stamp = Timestamp(datetime(2011, 1, 1))
25082510
self.assertEquals(d[stamp], 5)
25092511

2512+
def test_timestamp_compare_scalars(self):
2513+
# case where ndim == 0
2514+
lhs = np.datetime64(datetime(2013, 12, 6))
2515+
rhs = Timestamp('now')
2516+
nat = Timestamp('nat')
2517+
2518+
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
2519+
'ne': 'ne'}
2520+
2521+
for left, right in ops.items():
2522+
left_f = getattr(operator, left)
2523+
right_f = getattr(operator, right)
2524+
2525+
if pd._np_version_under1p7:
2526+
# you have to convert to timestamp for this to work with numpy
2527+
# scalars
2528+
expected = left_f(Timestamp(lhs), rhs)
2529+
2530+
# otherwise a TypeError is thrown
2531+
if left not in ('eq', 'ne'):
2532+
with tm.assertRaises(TypeError):
2533+
left_f(lhs, rhs)
2534+
else:
2535+
expected = left_f(lhs, rhs)
2536+
2537+
result = right_f(rhs, lhs)
2538+
self.assertEqual(result, expected)
2539+
2540+
expected = left_f(rhs, nat)
2541+
result = right_f(nat, rhs)
2542+
self.assertEqual(result, expected)
2543+
2544+
def test_timestamp_compare_series(self):
2545+
# make sure we can compare Timestamps on the right AND left hand side
2546+
# GH4982
2547+
s = Series(date_range('20010101', periods=10), name='dates')
2548+
s_nat = s.copy(deep=True)
2549+
2550+
s[0] = pd.Timestamp('nat')
2551+
s[3] = pd.Timestamp('nat')
2552+
2553+
ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'}
2554+
2555+
for left, right in ops.items():
2556+
left_f = getattr(operator, left)
2557+
right_f = getattr(operator, right)
2558+
2559+
# no nats
2560+
expected = left_f(s, Timestamp('20010109'))
2561+
result = right_f(Timestamp('20010109'), s)
2562+
tm.assert_series_equal(result, expected)
2563+
2564+
# nats
2565+
expected = left_f(s, Timestamp('nat'))
2566+
result = right_f(Timestamp('nat'), s)
2567+
tm.assert_series_equal(result, expected)
2568+
2569+
# compare to timestamp with series containing nats
2570+
expected = left_f(s_nat, Timestamp('20010109'))
2571+
result = right_f(Timestamp('20010109'), s_nat)
2572+
tm.assert_series_equal(result, expected)
2573+
2574+
# compare to nat with series containing nats
2575+
expected = left_f(s_nat, Timestamp('nat'))
2576+
result = right_f(Timestamp('nat'), s_nat)
2577+
tm.assert_series_equal(result, expected)
2578+
2579+
25102580
class TestSlicing(unittest.TestCase):
25112581

25122582
def test_slice_year(self):
@@ -2775,6 +2845,7 @@ def test_frame_apply_dont_convert_datetime64(self):
27752845

27762846
self.assertTrue(df.x1.dtype == 'M8[ns]')
27772847

2848+
27782849
if __name__ == '__main__':
27792850
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
27802851
exit=False)

pandas/tslib.pyx

+98-69
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,15 @@ from cpython cimport (
99
PyTypeObject,
1010
PyFloat_Check,
1111
PyObject_RichCompareBool,
12-
PyString_Check
12+
PyObject_RichCompare,
13+
PyString_Check,
14+
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1315
)
1416

1517
# Cython < 0.17 doesn't have this in cpython
1618
cdef extern from "Python.h":
1719
cdef PyTypeObject *Py_TYPE(object)
20+
int PySlice_Check(object)
1821

1922

2023
from libc.stdlib cimport free
@@ -30,9 +33,6 @@ from datetime import timedelta, datetime
3033
from datetime import time as datetime_time
3134
from pandas.compat import parse_date
3235

33-
cdef extern from "Python.h":
34-
int PySlice_Check(object)
35-
3636
# initialize numpy
3737
import_array()
3838
#import_ufunc()
@@ -350,6 +350,11 @@ NaT = NaTType()
350350

351351
iNaT = util.get_nat()
352352

353+
354+
cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1:
355+
return _nat_scalar_rules[op]
356+
357+
353358
cdef _tz_format(object obj, object zone):
354359
try:
355360
return obj.strftime(' %%Z, tz=%s' % zone)
@@ -437,9 +442,35 @@ def apply_offset(ndarray[object] values, object offset):
437442

438443
result = np.empty(n, dtype='M8[ns]')
439444
new_values = result.view('i8')
440-
pass
441445

442446

447+
cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1:
448+
if op == Py_EQ:
449+
return lhs == rhs
450+
elif op == Py_NE:
451+
return lhs != rhs
452+
elif op == Py_LT:
453+
return lhs < rhs
454+
elif op == Py_LE:
455+
return lhs <= rhs
456+
elif op == Py_GT:
457+
return lhs > rhs
458+
elif op == Py_GE:
459+
return lhs >= rhs
460+
461+
462+
cdef int _reverse_ops[6]
463+
464+
_reverse_ops[Py_LT] = Py_GT
465+
_reverse_ops[Py_LE] = Py_GE
466+
_reverse_ops[Py_EQ] = Py_EQ
467+
_reverse_ops[Py_NE] = Py_NE
468+
_reverse_ops[Py_GT] = Py_LT
469+
_reverse_ops[Py_GE] = Py_LE
470+
471+
472+
cdef str _NDIM_STRING = "ndim"
473+
443474
# This is PITA. Because we inherit from datetime, which has very specific
444475
# construction requirements, we need to do object instantiation in python
445476
# (see Timestamp class above). This will serve as a C extension type that
@@ -449,18 +480,21 @@ cdef class _Timestamp(datetime):
449480
int64_t value, nanosecond
450481
object offset # frequency reference
451482

452-
def __hash__(self):
483+
def __hash__(_Timestamp self):
453484
if self.nanosecond:
454485
return hash(self.value)
455-
else:
456-
return datetime.__hash__(self)
486+
return datetime.__hash__(self)
457487

458488
def __richcmp__(_Timestamp self, object other, int op):
459-
cdef _Timestamp ots
489+
cdef:
490+
_Timestamp ots
491+
int ndim
460492

461493
if isinstance(other, _Timestamp):
494+
if isinstance(other, _NaT):
495+
return _cmp_nat_dt(other, self, _reverse_ops[op])
462496
ots = other
463-
elif type(other) is datetime:
497+
elif isinstance(other, datetime):
464498
if self.nanosecond == 0:
465499
val = self.to_datetime()
466500
return PyObject_RichCompareBool(val, other, op)
@@ -470,70 +504,60 @@ cdef class _Timestamp(datetime):
470504
except ValueError:
471505
return self._compare_outside_nanorange(other, op)
472506
else:
473-
if op == 2:
474-
return False
475-
elif op == 3:
476-
return True
507+
ndim = getattr(other, _NDIM_STRING, -1)
508+
509+
if ndim != -1:
510+
if ndim == 0:
511+
if isinstance(other, np.datetime64):
512+
other = Timestamp(other)
513+
else:
514+
raise TypeError('Cannot compare type %r with type %r' %
515+
(type(self).__name__,
516+
type(other).__name__))
517+
return PyObject_RichCompare(other, self, _reverse_ops[op])
477518
else:
478-
raise TypeError('Cannot compare Timestamp with '
479-
'{0!r}'.format(other.__class__.__name__))
519+
if op == Py_EQ:
520+
return False
521+
elif op == Py_NE:
522+
return True
523+
raise TypeError('Cannot compare type %r with type %r' %
524+
(type(self).__name__, type(other).__name__))
480525

481526
self._assert_tzawareness_compat(other)
527+
return _cmp_scalar(self.value, ots.value, op)
482528

483-
if op == 2: # ==
484-
return self.value == ots.value
485-
elif op == 3: # !=
486-
return self.value != ots.value
487-
elif op == 0: # <
488-
return self.value < ots.value
489-
elif op == 1: # <=
490-
return self.value <= ots.value
491-
elif op == 4: # >
492-
return self.value > ots.value
493-
elif op == 5: # >=
494-
return self.value >= ots.value
495-
496-
cdef _compare_outside_nanorange(self, object other, int op):
497-
dtval = self.to_datetime()
529+
cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
530+
int op) except -1:
531+
cdef datetime dtval = self.to_datetime()
498532

499533
self._assert_tzawareness_compat(other)
500534

501535
if self.nanosecond == 0:
502-
if op == 2: # ==
503-
return dtval == other
504-
elif op == 3: # !=
505-
return dtval != other
506-
elif op == 0: # <
507-
return dtval < other
508-
elif op == 1: # <=
509-
return dtval <= other
510-
elif op == 4: # >
511-
return dtval > other
512-
elif op == 5: # >=
513-
return dtval >= other
536+
return PyObject_RichCompareBool(dtval, other, op)
514537
else:
515-
if op == 2: # ==
538+
if op == Py_EQ:
516539
return False
517-
elif op == 3: # !=
540+
elif op == Py_NE:
518541
return True
519-
elif op == 0: # <
542+
elif op == Py_LT:
520543
return dtval < other
521-
elif op == 1: # <=
544+
elif op == Py_LE:
522545
return dtval < other
523-
elif op == 4: # >
546+
elif op == Py_GT:
524547
return dtval >= other
525-
elif op == 5: # >=
548+
elif op == Py_GE:
526549
return dtval >= other
527550

528-
cdef _assert_tzawareness_compat(self, object other):
551+
cdef int _assert_tzawareness_compat(_Timestamp self,
552+
object other) except -1:
529553
if self.tzinfo is None:
530554
if other.tzinfo is not None:
531-
raise Exception('Cannot compare tz-naive and '
532-
'tz-aware timestamps')
555+
raise ValueError('Cannot compare tz-naive and tz-aware '
556+
'timestamps')
533557
elif other.tzinfo is None:
534-
raise Exception('Cannot compare tz-naive and tz-aware timestamps')
558+
raise ValueError('Cannot compare tz-naive and tz-aware timestamps')
535559

536-
cpdef to_datetime(self):
560+
cpdef datetime to_datetime(_Timestamp self):
537561
cdef:
538562
pandas_datetimestruct dts
539563
_TSObject ts
@@ -580,30 +604,35 @@ cdef inline bint is_timestamp(object o):
580604
return Py_TYPE(o) == ts_type # isinstance(o, Timestamp)
581605

582606

607+
cdef bint _nat_scalar_rules[6]
608+
609+
_nat_scalar_rules[Py_EQ] = False
610+
_nat_scalar_rules[Py_NE] = True
611+
_nat_scalar_rules[Py_LT] = False
612+
_nat_scalar_rules[Py_LE] = False
613+
_nat_scalar_rules[Py_GT] = False
614+
_nat_scalar_rules[Py_GE] = False
615+
616+
583617
cdef class _NaT(_Timestamp):
584618

585619
def __hash__(_NaT self):
586620
# py3k needs this defined here
587621
return hash(self.value)
588622

589623
def __richcmp__(_NaT self, object other, int op):
590-
# if not isinstance(other, (_NaT, _Timestamp)):
591-
# raise TypeError('Cannot compare %s with NaT' % type(other))
592-
593-
if op == 2: # ==
594-
return False
595-
elif op == 3: # !=
596-
return True
597-
elif op == 0: # <
598-
return False
599-
elif op == 1: # <=
600-
return False
601-
elif op == 4: # >
602-
return False
603-
elif op == 5: # >=
604-
return False
624+
cdef int ndim = getattr(other, 'ndim', -1)
605625

626+
if ndim == -1:
627+
return _nat_scalar_rules[op]
606628

629+
if ndim == 0:
630+
if isinstance(other, np.datetime64):
631+
other = Timestamp(other)
632+
else:
633+
raise TypeError('Cannot compare type %r with type %r' %
634+
(type(self).__name__, type(other).__name__))
635+
return PyObject_RichCompare(other, self, _reverse_ops[op])
607636

608637

609638
def _delta_to_nanoseconds(delta):

0 commit comments

Comments
 (0)