Skip to content

BUG: allow Timestamp comparisons on the left #4983

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Sep 27, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ Bug Fixes
- Fix repr for DateOffset. No longer show duplicate entries in kwds.
Removed unused offset fields. (:issue:`4638`)
- Fixed wrong index name during read_csv if using usecols. Applies to c parser only. (:issue:`4201`)
- ``Timestamp`` objects can now appear on the left-hand side of a comparison
operation with a ``Series`` or ``DataFrame`` object (:issue:`4982`).

pandas 0.12.0
-------------
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4335,6 +4335,31 @@ def check(df,df2):
df2 = DataFrame({'a': date_range('20010101', periods=len(df)), 'b': date_range('20100101', periods=len(df))})
check(df,df2)

def test_timestamp_compare(self):
    # GH4982: a Timestamp must be usable on either side of a comparison
    # with a DataFrame, and both spellings must give the same frame
    df = DataFrame({'dates1': date_range('20010101', periods=10),
                    'dates2': date_range('20010102', periods=10),
                    'intcol': np.random.randint(1000000000, size=10),
                    'floatcol': np.random.randn(10),
                    'stringcol': list(tm.rands(10))})

    # blank out a random subset of dates2 so the frame itself holds NaT
    nat_rows = np.random.rand(len(df)) > 0.5
    df.loc[nat_rows, 'dates2'] = pd.NaT

    # (operator with the frame on the left,
    #  mirrored operator with the Timestamp on the left)
    mirrored_ops = [('gt', 'lt'), ('lt', 'gt'), ('ge', 'le'),
                    ('le', 'ge'), ('eq', 'eq'), ('ne', 'ne')]

    # first an ordinary timestamp ("no nats"), then NaT ("nats")
    stamps = ('20010109', 'nat')

    for frame_first, stamp_first in mirrored_ops:
        left_f = getattr(operator, frame_first)
        right_f = getattr(operator, stamp_first)

        for stamp in stamps:
            expected = left_f(df, Timestamp(stamp))
            result = right_f(Timestamp(stamp), df)
            tm.assert_frame_equal(result, expected)

def test_modulo(self):

# GH3590, modulo as ints
Expand Down
71 changes: 71 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
import os
import unittest
import operator

import nose

Expand Down Expand Up @@ -2010,6 +2011,7 @@ def test_join_self(self):
joined = index.join(index, how=kind)
self.assert_(index is joined)


class TestDatetime64(unittest.TestCase):
"""
Also test support for datetime64[ns] in Series / DataFrame
Expand Down Expand Up @@ -2507,6 +2509,74 @@ def test_hash_equivalent(self):
stamp = Timestamp(datetime(2011, 1, 1))
self.assertEquals(d[stamp], 5)

def test_timestamp_compare_scalars(self):
    # ndim == 0 case: a numpy datetime64 scalar against a Timestamp,
    # followed by a Timestamp against NaT
    np_scalar = np.datetime64(datetime(2013, 12, 6))
    stamp = Timestamp('now')
    nat = Timestamp('nat')

    # (operator as written, mirrored operator with operands swapped)
    mirrored_ops = [('gt', 'lt'), ('lt', 'gt'), ('ge', 'le'),
                    ('le', 'ge'), ('eq', 'eq'), ('ne', 'ne')]

    for left, right in mirrored_ops:
        left_f = getattr(operator, left)
        right_f = getattr(operator, right)

        if not pd._np_version_under1p7:
            expected = left_f(np_scalar, stamp)
        else:
            # on older numpy the scalar has to be wrapped in a Timestamp
            # before the comparison works ...
            expected = left_f(Timestamp(np_scalar), stamp)

            # ... because the raw scalar raises TypeError for everything
            # except (in)equality
            if left not in ('eq', 'ne'):
                with tm.assertRaises(TypeError):
                    left_f(np_scalar, stamp)

        # Timestamp on the left, numpy scalar on the right
        result = right_f(stamp, np_scalar)
        self.assertEqual(result, expected)

        # Timestamp vs NaT must agree with NaT vs Timestamp
        expected = left_f(stamp, nat)
        result = right_f(nat, stamp)
        self.assertEqual(result, expected)

def test_timestamp_compare_series(self):
# make sure we can compare Timestamps on the right AND left hand side
# GH4982
s = Series(date_range('20010101', periods=10), name='dates')
s_nat = s.copy(deep=True)

s[0] = pd.Timestamp('nat')
s[3] = pd.Timestamp('nat')

ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'}

for left, right in ops.items():
left_f = getattr(operator, left)
right_f = getattr(operator, right)

# no nats
expected = left_f(s, Timestamp('20010109'))
result = right_f(Timestamp('20010109'), s)
tm.assert_series_equal(result, expected)

# nats
expected = left_f(s, Timestamp('nat'))
result = right_f(Timestamp('nat'), s)
tm.assert_series_equal(result, expected)

# compare to timestamp with series containing nats
expected = left_f(s_nat, Timestamp('20010109'))
result = right_f(Timestamp('20010109'), s_nat)
tm.assert_series_equal(result, expected)

# compare to nat with series containing nats
expected = left_f(s_nat, Timestamp('nat'))
result = right_f(Timestamp('nat'), s_nat)
tm.assert_series_equal(result, expected)


class TestSlicing(unittest.TestCase):

def test_slice_year(self):
Expand Down Expand Up @@ -2775,6 +2845,7 @@ def test_frame_apply_dont_convert_datetime64(self):

self.assertTrue(df.x1.dtype == 'M8[ns]')


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
167 changes: 98 additions & 69 deletions pandas/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@ from cpython cimport (
PyTypeObject,
PyFloat_Check,
PyObject_RichCompareBool,
PyString_Check
PyObject_RichCompare,
PyString_Check,
Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
)

# Cython < 0.17 doesn't have this in cpython
cdef extern from "Python.h":
cdef PyTypeObject *Py_TYPE(object)
int PySlice_Check(object)


from libc.stdlib cimport free
Expand All @@ -30,9 +33,6 @@ from datetime import timedelta, datetime
from datetime import time as datetime_time
from pandas.compat import parse_date

cdef extern from "Python.h":
int PySlice_Check(object)

# initialize numpy
import_array()
#import_ufunc()
Expand Down Expand Up @@ -350,6 +350,11 @@ NaT = NaTType()

iNaT = util.get_nat()


# Compare a NaT (``lhs``) with a timestamp (``rhs``) for the rich-comparison
# code ``op``.  Neither operand's value is consulted: the answer is read
# straight out of the ``_nat_scalar_rules`` table, indexed by ``op``.
cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1:
return _nat_scalar_rules[op]


cdef _tz_format(object obj, object zone):
try:
return obj.strftime(' %%Z, tz=%s' % zone)
Expand Down Expand Up @@ -437,9 +442,35 @@ def apply_offset(ndarray[object] values, object offset):

result = np.empty(n, dtype='M8[ns]')
new_values = result.view('i8')
pass


# Apply the rich-comparison code ``op`` (one of the Py_EQ / Py_NE / Py_LT /
# Py_LE / Py_GT / Py_GE constants) to two int64 values and return the
# boolean outcome of ``lhs <op> rhs``.
# NOTE(review): there is no trailing ``else`` -- an ``op`` outside those six
# codes falls off the end without an explicit return; confirm callers only
# ever pass the six Py_* codes.
cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1:
if op == Py_EQ:
return lhs == rhs
elif op == Py_NE:
return lhs != rhs
elif op == Py_LT:
return lhs < rhs
elif op == Py_LE:
return lhs <= rhs
elif op == Py_GT:
return lhs > rhs
elif op == Py_GE:
return lhs >= rhs


# Lookup table from a rich-comparison code to the code that expresses the
# same relation with the operands swapped (``a < b`` <=> ``b > a``).
# Equality and inequality are symmetric, so they map to themselves.
# Indexed directly by the Py_* constants, hence the six slots.
cdef int _reverse_ops[6]

_reverse_ops[Py_LT] = Py_GT
_reverse_ops[Py_LE] = Py_GE
_reverse_ops[Py_EQ] = Py_EQ
_reverse_ops[Py_NE] = Py_NE
_reverse_ops[Py_GT] = Py_LT
_reverse_ops[Py_GE] = Py_LE


cdef str _NDIM_STRING = "ndim"

# This is PITA. Because we inherit from datetime, which has very specific
# construction requirements, we need to do object instantiation in python
# (see Timestamp class above). This will serve as a C extension type that
Expand All @@ -449,18 +480,21 @@ cdef class _Timestamp(datetime):
int64_t value, nanosecond
object offset # frequency reference

def __hash__(self):
def __hash__(_Timestamp self):
if self.nanosecond:
return hash(self.value)
else:
return datetime.__hash__(self)
return datetime.__hash__(self)

def __richcmp__(_Timestamp self, object other, int op):
cdef _Timestamp ots
cdef:
_Timestamp ots
int ndim

if isinstance(other, _Timestamp):
if isinstance(other, _NaT):
return _cmp_nat_dt(other, self, _reverse_ops[op])
ots = other
elif type(other) is datetime:
elif isinstance(other, datetime):
if self.nanosecond == 0:
val = self.to_datetime()
return PyObject_RichCompareBool(val, other, op)
Expand All @@ -470,70 +504,60 @@ cdef class _Timestamp(datetime):
except ValueError:
return self._compare_outside_nanorange(other, op)
else:
if op == 2:
return False
elif op == 3:
return True
ndim = getattr(other, _NDIM_STRING, -1)

if ndim != -1:
if ndim == 0:
if isinstance(other, np.datetime64):
other = Timestamp(other)
else:
raise TypeError('Cannot compare type %r with type %r' %
(type(self).__name__,
type(other).__name__))
return PyObject_RichCompare(other, self, _reverse_ops[op])
else:
raise TypeError('Cannot compare Timestamp with '
'{0!r}'.format(other.__class__.__name__))
if op == Py_EQ:
return False
elif op == Py_NE:
return True
raise TypeError('Cannot compare type %r with type %r' %
(type(self).__name__, type(other).__name__))

self._assert_tzawareness_compat(other)
return _cmp_scalar(self.value, ots.value, op)

if op == 2: # ==
return self.value == ots.value
elif op == 3: # !=
return self.value != ots.value
elif op == 0: # <
return self.value < ots.value
elif op == 1: # <=
return self.value <= ots.value
elif op == 4: # >
return self.value > ots.value
elif op == 5: # >=
return self.value >= ots.value

cdef _compare_outside_nanorange(self, object other, int op):
dtval = self.to_datetime()
cdef bint _compare_outside_nanorange(_Timestamp self, datetime other,
int op) except -1:
cdef datetime dtval = self.to_datetime()

self._assert_tzawareness_compat(other)

if self.nanosecond == 0:
if op == 2: # ==
return dtval == other
elif op == 3: # !=
return dtval != other
elif op == 0: # <
return dtval < other
elif op == 1: # <=
return dtval <= other
elif op == 4: # >
return dtval > other
elif op == 5: # >=
return dtval >= other
return PyObject_RichCompareBool(dtval, other, op)
else:
if op == 2: # ==
if op == Py_EQ:
return False
elif op == 3: # !=
elif op == Py_NE:
return True
elif op == 0: # <
elif op == Py_LT:
return dtval < other
elif op == 1: # <=
elif op == Py_LE:
return dtval < other
elif op == 4: # >
elif op == Py_GT:
return dtval >= other
elif op == 5: # >=
elif op == Py_GE:
return dtval >= other

cdef _assert_tzawareness_compat(self, object other):
cdef int _assert_tzawareness_compat(_Timestamp self,
object other) except -1:
if self.tzinfo is None:
if other.tzinfo is not None:
raise Exception('Cannot compare tz-naive and '
'tz-aware timestamps')
raise ValueError('Cannot compare tz-naive and tz-aware '
'timestamps')
elif other.tzinfo is None:
raise Exception('Cannot compare tz-naive and tz-aware timestamps')
raise ValueError('Cannot compare tz-naive and tz-aware timestamps')

cpdef to_datetime(self):
cpdef datetime to_datetime(_Timestamp self):
cdef:
pandas_datetimestruct dts
_TSObject ts
Expand Down Expand Up @@ -580,30 +604,35 @@ cdef inline bint is_timestamp(object o):
return Py_TYPE(o) == ts_type # isinstance(o, Timestamp)


# Fixed outcome of comparing NaT with a scalar, indexed by rich-comparison
# code: every comparison is False except ``!=``, which is True.
cdef bint _nat_scalar_rules[6]

_nat_scalar_rules[Py_EQ] = False
_nat_scalar_rules[Py_NE] = True
_nat_scalar_rules[Py_LT] = False
_nat_scalar_rules[Py_LE] = False
_nat_scalar_rules[Py_GT] = False
_nat_scalar_rules[Py_GE] = False


cdef class _NaT(_Timestamp):

def __hash__(_NaT self):
# py3k needs this defined here
return hash(self.value)

def __richcmp__(_NaT self, object other, int op):
# if not isinstance(other, (_NaT, _Timestamp)):
# raise TypeError('Cannot compare %s with NaT' % type(other))

if op == 2: # ==
return False
elif op == 3: # !=
return True
elif op == 0: # <
return False
elif op == 1: # <=
return False
elif op == 4: # >
return False
elif op == 5: # >=
return False
cdef int ndim = getattr(other, 'ndim', -1)

if ndim == -1:
return _nat_scalar_rules[op]

if ndim == 0:
if isinstance(other, np.datetime64):
other = Timestamp(other)
else:
raise TypeError('Cannot compare type %r with type %r' %
(type(self).__name__, type(other).__name__))
return PyObject_RichCompare(other, self, _reverse_ops[op])


def _delta_to_nanoseconds(delta):
Expand Down
Loading