Skip to content

Commit d591a80

Browse files
committed
PERF: add Timestamp <-> Series comparison vbench
1 parent ebbe7bc commit d591a80

File tree

4 files changed

+98
-75
lines changed

4 files changed

+98
-75
lines changed

pandas/tests/test_series.py

-69
Original file line numberDiff line numberDiff line change
@@ -2059,42 +2059,6 @@ def check_comparators(series, other):
20592059
check_comparators(self.ts, 5)
20602060
check_comparators(self.ts, self.ts + 1)
20612061

2062-
def test_timestamp_compare(self):
2063-
# make sure we can compare Timestamps on the right AND left hand side
2064-
# GH4982
2065-
df = DataFrame({'dates': date_range('20010101', periods=10)})
2066-
s = df.dates.copy()
2067-
2068-
s[0] = pd.Timestamp('nat')
2069-
s[3] = pd.Timestamp('nat')
2070-
2071-
ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'}
2072-
2073-
for left, right in ops.items():
2074-
left_f = getattr(operator, left)
2075-
right_f = getattr(operator, right)
2076-
2077-
# no nats
2078-
expected = left_f(df.dates, Timestamp('20010109'))
2079-
result = right_f(Timestamp('20010109'), df.dates)
2080-
tm.assert_series_equal(result, expected)
2081-
2082-
# nats
2083-
expected = left_f(df.dates, Timestamp('nat'))
2084-
result = right_f(Timestamp('nat'), df.dates)
2085-
tm.assert_series_equal(result, expected)
2086-
2087-
# compare to timestamp with series containing nats
2088-
expected = left_f(s, Timestamp('20010109'))
2089-
result = right_f(Timestamp('20010109'), s)
2090-
tm.assert_series_equal(result, expected)
2091-
2092-
# compare to nat with series containing nats
2093-
expected = left_f(s, Timestamp('nat'))
2094-
result = right_f(Timestamp('nat'), s)
2095-
tm.assert_series_equal(result, expected)
2096-
2097-
20982062
def test_operators_empty_int_corner(self):
20992063
s1 = Series([], [], dtype=np.int32)
21002064
s2 = Series({'x': 0.})
@@ -5026,39 +4990,6 @@ def test_numpy_unique(self):
50264990
result = np.unique(self.ts)
50274991

50284992

5029-
def test_timestamp_compare_scalars():
5030-
# case where ndim == 0
5031-
lhs = np.datetime64(datetime(2013, 12, 6))
5032-
rhs = Timestamp('now')
5033-
nat = Timestamp('nat')
5034-
5035-
ops = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
5036-
'ne': 'ne'}
5037-
5038-
for left, right in ops.items():
5039-
left_f = getattr(operator, left)
5040-
right_f = getattr(operator, right)
5041-
5042-
if pd._np_version_under1p7:
5043-
# you have to convert to timestamp for this to work with numpy
5044-
# scalars
5045-
expected = left_f(Timestamp(lhs), rhs)
5046-
5047-
# otherwise a TypeError is thrown
5048-
if left not in ('eq', 'ne'):
5049-
with tm.assertRaises(TypeError):
5050-
left_f(lhs, rhs)
5051-
else:
5052-
expected = left_f(lhs, rhs)
5053-
5054-
result = right_f(rhs, lhs)
5055-
tm.assert_equal(result, expected)
5056-
5057-
expected = left_f(rhs, nat)
5058-
result = right_f(nat, rhs)
5059-
tm.assert_equal(result, expected)
5060-
5061-
50624993
class TestSeriesNonUnique(unittest.TestCase):
50634994

50644995
_multiprocess_can_split_ = True

pandas/tseries/tests/test_timeseries.py

+71
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import sys
44
import os
55
import unittest
6+
import operator
67

78
import nose
89

@@ -2010,6 +2011,7 @@ def test_join_self(self):
20102011
joined = index.join(index, how=kind)
20112012
self.assert_(index is joined)
20122013

2014+
20132015
class TestDatetime64(unittest.TestCase):
20142016
"""
20152017
Also test support for datetime64[ns] in Series / DataFrame
@@ -2507,6 +2509,74 @@ def test_hash_equivalent(self):
25072509
stamp = Timestamp(datetime(2011, 1, 1))
25082510
self.assertEquals(d[stamp], 5)
25092511

2512+
def test_timestamp_compare_scalars(self):
    # Zero-dimensional case (ndim == 0): a numpy datetime64 scalar and NaT
    # compared against a Timestamp, with the Timestamp on either side.
    dt64 = np.datetime64(datetime(2013, 12, 6))
    stamp = Timestamp('now')
    nat = Timestamp('nat')

    # operator name -> name of the operator giving the same answer once the
    # operands are swapped
    mirrored = {'gt': 'lt', 'lt': 'gt', 'ge': 'le', 'le': 'ge', 'eq': 'eq',
                'ne': 'ne'}

    for fwd_name, rev_name in mirrored.items():
        fwd = getattr(operator, fwd_name)
        rev = getattr(operator, rev_name)

        if not pd._np_version_under1p7:
            expected = fwd(dt64, stamp)
        else:
            # with numpy scalars on this numpy version the left operand
            # has to be converted to a Timestamp for the comparison to work
            expected = fwd(Timestamp(dt64), stamp)

            # without the conversion the ordering operators raise TypeError
            if fwd_name != 'eq' and fwd_name != 'ne':
                with tm.assertRaises(TypeError):
                    fwd(dt64, stamp)

        # Timestamp on the left, numpy scalar on the right
        self.assertEqual(rev(stamp, dt64), expected)

        # same symmetry check with NaT as the other operand
        nat_expected = fwd(stamp, nat)
        self.assertEqual(rev(nat, stamp), nat_expected)
2543+
2544+
def test_timestamp_compare_series(self):
2545+
# make sure we can compare Timestamps on the right AND left hand side
2546+
# GH4982
2547+
s = Series(date_range('20010101', periods=10), name='dates')
2548+
s_nat = s.copy(deep=True)
2549+
2550+
s[0] = pd.Timestamp('nat')
2551+
s[3] = pd.Timestamp('nat')
2552+
2553+
ops = {'lt': 'gt', 'le': 'ge', 'eq': 'eq', 'ne': 'ne'}
2554+
2555+
for left, right in ops.items():
2556+
left_f = getattr(operator, left)
2557+
right_f = getattr(operator, right)
2558+
2559+
# no nats
2560+
expected = left_f(s, Timestamp('20010109'))
2561+
result = right_f(Timestamp('20010109'), s)
2562+
tm.assert_series_equal(result, expected)
2563+
2564+
# nats
2565+
expected = left_f(s, Timestamp('nat'))
2566+
result = right_f(Timestamp('nat'), s)
2567+
tm.assert_series_equal(result, expected)
2568+
2569+
# compare to timestamp with series containing nats
2570+
expected = left_f(s_nat, Timestamp('20010109'))
2571+
result = right_f(Timestamp('20010109'), s_nat)
2572+
tm.assert_series_equal(result, expected)
2573+
2574+
# compare to nat with series containing nats
2575+
expected = left_f(s_nat, Timestamp('nat'))
2576+
result = right_f(Timestamp('nat'), s_nat)
2577+
tm.assert_series_equal(result, expected)
2578+
2579+
25102580
class TestSlicing(unittest.TestCase):
25112581

25122582
def test_slice_year(self):
@@ -2775,6 +2845,7 @@ def test_frame_apply_dont_convert_datetime64(self):
27752845

27762846
self.assertTrue(df.x1.dtype == 'M8[ns]')
27772847

2848+
27782849
if __name__ == '__main__':
27792850
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
27802851
exit=False)

pandas/tslib.pyx

+15-6
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,11 @@ NaT = NaTType()
350350

351351
iNaT = util.get_nat()
352352

353+
354+
# Result of comparing a NaT (lhs) with a Timestamp (rhs) under comparison
# opcode `op`.  The answer is read from `_nat_scalar_rules` using `op` alone;
# neither operand is inspected here.
cdef inline bint _cmp_nat_dt(_NaT lhs, _Timestamp rhs, int op) except -1:
355+
return _nat_scalar_rules[op]
356+
357+
353358
cdef _tz_format(object obj, object zone):
354359
try:
355360
return obj.strftime(' %%Z, tz=%s' % zone)
@@ -464,7 +469,7 @@ _reverse_ops[Py_GT] = Py_LT
464469
_reverse_ops[Py_GE] = Py_LE
465470

466471

467-
cdef char* _NDIM_STRING = "ndim"
472+
cdef str _NDIM_STRING = "ndim"
468473

469474
# This is PITA. Because we inherit from datetime, which has very specific
470475
# construction requirements, we need to do object instantiation in python
@@ -483,11 +488,11 @@ cdef class _Timestamp(datetime):
483488
def __richcmp__(_Timestamp self, object other, int op):
484489
cdef:
485490
_Timestamp ots
486-
int ndim = getattr(other, _NDIM_STRING, -1)
491+
int ndim
487492

488493
if isinstance(other, _Timestamp):
489-
if isinstance(other, NaTType):
490-
return PyObject_RichCompare(other, self, _reverse_ops[op])
494+
if isinstance(other, _NaT):
495+
return _cmp_nat_dt(other, self, _reverse_ops[op])
491496
ots = other
492497
elif isinstance(other, datetime):
493498
if self.nanosecond == 0:
@@ -499,6 +504,8 @@ cdef class _Timestamp(datetime):
499504
except ValueError:
500505
return self._compare_outside_nanorange(other, op)
501506
else:
507+
ndim = getattr(other, _NDIM_STRING, -1)
508+
502509
if ndim != -1:
503510
if ndim == 0:
504511
if isinstance(other, np.datetime64):
@@ -541,7 +548,8 @@ cdef class _Timestamp(datetime):
541548
elif op == Py_GE:
542549
return dtval >= other
543550

544-
cdef void _assert_tzawareness_compat(_Timestamp self, object other):
551+
cdef int _assert_tzawareness_compat(_Timestamp self,
552+
object other) except -1:
545553
if self.tzinfo is None:
546554
if other.tzinfo is not None:
547555
raise ValueError('Cannot compare tz-naive and tz-aware '
@@ -622,7 +630,8 @@ cdef class _NaT(_Timestamp):
622630
if isinstance(other, np.datetime64):
623631
other = Timestamp(other)
624632
else:
625-
raise TypeError("asdf")
633+
raise TypeError('Cannot compare type %r with type %r' %
634+
(type(self).__name__, type(other).__name__))
626635
return PyObject_RichCompare(other, self, _reverse_ops[op])
627636

628637

vb_suite/binary_ops.py

+12
Original file line numberDiff line numberDiff line change
@@ -102,3 +102,15 @@
102102
frame_multi_and_no_ne = \
103103
Benchmark("df[(df>0) & (df2>0)]", setup, name='frame_multi_and_no_ne',cleanup="expr.set_use_numexpr(True)",
104104
start_date=datetime(2013, 2, 26))
105+
106+
# Timestamp <-> Series comparison benchmarks.  The setup snippet builds `s`,
# a Series of N daily dates starting 2001-01-01, and `ts`, the element of `s`
# at position N // 2 - 1.  `timestamp_series_compare` puts the Timestamp on
# the left-hand side ("ts >= s"), `series_timestamp_compare` puts the Series
# on the left-hand side ("s <= ts").  The two benchmarks share the setup but
# are given different start dates.
setup = common_setup + """
107+
N = 1000000
108+
halfway = N // 2 - 1
109+
s = Series(date_range('20010101', periods=N, freq='D'))
110+
ts = s[halfway]
111+
"""
112+
113+
timestamp_series_compare = Benchmark("ts >= s", setup,
114+
start_date=datetime(2013, 9, 27))
115+
series_timestamp_compare = Benchmark("s <= ts", setup,
116+
start_date=datetime(2012, 2, 21))

0 commit comments

Comments
 (0)