Skip to content

Commit 19ab5ed

Browse files
author
Tom Augspurger
committed
Merge pull request #6650 from agijsberts/speed-up-dateconverter
PERF: Speed up DatetimeConverter by using Matplotlib's epoch2num when possible...
2 parents 25506e5 + 64d9e92 commit 19ab5ed

File tree

4 files changed

+61
-1
lines changed

4 files changed

+61
-1
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,8 @@ Improvements to existing features
175175
- ``StataWriter`` and ``DataFrame.to_stata`` accept time stamp and data labels (:issue:`6545`)
176176
- offset/freq info now in Timestamp __repr__ (:issue:`4553`)
177177
- Support passing ``encoding`` with xlwt (:issue:`3710`)
178+
- Performance improvement when converting ``DatetimeIndex`` to floating ordinals
179+
using ``DatetimeConverter`` (:issue:`6636`)
178180

179181
.. _release.bug_fixes-0.14.0:
180182

pandas/tseries/converter.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import pandas.core.common as com
1717
from pandas.core.index import Index
1818

19+
from pandas.core.series import Series
1920
from pandas.tseries.index import date_range
2021
import pandas.tseries.tools as tools
2122
import pandas.tseries.frequencies as frequencies
@@ -144,7 +145,10 @@ def _dt_to_float_ordinal(dt):
144145
preserving hours, minutes, seconds and microseconds. Return value
145146
is a :func:`float`.
146147
"""
147-
base = dates.date2num(dt)
148+
if isinstance(dt, (np.ndarray, Series)) and com.is_datetime64_ns_dtype(dt):
149+
base = dates.epoch2num(dt.asi8 / 1.0E9)
150+
else:
151+
base = dates.date2num(dt)
148152
return base
149153

150154

pandas/tseries/tests/test_converter.py

+42
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,11 @@
55
import nose
66

77
import numpy as np
8+
from numpy.testing import assert_almost_equal as np_assert_almost_equal
9+
from pandas import Timestamp
810
from pandas.compat import u
911
import pandas.util.testing as tm
12+
from pandas.tseries.offsets import Second, Milli, Micro
1013

1114
try:
1215
import pandas.tseries.converter as converter
@@ -46,9 +49,48 @@ def test_conversion(self):
4649
rs = self.dtc.convert('2012-1-1', None, None)
4750
self.assertEqual(rs, xp)
4851

52+
rs = self.dtc.convert(Timestamp('2012-1-1'), None, None)
53+
self.assertEqual(rs, xp)
54+
55+
def test_conversion_float(self):
56+
decimals = 9
57+
58+
rs = self.dtc.convert(Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None)
59+
xp = converter.dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC'))
60+
np_assert_almost_equal(rs, xp, decimals)
61+
62+
rs = self.dtc.convert(Timestamp('2012-1-1 09:02:03', tz='Asia/Hong_Kong'), None, None)
63+
np_assert_almost_equal(rs, xp, decimals)
64+
65+
rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None)
66+
np_assert_almost_equal(rs, xp, decimals)
67+
4968
def test_time_formatter(self):
5069
self.tc(90000)
5170

71+
def test_dateindex_conversion(self):
72+
decimals = 9
73+
74+
for freq in ('B', 'L', 'S'):
75+
dateindex = tm.makeDateIndex(k = 10, freq = freq)
76+
rs = self.dtc.convert(dateindex, None, None)
77+
xp = converter.dates.date2num(dateindex)
78+
np_assert_almost_equal(rs, xp, decimals)
79+
80+
def test_resolution(self):
81+
def _assert_less(ts1, ts2):
82+
val1 = self.dtc.convert(ts1, None, None)
83+
val2 = self.dtc.convert(ts2, None, None)
84+
if not val1 < val2:
85+
raise AssertionError('{0} is not less than {1}.'.format(val1, val2))
86+
87+
# Matplotlib's time representation using floats cannot distinguish intervals smaller
88+
# than ~10 microseconds in the common range of years.
89+
ts = Timestamp('2012-1-1')
90+
_assert_less(ts, ts + Second())
91+
_assert_less(ts, ts + Milli())
92+
_assert_less(ts, ts + Micro(50))
93+
5294

5395
if __name__ == '__main__':
5496
import nose

vb_suite/timeseries.py

+12
Original file line numberDiff line numberDiff line change
@@ -269,3 +269,15 @@ def date_range(start=None, end=None, periods=None, freq=None):
269269
dataframe_resample_max_numpy = \
270270
Benchmark("df.resample('1s', how=np.max)", setup)
271271

272+
273+
#----------------------------------------------------------------------
274+
# DatetimeConverter
275+
276+
setup = common_setup + """
277+
from pandas.tseries.converter import DatetimeConverter
278+
"""
279+
280+
datetimeindex_converter = \
281+
Benchmark('DatetimeConverter.convert(rng, None, None)',
282+
setup, start_date=datetime(2013, 1, 1))
283+

0 commit comments

Comments
 (0)