diff --git a/doc/source/release.rst b/doc/source/release.rst index 2583b47d9b3bf..6f9aa1c01fc37 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -159,6 +159,8 @@ Improvements to existing features - ``StataWriter`` and ``DataFrame.to_stata`` accept time stamp and data labels (:issue:`6545`) - offset/freq info now in Timestamp __repr__ (:issue:`4553`) - Support passing ``encoding`` with xlwt (:issue:`3710`) +- Performance improvement when converting ``DatetimeIndex`` to floating ordinals + using ``DatetimeConverter`` (:issue:`6636`) .. _release.bug_fixes-0.14.0: diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index d059d229ef22e..b9939976fded8 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -16,6 +16,7 @@ import pandas.core.common as com from pandas.core.index import Index +from pandas.core.series import Series from pandas.tseries.index import date_range import pandas.tseries.tools as tools import pandas.tseries.frequencies as frequencies @@ -144,7 +145,10 @@ def _dt_to_float_ordinal(dt): preserving hours, minutes, seconds and microseconds. Return value is a :func:`float`. 
""" - base = dates.date2num(dt) + if isinstance(dt, (np.ndarray, Series)) and com.is_datetime64_ns_dtype(dt): + base = dates.epoch2num(dt.asi8 / 1.0E9) + else: + base = dates.date2num(dt) return base diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index 29137f9cb3e50..902b9cb549e32 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -5,8 +5,11 @@ import nose import numpy as np +from numpy.testing import assert_almost_equal as np_assert_almost_equal +from pandas import Timestamp from pandas.compat import u import pandas.util.testing as tm +from pandas.tseries.offsets import Second, Milli, Micro try: import pandas.tseries.converter as converter @@ -46,9 +49,48 @@ def test_conversion(self): rs = self.dtc.convert('2012-1-1', None, None) self.assertEqual(rs, xp) + rs = self.dtc.convert(Timestamp('2012-1-1'), None, None) + self.assertEqual(rs, xp) + + def test_conversion_float(self): + decimals = 9 + + rs = self.dtc.convert(Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None) + xp = converter.dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) + np_assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert(Timestamp('2012-1-1 09:02:03', tz='Asia/Hong_Kong'), None, None) + np_assert_almost_equal(rs, xp, decimals) + + rs = self.dtc.convert(datetime(2012, 1, 1, 1, 2, 3), None, None) + np_assert_almost_equal(rs, xp, decimals) + def test_time_formatter(self): self.tc(90000) + def test_dateindex_conversion(self): + decimals = 9 + + for freq in ('B', 'L', 'S'): + dateindex = tm.makeDateIndex(k = 10, freq = freq) + rs = self.dtc.convert(dateindex, None, None) + xp = converter.dates.date2num(dateindex) + np_assert_almost_equal(rs, xp, decimals) + + def test_resolution(self): + def _assert_less(ts1, ts2): + val1 = self.dtc.convert(ts1, None, None) + val2 = self.dtc.convert(ts2, None, None) + if not val1 < val2: + raise AssertionError('{0} is not less than {1}.'.format(val1, 
val2)) + + # Matplotlib's time representation using floats cannot distinguish intervals smaller + # than ~10 microseconds in the common range of years. + ts = Timestamp('2012-1-1') + _assert_less(ts, ts + Second()) + _assert_less(ts, ts + Milli()) + _assert_less(ts, ts + Micro(50)) + if __name__ == '__main__': import nose diff --git a/vb_suite/timeseries.py b/vb_suite/timeseries.py index c43d2fb76dbdb..93821c3be3c2c 100644 --- a/vb_suite/timeseries.py +++ b/vb_suite/timeseries.py @@ -269,3 +269,15 @@ def date_range(start=None, end=None, periods=None, freq=None): dataframe_resample_max_numpy = \ Benchmark("df.resample('1s', how=np.max)", setup) + +#---------------------------------------------------------------------- +# DatetimeConverter + +setup = common_setup + """ +from pandas.tseries.converter import DatetimeConverter +""" + +datetimeindex_converter = \ + Benchmark('DatetimeConverter.convert(rng, None, None)', + setup, start_date=datetime(2013, 1, 1)) +