From 7b82e8b4c7602dd42a8c65723eb76835872eb3f4 Mon Sep 17 00:00:00 2001 From: Mikolaj Chwalisz Date: Fri, 7 Apr 2017 14:47:10 +0200 Subject: [PATCH] DOC: timeseries.rst floating point precision (#15817) --- doc/source/timeseries.rst | 28 ++++++++++++++++++++++------ pandas/tseries/tools.py | 10 ++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 44c200e13b877..45fe271e9de9d 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -265,17 +265,23 @@ Typical epoch stored units pd.to_datetime([1349720105100, 1349720105200, 1349720105300, 1349720105400, 1349720105500 ], unit='ms') -These *work*, but the results may be unexpected. +.. note:: -.. ipython:: python + Epoch times will be rounded to the nearest nanosecond. - pd.to_datetime([1]) +.. warning:: - pd.to_datetime([1, 3.14], unit='s') + Conversion of float epoch times can lead to inaccurate and unexpected results. + :ref:`Python floats <python:tut-fp-issues>` have about 15 digits precision in + decimal. Rounding during conversion from float to high precision ``Timestamp`` is + unavoidable. The only way to achieve exact precision is to use a fixed-width + type (e.g. an int64). -.. note:: + .. ipython:: python - Epoch times will be rounded to the nearest nanosecond. + 1490195805.433502912 + pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') + pd.to_datetime(1490195805433502912, unit='ns') .. _timeseries.origin: @@ -300,6 +306,16 @@ Commonly called 'unix epoch' or POSIX time. pd.to_datetime([1, 2, 3], unit='D') +.. note:: + + Without specifying origin the following examples still evaluate, but the results + may be unexpected. + + .. ipython:: python + + pd.to_datetime([1]) + pd.to_datetime([1, 3.14], unit='s') + ..
_timeseries.daterange: Generating Ranges of Timestamps diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d0f1671f9e309..9d5821d859187 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -315,6 +315,16 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False, >>> %timeit pd.to_datetime(s,infer_datetime_format=False) 1 loop, best of 3: 471 ms per loop + Using a unix epoch time + + >>> pd.to_datetime(1490195805, unit='s') + Timestamp('2017-03-22 15:16:45') + >>> pd.to_datetime(1490195805433502912, unit='ns') + Timestamp('2017-03-22 15:16:45.433502912') + + .. warning:: For float arg, precision rounding might happen. To prevent + unexpected behavior use a fixed-width exact type. + Using a non-unix epoch origin >>> pd.to_datetime([1, 2, 3], unit='D',