Skip to content

BUG: Fixes rounding error in Timestamp.floor() #19240

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 7, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ Datetimelike
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
- Bug in :func:`Timestamp.floor` and :func:`DatetimeIndex.floor` where timestamps far in the future and past were not rounded correctly (:issue:`19206`)
-

Timezones
Expand Down
60 changes: 42 additions & 18 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,46 @@ cdef inline object create_timestamp_from_ts(int64_t value,
return ts_base


def round_ns(values, rounder, freq):
    """
    Applies rounding function at given frequency

    The rounding is carried out on an exact integer quotient/remainder
    split of ``values`` so that only a small number in ``[0, 2)`` ever
    passes through floating point. This keeps exact multiples of the
    frequency unchanged and avoids the precision loss that occurs when a
    full 64-bit nanosecond count is converted to a float.

    Parameters
    ----------
    values : int, :obj:`ndarray`
        Integer nanosecond value(s) to round.
    rounder : function
        Callable applied to a float (or float array) whose result exposes
        ``astype``. It is assumed to behave like ``np.floor``, ``np.ceil``
        or ``np.round``, i.e. shifting its argument by an even integer
        shifts its result by the same amount.
    freq : str, obj
        Frequency accepted by ``to_offset``; its ``nanos`` attribute is
        used as the rounding unit.

    Returns
    -------
    int or :obj:`ndarray`
    """
    # imported locally, as in the original implementation
    from pandas.tseries.frequencies import to_offset
    unit = to_offset(freq).nanos

    # Warning retained for backward compatibility with existing callers.
    if unit >= 1000 and unit % 1000 != 0:
        msg = 'Precision will be lost using frequency: {}'
        warnings.warn(msg.format(freq))

    # GH19206
    # Exact integer arithmetic: floor division and modulo never lose
    # precision, and the remainder is always in [0, unit), also for
    # values before the epoch.
    quotient = values // unit
    remainder = values % unit

    # Keep the parity of the quotient next to the fractional part so that
    # a half-to-even rounder (np.round) resolves ties exactly as it would
    # on the full quotient; floor and ceil are unaffected by the shift.
    parity = quotient % 2
    steps = rounder(parity + remainder / float(unit)).astype('i8')

    # Integer overflow behaviour for out-of-range inputs (e.g. sentinel
    # values in an array) is left to the caller to mask.
    return unit * (quotient - parity + steps)


# This is PITA. Because we inherit from datetime, which has very specific
# construction requirements, we need to do object instantiation in python
# (see Timestamp class above). This will serve as a C extension type that
Expand Down Expand Up @@ -590,28 +630,12 @@ class Timestamp(_Timestamp):
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)

def _round(self, freq, rounder):

cdef:
int64_t unit, r, value, buff = 1000000
object result

from pandas.tseries.frequencies import to_offset
unit = to_offset(freq).nanos
if self.tz is not None:
value = self.tz_localize(None).value
else:
value = self.value
if unit < 1000 and unit % 1000 != 0:
# for nano rounding, work with the last 6 digits separately
# due to float precision
r = (buff * (value // buff) + unit *
(rounder((value % buff) / float(unit))).astype('i8'))
elif unit >= 1000 and unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
r = (unit * rounder(value / float(unit)).astype('i8'))
else:
r = (unit * rounder(value / float(unit)).astype('i8'))

r = round_ns(value, rounder, freq)
result = Timestamp(r, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz)
Expand Down
17 changes: 2 additions & 15 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
from pandas._libs import lib, iNaT, NaT
from pandas._libs.tslibs.period import Period
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
from pandas._libs.tslibs.timestamps import round_ns

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.util._decorators import Appender, cache_readonly
Expand Down Expand Up @@ -90,23 +91,9 @@ class TimelikeOps(object):
""")

def _round(self, freq, rounder):

from pandas.tseries.frequencies import to_offset
unit = to_offset(freq).nanos
# round the local times
values = _ensure_datetimelike_to_i8(self)
if unit < 1000 and unit % 1000 != 0:
# for nano rounding, work with the last 6 digits separately
# due to float precision
buff = 1000000
result = (buff * (values // buff) + unit *
(rounder((values % buff) / float(unit))).astype('i8'))
elif unit >= 1000 and unit % 1000 != 0:
msg = 'Precision will be lost using frequency: {}'
warnings.warn(msg.format(freq))
result = (unit * rounder(values / float(unit)).astype('i8'))
else:
result = (unit * rounder(values / float(unit)).astype('i8'))
result = round_ns(values, rounder, freq)
result = self._maybe_mask_results(result, fill_value=NaT)

attribs = self._get_attributes_dict()
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/indexes/datetimes/test_scalar_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,22 @@ def test_round(self, tz):
ts = '2016-10-17 12:00:00.001501031'
DatetimeIndex([ts]).round('1010ns')

@pytest.mark.parametrize('test_input, rounder, freq, expected', [
('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'),
('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'),
('2117-01-01 00:00:45.000000012', 'floor', '10ns',
'2117-01-01 00:00:45.000000010'),
('1823-01-01 00:00:01.000000012', 'ceil', '10ns',
'1823-01-01 00:00:01.000000020'),
('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'),
('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01')])
def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a NaT and another element here (the 2nd can be the same as the first)

dt = DatetimeIndex([test_input])
func = getattr(dt, rounder)
result = func(freq)
expected = DatetimeIndex([expected])
assert expected.equals(result)

# ----------------------------------------------------------------
# DatetimeIndex.normalize

Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,22 @@ def test_round_frequencies(self, freq, expected):
result = stamp.round(freq=freq)
assert result == expected

@pytest.mark.parametrize('test_input, rounder, freq, expected', [
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a NaT here as well

('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'),
('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'),
('2117-01-01 00:00:45.000000012', 'floor', '10ns',
'2117-01-01 00:00:45.000000010'),
('1823-01-01 00:00:01.000000012', 'ceil', '10ns',
'1823-01-01 00:00:01.000000020'),
('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'),
('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01')])
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
dt = Timestamp(test_input)
func = getattr(dt, rounder)
result = func(freq)
expected = Timestamp(expected)
assert result == expected

def test_ceil(self):
dt = Timestamp('20130101 09:10:11')
result = dt.ceil('D')
Expand Down