Skip to content

Commit 5052842

Browse files
cbertinato and jreback
authored and committed
BUG: Fixes rounding error in Timestamp.floor() (#19240)
1 parent 6b0c7e7 commit 5052842

File tree

5 files changed

+90
-34
lines changed

5 files changed

+90
-34
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ Datetimelike
620620
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
621621
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
622622
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
623+
- Bug in :func:`Timestamp.floor` and :func:`DatetimeIndex.floor` where timestamps far in the future and past were not rounded correctly (:issue:`19206`)
623624
-
624625

625626
Timezones

pandas/_libs/tslibs/timestamps.pyx

+42-18
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,46 @@ cdef inline object create_timestamp_from_ts(int64_t value,
5858
return ts_base
5959

6060

61+
def round_ns(values, rounder, freq):
    """
    Applies rounding function at given frequency

    Parameters
    ----------
    values : int, :obj:`ndarray`
        Integer nanosecond value(s) to be rounded.
    rounder : function
        Callable applied to the value(s) after scaling by the frequency,
        e.g. ``np.floor``, ``np.ceil`` or ``np.round``. Its result must
        support ``.astype('i8')``.
    freq : str, obj
        Anything accepted by ``to_offset``; the ``nanos`` attribute of the
        resulting offset is used as the rounding unit.

    Returns
    -------
    int or :obj:`ndarray`
        ``values`` rounded to a multiple of the unit (for units below
        1000ns the multiple is taken within the last six digits only).

    Warns
    -----
    A warning is issued when the unit is at least 1000ns but is not a
    whole number of microseconds, because precision will be lost.
    """
    from pandas.tseries.frequencies import to_offset
    unit = to_offset(freq).nanos
    if unit < 1000:
        # for nano rounding, work with the last 6 digits separately
        # due to float precision
        buff = 1000000
        # Divide by the unit directly instead of multiplying by its
        # reciprocal: ``1 / float(unit)`` is already rounded, so e.g.
        # ``49 * (1 / 49.0)`` is 0.9999999999999999 and floor() would drop
        # a full unit. A single division is correctly rounded and therefore
        # exact whenever the remainder is an exact multiple of the unit.
        r = (buff * (values // buff) + unit *
             (rounder((values % buff) / float(unit))).astype('i8'))
    else:
        if unit % 1000 != 0:
            msg = 'Precision will be lost using frequency: {}'
            warnings.warn(msg.format(freq))

        # GH19206
        # to deal with round-off when unit is large
        if unit >= 1e9:
            divisor = 10 ** int(np.log10(unit / 1e7))
        else:
            divisor = 10

        r = (unit * rounder((values * (divisor / float(unit))) / divisor)
             .astype('i8'))

    return r
99+
100+
61101
# This is PITA. Because we inherit from datetime, which has very specific
62102
# construction requirements, we need to do object instantiation in python
63103
# (see Timestamp class above). This will serve as a C extension type that
@@ -581,28 +621,12 @@ class Timestamp(_Timestamp):
581621
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
582622

583623
def _round(self, freq, rounder):
584-
585-
cdef:
586-
int64_t unit, r, value, buff = 1000000
587-
object result
588-
589-
from pandas.tseries.frequencies import to_offset
590-
unit = to_offset(freq).nanos
591624
if self.tz is not None:
592625
value = self.tz_localize(None).value
593626
else:
594627
value = self.value
595-
if unit < 1000 and unit % 1000 != 0:
596-
# for nano rounding, work with the last 6 digits separately
597-
# due to float precision
598-
r = (buff * (value // buff) + unit *
599-
(rounder((value % buff) / float(unit))).astype('i8'))
600-
elif unit >= 1000 and unit % 1000 != 0:
601-
msg = 'Precision will be lost using frequency: {}'
602-
warnings.warn(msg.format(freq))
603-
r = (unit * rounder(value / float(unit)).astype('i8'))
604-
else:
605-
r = (unit * rounder(value / float(unit)).astype('i8'))
628+
629+
r = round_ns(value, rounder, freq)
606630
result = Timestamp(r, unit='ns')
607631
if self.tz is not None:
608632
result = result.tz_localize(self.tz)

pandas/core/indexes/datetimelike.py

+2-15
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from pandas._libs import lib, iNaT, NaT
3737
from pandas._libs.tslibs.period import Period
3838
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
39+
from pandas._libs.tslibs.timestamps import round_ns
3940

4041
from pandas.core.indexes.base import Index, _index_shared_docs
4142
from pandas.util._decorators import Appender, cache_readonly
@@ -90,23 +91,9 @@ class TimelikeOps(object):
9091
""")
9192

9293
def _round(self, freq, rounder):
93-
94-
from pandas.tseries.frequencies import to_offset
95-
unit = to_offset(freq).nanos
9694
# round the local times
9795
values = _ensure_datetimelike_to_i8(self)
98-
if unit < 1000 and unit % 1000 != 0:
99-
# for nano rounding, work with the last 6 digits separately
100-
# due to float precision
101-
buff = 1000000
102-
result = (buff * (values // buff) + unit *
103-
(rounder((values % buff) / float(unit))).astype('i8'))
104-
elif unit >= 1000 and unit % 1000 != 0:
105-
msg = 'Precision will be lost using frequency: {}'
106-
warnings.warn(msg.format(freq))
107-
result = (unit * rounder(values / float(unit)).astype('i8'))
108-
else:
109-
result = (unit * rounder(values / float(unit)).astype('i8'))
96+
result = round_ns(values, rounder, freq)
11097
result = self._maybe_mask_results(result, fill_value=NaT)
11198

11299
attribs = self._get_attributes_dict()

pandas/tests/indexes/datetimes/test_scalar_compat.py

+21
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,27 @@ def test_round(self, tz):
126126
ts = '2016-10-17 12:00:00.001501031'
127127
DatetimeIndex([ts]).round('1010ns')
128128

129+
@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']),
    (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']),
    (['2117-01-01 00:00:45.000000012'], 'floor', '10ns',
     ['2117-01-01 00:00:45.000000010']),
    (['1823-01-01 00:00:01.000000012'], 'ceil', '10ns',
     ['1823-01-01 00:00:01.000000020']),
    (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']),
    (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']),
    (('NaT', '1823-01-01 00:00:01'), 'floor', '1s',
     ('NaT', '1823-01-01 00:00:01')),
    (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s',
     ('NaT', '1823-01-01 00:00:01'))
])
def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected):
    """
    Check ``floor``/``ceil`` on a DatetimeIndex for dates far from the
    epoch, for nanosecond frequencies, and for indexes containing NaT.
    """
    # NOTE(review): the ``tz`` fixture is requested but never used below.
    wanted = DatetimeIndex(list(expected))
    index = DatetimeIndex(list(test_input))
    # look the rounding method up by name and apply it in one step
    rounded = getattr(index, rounder)(freq)
    assert wanted.equals(rounded)
149+
129150
# ----------------------------------------------------------------
130151
# DatetimeIndex.normalize
131152

pandas/tests/scalar/timestamp/test_unary_ops.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from pandas.compat import PY3
1212
from pandas._libs.tslibs.frequencies import _INVALID_FREQ_ERROR
13-
from pandas import Timestamp
13+
from pandas import Timestamp, NaT
1414

1515

1616
class TestTimestampUnaryOps(object):
@@ -93,6 +93,29 @@ def test_round_frequencies(self, freq, expected):
9393
result = stamp.round(freq=freq)
9494
assert result == expected
9595

96+
@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45.000000012', 'floor', '10ns',
     '2117-01-01 00:00:45.000000010'),
    ('1823-01-01 00:00:01.000000012', 'ceil', '10ns',
     '1823-01-01 00:00:01.000000020'),
    ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'),
    ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'),
    ('NaT', 'floor', '1s', 'NaT'),
    ('NaT', 'ceil', '1s', 'NaT')
])
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
    """
    Check ``floor``/``ceil`` on a scalar Timestamp for dates far from the
    epoch, for nanosecond frequencies, and for NaT input.
    """
    stamp = Timestamp(test_input)
    # look the rounding method up by name and apply it in one step
    rounded = getattr(stamp, rounder)(freq)

    if stamp is not NaT:
        assert rounded == Timestamp(expected)
    else:
        # NaT never compares equal, so identity is the only valid check
        assert rounded is NaT
118+
96119
def test_ceil(self):
97120
dt = Timestamp('20130101 09:10:11')
98121
result = dt.ceil('D')

0 commit comments

Comments
 (0)