Skip to content

Commit 414a2d5

Browse files
committed
BUG: Rounding error in Timestamp.floor and DatetimeIndex.floor for dates far in the future and past (GH19206)
1 parent 54f1b3e commit 414a2d5

File tree

5 files changed

+77
-33
lines changed

5 files changed

+77
-33
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,7 @@ Datetimelike
551551
- Bug in :func:`~DataFrame.pct_change` using ``periods`` and ``freq`` returned different length outputs (:issue:`7292`)
552552
- Bug in comparison of :class:`DatetimeIndex` against ``None`` or ``datetime.date`` objects raising ``TypeError`` for ``==`` and ``!=`` comparisons instead of all-``False`` and all-``True``, respectively (:issue:`19301`)
553553
- Bug in :class:`Timestamp` and :func:`to_datetime` where a string representing a barely out-of-bounds timestamp would be incorrectly rounded down instead of raising ``OutOfBoundsDatetime`` (:issue:`19382`)
554+
- Bug in :func:`Timestamp.floor` and :func:`DatetimeIndex.floor` where timestamps far in the future and past were not rounded correctly (:issue:`19206`)
554555
-
555556

556557
Timezones

pandas/_libs/tslibs/timestamps.pyx

+42-18
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,46 @@ cdef inline object create_timestamp_from_ts(int64_t value,
5959
return ts_base
6060

6161

62+
def round_ns(values, rounder, freq):
    """
    Round integer nanosecond value(s) to a multiple of a frequency.

    When ``rounder`` is ``np.floor``, ``np.ceil`` or ``np.round`` /
    ``np.around`` the result is computed with exact int64 arithmetic, so
    values whose magnitude exceeds 2**53 (not representable exactly as
    float64) are still rounded correctly. Results are multiples of the
    frequency counted from zero; this includes sub-microsecond units that
    do not divide 1,000,000 ns. Any other ``rounder`` falls back to the
    float-based computation.

    Parameters
    ----------
    values : int, :obj:`ndarray`
        Nanosecond value(s) to round.
    rounder : function
        Rounding function, e.g. ``np.floor``, ``np.ceil`` or ``np.round``.
        A custom function must return an object exposing ``astype``.
    freq : str, obj
        Anything accepted by ``to_offset``; its ``nanos`` attribute is
        used as the rounding unit.

    Returns
    -------
    int or :obj:`ndarray`
        Rounded value(s), as int64.
    """
    from pandas.tseries.frequencies import to_offset
    unit = to_offset(freq).nanos

    if unit >= 1000 and unit % 1000 != 0:
        # Kept from the float-based implementation so that callers that
        # expect this warning for such frequencies still receive it.
        msg = 'Precision will be lost using frequency: {}'
        warnings.warn(msg.format(freq))

    is_floor = rounder is np.floor
    is_ceil = rounder is np.ceil
    is_half_even = rounder is np.round or rounder is np.around

    if is_floor or is_ceil or is_half_even:
        ivalues = np.asarray(values, dtype='i8')
        # Extreme inputs can overflow int64; let them wrap silently
        # instead of emitting a RuntimeWarning.
        with np.errstate(over='ignore'):
            if is_floor:
                # integer remainder takes the sign of ``unit``, so this
                # rounds toward negative infinity for negative values too
                return ivalues - ivalues % unit
            if is_ceil:
                return ivalues + (-ivalues) % unit

            # round half to even, matching np.round on the exact ratio
            quotient = ivalues // unit
            remainder = ivalues % unit
            twice = 2 * remainder
            round_up = (twice > unit) | ((twice == unit) &
                                         (quotient % 2 == 1))
            return unit * (quotient + round_up)

    # Fallback for arbitrary rounders: float-based computation.
    if unit < 1000:
        # for nano rounding, work with the last 6 digits separately
        # due to float precision
        buff = 1000000
        return (buff * (values // buff) + unit *
                (rounder((values % buff) * (1 / float(unit)))).astype('i8'))

    # GH19206
    # to deal with round-off when unit is large
    if unit >= 1e9:
        divisor = 10 ** int(np.log10(unit / 1e7))
    else:
        divisor = 10

    return (unit * rounder((values * (divisor / float(unit))) / divisor)
            .astype('i8'))
100+
101+
62102
# This is PITA. Because we inherit from datetime, which has very specific
63103
# construction requirements, we need to do object instantiation in python
64104
# (see Timestamp class above). This will serve as a C extension type that
@@ -590,28 +630,12 @@ class Timestamp(_Timestamp):
590630
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
591631

592632
def _round(self, freq, rounder):
593-
594-
cdef:
595-
int64_t unit, r, value, buff = 1000000
596-
object result
597-
598-
from pandas.tseries.frequencies import to_offset
599-
unit = to_offset(freq).nanos
600633
if self.tz is not None:
601634
value = self.tz_localize(None).value
602635
else:
603636
value = self.value
604-
if unit < 1000 and unit % 1000 != 0:
605-
# for nano rounding, work with the last 6 digits separately
606-
# due to float precision
607-
r = (buff * (value // buff) + unit *
608-
(rounder((value % buff) / float(unit))).astype('i8'))
609-
elif unit >= 1000 and unit % 1000 != 0:
610-
msg = 'Precision will be lost using frequency: {}'
611-
warnings.warn(msg.format(freq))
612-
r = (unit * rounder(value / float(unit)).astype('i8'))
613-
else:
614-
r = (unit * rounder(value / float(unit)).astype('i8'))
637+
638+
r = round_ns(value, rounder, freq)
615639
result = Timestamp(r, unit='ns')
616640
if self.tz is not None:
617641
result = result.tz_localize(self.tz)

pandas/core/indexes/datetimelike.py

+2-15
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
from pandas._libs import lib, iNaT, NaT
3737
from pandas._libs.tslibs.period import Period
3838
from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds
39+
from pandas._libs.tslibs.timestamps import round_ns
3940

4041
from pandas.core.indexes.base import Index, _index_shared_docs
4142
from pandas.util._decorators import Appender, cache_readonly
@@ -90,23 +91,9 @@ class TimelikeOps(object):
9091
""")
9192

9293
def _round(self, freq, rounder):
93-
94-
from pandas.tseries.frequencies import to_offset
95-
unit = to_offset(freq).nanos
9694
# round the local times
9795
values = _ensure_datetimelike_to_i8(self)
98-
if unit < 1000 and unit % 1000 != 0:
99-
# for nano rounding, work with the last 6 digits separately
100-
# due to float precision
101-
buff = 1000000
102-
result = (buff * (values // buff) + unit *
103-
(rounder((values % buff) / float(unit))).astype('i8'))
104-
elif unit >= 1000 and unit % 1000 != 0:
105-
msg = 'Precision will be lost using frequency: {}'
106-
warnings.warn(msg.format(freq))
107-
result = (unit * rounder(values / float(unit)).astype('i8'))
108-
else:
109-
result = (unit * rounder(values / float(unit)).astype('i8'))
96+
result = round_ns(values, rounder, freq)
11097
result = self._maybe_mask_results(result, fill_value=NaT)
11198

11299
attribs = self._get_attributes_dict()

pandas/tests/indexes/datetimes/test_scalar_compat.py

+16
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,22 @@ def test_round(self, tz):
126126
ts = '2016-10-17 12:00:00.001501031'
127127
DatetimeIndex([ts]).round('1010ns')
128128

129+
@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45.000000012', 'floor', '10ns',
     '2117-01-01 00:00:45.000000010'),
    ('1823-01-01 00:00:01.000000012', 'ceil', '10ns',
     '1823-01-01 00:00:01.000000020'),
    ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'),
    ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'),
])
def test_ceil_floor_edge(self, tz, test_input, rounder, freq, expected):
    # GH19206: floor/ceil on a single-element DatetimeIndex holding a
    # date far in the future or past must give the expected value.
    index = DatetimeIndex([test_input])
    rounded = getattr(index, rounder)(freq)
    assert DatetimeIndex([expected]).equals(rounded)
144+
129145
# ----------------------------------------------------------------
130146
# DatetimeIndex.normalize
131147

pandas/tests/scalar/timestamp/test_unary_ops.py

+16
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,22 @@ def test_round_frequencies(self, freq, expected):
9393
result = stamp.round(freq=freq)
9494
assert result == expected
9595

96+
@pytest.mark.parametrize('test_input, rounder, freq, expected', [
    ('2117-01-01 00:00:45', 'floor', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45', 'ceil', '15s', '2117-01-01 00:00:45'),
    ('2117-01-01 00:00:45.000000012', 'floor', '10ns',
     '2117-01-01 00:00:45.000000010'),
    ('1823-01-01 00:00:01.000000012', 'ceil', '10ns',
     '1823-01-01 00:00:01.000000020'),
    ('1823-01-01 00:00:01', 'floor', '1s', '1823-01-01 00:00:01'),
    ('1823-01-01 00:00:01', 'ceil', '1s', '1823-01-01 00:00:01'),
])
def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
    # GH19206: floor/ceil on a Timestamp far in the future or past must
    # give the expected value.
    stamp = Timestamp(test_input)
    rounded = getattr(stamp, rounder)(freq)
    assert rounded == Timestamp(expected)
111+
96112
def test_ceil(self):
97113
dt = Timestamp('20130101 09:10:11')
98114
result = dt.ceil('D')

0 commit comments

Comments
 (0)