Skip to content

Commit 2c78964

Browse files
committed
implement int64 rounding function round_nsint64
The old `round_ns` is replaced by `round_nsint64`; `round_nsint64` is based on integer arithmetic while `round_ns` was based on floating point numbers. Rounding mode is explicitly defined by RoundTo enum class: - RoundTo.MINUS_INFTY rounds to -∞ (floor) - RoundTo.PLUS_INFTY rounds to +∞ (ceil) - RoundTo.NEAREST_HALF_MINUS_INFTY rounds to nearest multiple, and breaks tie to -∞ - RoundTo.NEAREST_HALF_PLUS_INFTY rounds to nearest multiple, and breaks tie to +∞ - RoundTo.NEAREST_HALF_EVEN rounds to nearest multiple, and breaks tie to even multiple
1 parent ada51de commit 2c78964

File tree

2 files changed

+49
-42
lines changed

2 files changed

+49
-42
lines changed

pandas/_libs/tslibs/timestamps.pyx

+43-36
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cimport ccalendar
2222
from conversion import tz_localize_to_utc, normalize_i8_timestamps
2323
from conversion cimport (tz_convert_single, _TSObject,
2424
convert_to_tsobject, convert_datetime_to_tsobject)
25+
import enum
2526
from fields import get_start_end_field, get_date_name_field
2627
from nattype import NaT
2728
from nattype cimport NPY_NAT
@@ -57,50 +58,56 @@ cdef inline object create_timestamp_from_ts(int64_t value,
5758
return ts_base
5859

5960

60-
def round_ns(values, rounder, freq):
61+
@enum.unique
62+
class RoundTo(enum.Enum):
63+
MINUS_INFTY = 0
64+
PLUS_INFTY = 1
65+
NEAREST_HALF_EVEN = 2
66+
NEAREST_HALF_PLUS_INFTY = 3
67+
NEAREST_HALF_MINUS_INFTY = 4
68+
69+
70+
def round_nsint64(values, mode: RoundTo, freq):
6171
"""
62-
Applies rounding function at given frequency
72+
Applies rounding mode at given frequency
6373
6474
Parameters
6575
----------
6676
values : :obj:`ndarray`
67-
rounder : function, eg. 'ceil', 'floor', 'round'
77+
mode : instance of `RoundTo` enumeration
6878
freq : str, obj
6979
7080
Returns
7181
-------
7282
:obj:`ndarray`
7383
"""
84+
85+
if not isinstance(mode, RoundTo):
86+
raise ValueError('mode should be a RoundTo member')
87+
7488
unit = to_offset(freq).nanos
7589

76-
# GH21262 If the Timestamp is multiple of the freq str
77-
# don't apply any rounding
78-
mask = values % unit == 0
79-
if mask.all():
80-
return values
81-
r = values.copy()
82-
83-
if unit < 1000:
84-
# for nano rounding, work with the last 6 digits separately
85-
# due to float precision
86-
buff = 1000000
87-
r[~mask] = (buff * (values[~mask] // buff) +
88-
unit * (rounder((values[~mask] % buff) *
89-
(1 / float(unit)))).astype('i8'))
90-
else:
91-
if unit % 1000 != 0:
92-
msg = 'Precision will be lost using frequency: {}'
93-
warnings.warn(msg.format(freq))
94-
# GH19206
95-
# to deal with round-off when unit is large
96-
if unit >= 1e9:
97-
divisor = 10 ** int(np.log10(unit / 1e7))
98-
else:
99-
divisor = 10
100-
r[~mask] = (unit * rounder((values[~mask] *
101-
(divisor / float(unit))) / divisor)
102-
.astype('i8'))
103-
return r
90+
if mode is RoundTo.MINUS_INFTY:
91+
return values - (values % unit)
92+
elif mode is RoundTo.PLUS_INFTY:
93+
return values + (-values % unit)
94+
elif mode is RoundTo.NEAREST_HALF_MINUS_INFTY:
95+
return round_nsint64(values - unit//2, RoundTo.PLUS_INFTY, freq)
96+
elif mode is RoundTo.NEAREST_HALF_PLUS_INFTY:
97+
return round_nsint64(values + unit//2, RoundTo.MINUS_INFTY, freq)
98+
elif mode is RoundTo.NEAREST_HALF_EVEN:
99+
# for odd unit there is no need of a tie break
100+
if unit % 2:
101+
return round_nsint64(values, RoundTo.NEAREST_HALF_MINUS_INFTY, freq)
102+
d, r = np.divmod(values, unit)
103+
mask = np.logical_or(
104+
r > (unit // 2),
105+
np.logical_and(r == (unit // 2), d % 2)
106+
)
107+
d[mask] += 1
108+
return d * unit
109+
110+
raise NotImplementedError(mode)
104111

105112

106113
# This is PITA. Because we inherit from datetime, which has very specific
@@ -656,7 +663,7 @@ class Timestamp(_Timestamp):
656663

657664
return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)
658665

659-
def _round(self, freq, rounder):
666+
def _round(self, freq, mode):
660667
if self.tz is not None:
661668
value = self.tz_localize(None).value
662669
else:
@@ -665,7 +672,7 @@ class Timestamp(_Timestamp):
665672
value = np.array([value], dtype=np.int64)
666673

667674
# Will only ever contain 1 element for timestamp
668-
r = round_ns(value, rounder, freq)[0]
675+
r = round_nsint64(value, mode, freq)[0]
669676
result = Timestamp(r, unit='ns')
670677
if self.tz is not None:
671678
result = result.tz_localize(self.tz)
@@ -687,7 +694,7 @@ class Timestamp(_Timestamp):
687694
------
688695
ValueError if the freq cannot be converted
689696
"""
690-
return self._round(freq, np.round)
697+
return self._round(freq, RoundTo.NEAREST_HALF_EVEN)
691698

692699
def floor(self, freq):
693700
"""
@@ -697,7 +704,7 @@ class Timestamp(_Timestamp):
697704
----------
698705
freq : a freq string indicating the flooring resolution
699706
"""
700-
return self._round(freq, np.floor)
707+
return self._round(freq, RoundTo.MINUS_INFTY)
701708

702709
def ceil(self, freq):
703710
"""
@@ -707,7 +714,7 @@ class Timestamp(_Timestamp):
707714
----------
708715
freq : a freq string indicating the ceiling resolution
709716
"""
710-
return self._round(freq, np.ceil)
717+
return self._round(freq, RoundTo.PLUS_INFTY)
711718

712719
@property
713720
def tz(self):

pandas/core/indexes/datetimelike.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import numpy as np
1212

1313
from pandas._libs import lib, iNaT, NaT
14-
from pandas._libs.tslibs.timestamps import round_ns
14+
from pandas._libs.tslibs.timestamps import round_nsint64, RoundTo
1515

1616
from pandas.core.dtypes.common import (
1717
ensure_int64,
@@ -168,10 +168,10 @@ class TimelikeOps(object):
168168
"""
169169
)
170170

171-
def _round(self, freq, rounder):
171+
def _round(self, freq, mode):
172172
# round the local times
173173
values = _ensure_datetimelike_to_i8(self)
174-
result = round_ns(values, rounder, freq)
174+
result = round_nsint64(values, mode, freq)
175175
result = self._maybe_mask_results(result, fill_value=NaT)
176176

177177
attribs = self._get_attributes_dict()
@@ -184,15 +184,15 @@ def _round(self, freq, rounder):
184184

185185
@Appender((_round_doc + _round_example).format(op="round"))
186186
def round(self, freq, *args, **kwargs):
187-
return self._round(freq, np.round)
187+
return self._round(freq, RoundTo.NEAREST_HALF_EVEN)
188188

189189
@Appender((_round_doc + _floor_example).format(op="floor"))
190190
def floor(self, freq):
191-
return self._round(freq, np.floor)
191+
return self._round(freq, RoundTo.MINUS_INFTY)
192192

193193
@Appender((_round_doc + _ceil_example).format(op="ceil"))
194194
def ceil(self, freq):
195-
return self._round(freq, np.ceil)
195+
return self._round(freq, RoundTo.PLUS_INFTY)
196196

197197

198198
class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):

0 commit comments

Comments
 (0)