diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index 9c4b408a1d24b..8c36d51a5fd16 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -54,7 +54,7 @@ Fixed Regressions - Fixed regression in :meth:`to_csv` when handling file-like object incorrectly (:issue:`21471`) - Bug in both :meth:`DataFrame.first_valid_index` and :meth:`Series.first_valid_index` raised for a row index having duplicate values (:issue:`21441`) -- +- Bug in :meth:`Timestamp.ceil` and :meth:`Timestamp.floor` when timestamp is a multiple of the rounding frequency (:issue:`21262`) .. _whatsnew_0232.performance: diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index ba5ebdab82ddc..123ccebf83a56 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -59,42 +59,51 @@ cdef inline object create_timestamp_from_ts(int64_t value, def round_ns(values, rounder, freq): + """ Applies rounding function at given frequency Parameters ---------- - values : int, :obj:`ndarray` - rounder : function + values : :obj:`ndarray` + rounder : function, eg. 
'ceil', 'floor', 'round' freq : str, obj Returns ------- - int or :obj:`ndarray` + :obj:`ndarray` """ + from pandas.tseries.frequencies import to_offset unit = to_offset(freq).nanos + + # GH21262 If the Timestamp is a multiple of the freq str + # don't apply any rounding + mask = values % unit == 0 + if mask.all(): + return values + r = values.copy() + if unit < 1000: # for nano rounding, work with the last 6 digits separately # due to float precision buff = 1000000 - r = (buff * (values // buff) + unit * - (rounder((values % buff) * (1 / float(unit)))).astype('i8')) + r[~mask] = (buff * (values[~mask] // buff) + + unit * (rounder((values[~mask] % buff) * + (1 / float(unit)))).astype('i8')) else: if unit % 1000 != 0: msg = 'Precision will be lost using frequency: {}' warnings.warn(msg.format(freq)) - # GH19206 # to deal with round-off when unit is large if unit >= 1e9: divisor = 10 ** int(np.log10(unit / 1e7)) else: divisor = 10 - - r = (unit * rounder((values * (divisor / float(unit))) / divisor) - .astype('i8')) - + r[~mask] = (unit * rounder((values[~mask] * + (divisor / float(unit))) / divisor) + .astype('i8')) return r @@ -649,7 +658,10 @@ class Timestamp(_Timestamp): else: value = self.value - r = round_ns(value, rounder, freq) + value = np.array([value], dtype=np.int64) + + # Will only ever contain 1 element for timestamp + r = round_ns(value, rounder, freq)[0] result = Timestamp(r, unit='ns') if self.tz is not None: result = result.tz_localize(self.tz) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 9180bb0af3af3..801dcb91b124e 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -134,6 +134,21 @@ def test_round(self, tz): ts = '2016-10-17 12:00:00.001501031' DatetimeIndex([ts]).round('1010ns') + def test_no_rounding_occurs(self, tz): + # GH 21262 + rng = date_range(start='2016-01-01', periods=5, + 
freq='2Min', tz=tz) + + expected_rng = DatetimeIndex([ + Timestamp('2016-01-01 00:00:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:02:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:04:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:06:00', tz=tz, freq='2T'), + Timestamp('2016-01-01 00:08:00', tz=tz, freq='2T'), + ]) + + tm.assert_index_equal(rng.round(freq='2T'), expected_rng) + @pytest.mark.parametrize('test_input, rounder, freq, expected', [ (['2117-01-01 00:00:45'], 'floor', '15s', ['2117-01-01 00:00:45']), (['2117-01-01 00:00:45'], 'ceil', '15s', ['2117-01-01 00:00:45']), @@ -143,6 +158,10 @@ def test_round(self, tz): ['1823-01-01 00:00:01.000000020']), (['1823-01-01 00:00:01'], 'floor', '1s', ['1823-01-01 00:00:01']), (['1823-01-01 00:00:01'], 'ceil', '1s', ['1823-01-01 00:00:01']), + (['2018-01-01 00:15:00'], 'ceil', '15T', ['2018-01-01 00:15:00']), + (['2018-01-01 00:15:00'], 'floor', '15T', ['2018-01-01 00:15:00']), + (['1823-01-01 03:00:00'], 'ceil', '3H', ['1823-01-01 03:00:00']), + (['1823-01-01 03:00:00'], 'floor', '3H', ['1823-01-01 03:00:00']), (('NaT', '1823-01-01 00:00:01'), 'floor', '1s', ('NaT', '1823-01-01 00:00:01')), (('NaT', '1823-01-01 00:00:01'), 'ceil', '1s', diff --git a/pandas/tests/scalar/timestamp/test_unary_ops.py b/pandas/tests/scalar/timestamp/test_unary_ops.py index 6f3b5ae6a20a3..b02fef707a6fe 100644 --- a/pandas/tests/scalar/timestamp/test_unary_ops.py +++ b/pandas/tests/scalar/timestamp/test_unary_ops.py @@ -118,6 +118,25 @@ def test_ceil_floor_edge(self, test_input, rounder, freq, expected): expected = Timestamp(expected) assert result == expected + @pytest.mark.parametrize('test_input, freq, expected', [ + ('2018-01-01 00:02:06', '2s', '2018-01-01 00:02:06'), + ('2018-01-01 00:02:00', '2T', '2018-01-01 00:02:00'), + ('2018-01-01 00:04:00', '4T', '2018-01-01 00:04:00'), + ('2018-01-01 00:15:00', '15T', '2018-01-01 00:15:00'), + ('2018-01-01 00:20:00', '20T', '2018-01-01 00:20:00'), + ('2018-01-01 03:00:00', '3H', 
'2018-01-01 03:00:00'), + ]) + @pytest.mark.parametrize('rounder', ['ceil', 'floor', 'round']) + def test_round_minute_freq(self, test_input, freq, expected, rounder): + # Ensure timestamps that shouldn't round don't! + # GH#21262 + + dt = Timestamp(test_input) + expected = Timestamp(expected) + func = getattr(dt, rounder) + result = func(freq) + assert result == expected + def test_ceil(self): dt = Timestamp('20130101 09:10:11') result = dt.ceil('D') @@ -264,7 +283,6 @@ def test_timestamp(self): if PY3: # datetime.timestamp() converts in the local timezone with tm.set_timezone('UTC'): - # should agree with datetime.timestamp method dt = ts.to_pydatetime() assert dt.timestamp() == ts.timestamp()