Skip to content

BUG/ENH: Handle AmbiguousTimeError in date rounding #22647

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 23, 2018
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ Other Enhancements
- :func:`to_timedelta` now supports ISO-formatted timedelta strings (:issue:`21877`)
- :class:`Series` and :class:`DataFrame` now support :class:`Iterable` in constructor (:issue:`2193`)
- :class:`DatetimeIndex` gained :attr:`DatetimeIndex.timetz` attribute. Returns local time with timezone information. (:issue:`21358`)
- :meth:`round`, :meth:`ceil`, and :meth:`floor` for :class:`DatetimeIndex` and :class:`Timestamp` now support an ``ambiguous`` argument for handling datetimes that are rounded to ambiguous times (:issue:`18946`)
- :class:`Resampler` now is iterable like :class:`GroupBy` (:issue:`15314`).
- :ref:`Series.resample` and :ref:`DataFrame.resample` have gained the :meth:`Resampler.quantile` method (:issue:`15023`).

Expand Down
23 changes: 23 additions & 0 deletions pandas/_libs/tslibs/nattype.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,11 @@ class NaTType(_NaT):
Parameters
----------
freq : a freq string indicating the rounding resolution
ambiguous : bool, 'NaT', default 'raise'
- bool contains flags to determine if time is dst or not (note
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
- 'raise' will raise an AmbiguousTimeError for an ambiguous time

Raises
------
Expand All @@ -490,6 +495,15 @@ class NaTType(_NaT):
Parameters
----------
freq : a freq string indicating the flooring resolution
ambiguous : bool, 'NaT', default 'raise'
- bool contains flags to determine if time is dst or not (note
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add versionadded tags here (and other new arg places)

- 'raise' will raise an AmbiguousTimeError for an ambiguous time

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

may make sense to provide a template for the doc-strings (can be followup)

Raises
------
ValueError if the freq cannot be converted
""")
ceil = _make_nat_func('ceil', # noqa:E128
"""
Expand All @@ -498,6 +512,15 @@ class NaTType(_NaT):
Parameters
----------
freq : a freq string indicating the ceiling resolution
ambiguous : bool, 'NaT', default 'raise'
- bool contains flags to determine if time is dst or not (note
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
- 'raise' will raise an AmbiguousTimeError for an ambiguous time

Raises
------
ValueError if the freq cannot be converted
""")

tz_convert = _make_nat_func('tz_convert', # noqa:E128
Expand Down
39 changes: 31 additions & 8 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ class Timestamp(_Timestamp):

return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, freq)

def _round(self, freq, rounder):
def _round(self, freq, rounder, ambiguous='raise'):
if self.tz is not None:
value = self.tz_localize(None).value
else:
Expand All @@ -668,10 +668,10 @@ class Timestamp(_Timestamp):
r = round_ns(value, rounder, freq)[0]
result = Timestamp(r, unit='ns')
if self.tz is not None:
result = result.tz_localize(self.tz)
result = result.tz_localize(self.tz, ambiguous=ambiguous)
return result

def round(self, freq):
def round(self, freq, ambiguous='raise'):
"""
Round the Timestamp to the specified resolution

Expand All @@ -682,32 +682,55 @@ class Timestamp(_Timestamp):
Parameters
----------
freq : a freq string indicating the rounding resolution
ambiguous : bool, 'NaT', default 'raise'
- bool contains flags to determine if time is dst or not (note
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
- 'raise' will raise an AmbiguousTimeError for an ambiguous time

Raises
------
ValueError if the freq cannot be converted
"""
return self._round(freq, np.round)
return self._round(freq, np.round, ambiguous)

def floor(self, freq):
def floor(self, freq, ambiguous='raise'):
"""
return a new Timestamp floored to this resolution

Parameters
----------
freq : a freq string indicating the flooring resolution
ambiguous : bool, 'NaT', default 'raise'
- bool contains flags to determine if time is dst or not (note
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
- 'raise' will raise an AmbiguousTimeError for an ambiguous time

Raises
------
ValueError if the freq cannot be converted
"""
return self._round(freq, np.floor)
return self._round(freq, np.floor, ambiguous)

def ceil(self, freq):
def ceil(self, freq, ambiguous='raise'):
"""
return a new Timestamp ceiled to this resolution

Parameters
----------
freq : a freq string indicating the ceiling resolution
ambiguous : bool, 'NaT', default 'raise'
- bool contains flags to determine if time is dst or not (note
that this flag is only applicable for ambiguous fall dst dates)
- 'NaT' will return NaT for an ambiguous time
- 'raise' will raise an AmbiguousTimeError for an ambiguous time

Raises
------
ValueError if the freq cannot be converted
"""
return self._round(freq, np.ceil)
return self._round(freq, np.ceil, ambiguous)

@property
def tz(self):
Expand Down
33 changes: 23 additions & 10 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ class TimelikeOps(object):
frequency like 'S' (second) not 'ME' (month end). See
:ref:`frequency aliases <timeseries.offset_aliases>` for
a list of possible `freq` values.
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
- 'infer' will attempt to infer fall dst-transition hours based on
order
- bool-ndarray where True signifies a DST time, False designates
a non-DST time (note that this flag is only applicable for
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

versionadded

can you test for .dt accessors as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The .dt accessor tests are here:

def test_dt_round(self, method, dates):

ambiguous times)
- 'NaT' will return NaT where there are ambiguous times
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
times
Only relevant for DatetimeIndex

Returns
-------
Expand Down Expand Up @@ -168,7 +178,7 @@ class TimelikeOps(object):
"""
)

def _round(self, freq, rounder):
def _round(self, freq, rounder, ambiguous):
# round the local times
values = _ensure_datetimelike_to_i8(self)
result = round_ns(values, rounder, freq)
Expand All @@ -180,19 +190,20 @@ def _round(self, freq, rounder):
if 'tz' in attribs:
attribs['tz'] = None
return self._ensure_localized(
self._shallow_copy(result, **attribs))
self._shallow_copy(result, **attribs), ambiguous
)

@Appender((_round_doc + _round_example).format(op="round"))
def round(self, freq, *args, **kwargs):
return self._round(freq, np.round)
def round(self, freq, ambiguous='raise'):
return self._round(freq, np.round, ambiguous)

@Appender((_round_doc + _floor_example).format(op="floor"))
def floor(self, freq):
return self._round(freq, np.floor)
def floor(self, freq, ambiguous='raise'):
return self._round(freq, np.floor, ambiguous)

@Appender((_round_doc + _ceil_example).format(op="ceil"))
def ceil(self, freq):
return self._round(freq, np.ceil)
def ceil(self, freq, ambiguous='raise'):
return self._round(freq, np.ceil, ambiguous)


class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
Expand Down Expand Up @@ -264,7 +275,7 @@ def _evaluate_compare(self, other, op):
except TypeError:
return result

def _ensure_localized(self, result):
def _ensure_localized(self, result, ambiguous='raise'):
"""
ensure that we are re-localized

Expand All @@ -274,6 +285,8 @@ def _ensure_localized(self, result):
Parameters
----------
result : DatetimeIndex / i8 ndarray
ambiguous : str, bool, or bool-ndarray
default 'raise'

Returns
-------
Expand All @@ -284,7 +297,7 @@ def _ensure_localized(self, result):
if getattr(self, 'tz', None) is not None:
if not isinstance(result, ABCIndexClass):
result = self._simple_new(result)
result = result.tz_localize(self.tz)
result = result.tz_localize(self.tz, ambiguous=ambiguous)
return result

def _box_values_as_index(self):
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/scalar/timestamp/test_unary_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,28 @@ def test_floor(self):
expected = Timestamp('20130101')
assert result == expected

@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
def test_round_dst_border(self, method):
# GH 18946 round near DST
ts = Timestamp('2017-10-29 00:00:00', tz='UTC').tz_convert(
'Europe/Madrid'
)
#
result = getattr(ts, method)('H', ambiguous=True)
assert result == ts

result = getattr(ts, method)('H', ambiguous=False)
expected = Timestamp('2017-10-29 01:00:00', tz='UTC').tz_convert(
'Europe/Madrid'
)
assert result == expected

result = getattr(ts, method)('H', ambiguous='NaT')
assert result is NaT

with pytest.raises(pytz.AmbiguousTimeError):
getattr(ts, method)('H', ambiguous='raise')

# --------------------------------------------------------------
# Timestamp.replace

Expand Down
95 changes: 59 additions & 36 deletions pandas/tests/series/test_datetime_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import calendar
import unicodedata
import pytest
import pytz

from datetime import datetime, time, date

Expand Down Expand Up @@ -95,42 +96,6 @@ def compare(s, name):
expected = Series(exp_values, index=s.index, name='xxx')
tm.assert_series_equal(result, expected)

# round
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
'2012-01-01 12:01:00',
'2012-01-01 08:00:00']), name='xxx')
result = s.dt.round('D')
expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
'2012-01-01']), name='xxx')
tm.assert_series_equal(result, expected)

# round with tz
result = (s.dt.tz_localize('UTC')
.dt.tz_convert('US/Eastern')
.dt.round('D'))
exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
'2012-01-01']).tz_localize('US/Eastern')
expected = Series(exp_values, name='xxx')
tm.assert_series_equal(result, expected)

# floor
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
'2012-01-01 12:01:00',
'2012-01-01 08:00:00']), name='xxx')
result = s.dt.floor('D')
expected = Series(pd.to_datetime(['2012-01-01', '2012-01-01',
'2012-01-01']), name='xxx')
tm.assert_series_equal(result, expected)

# ceil
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
'2012-01-01 12:01:00',
'2012-01-01 08:00:00']), name='xxx')
result = s.dt.ceil('D')
expected = Series(pd.to_datetime(['2012-01-02', '2012-01-02',
'2012-01-02']), name='xxx')
tm.assert_series_equal(result, expected)

# datetimeindex with tz
s = Series(date_range('20130101', periods=5, tz='US/Eastern'),
name='xxx')
Expand Down Expand Up @@ -261,6 +226,64 @@ def get_dir(s):
with pytest.raises(com.SettingWithCopyError):
s.dt.hour[0] = 5

@pytest.mark.parametrize('method, dates', [
['round', ['2012-01-02', '2012-01-02', '2012-01-01']],
['floor', ['2012-01-01', '2012-01-01', '2012-01-01']],
['ceil', ['2012-01-02', '2012-01-02', '2012-01-02']]
])
def test_dt_round(self, method, dates):
# round / floor / ceil to daily resolution through the .dt accessor
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
'2012-01-01 12:01:00',
'2012-01-01 08:00:00']), name='xxx')
result = getattr(s.dt, method)('D')
expected = Series(pd.to_datetime(dates), name='xxx')
tm.assert_series_equal(result, expected)

def test_dt_round_tz(self):
s = Series(pd.to_datetime(['2012-01-01 13:00:00',
'2012-01-01 12:01:00',
'2012-01-01 08:00:00']), name='xxx')
result = (s.dt.tz_localize('UTC')
.dt.tz_convert('US/Eastern')
.dt.round('D'))

exp_values = pd.to_datetime(['2012-01-01', '2012-01-01',
'2012-01-01']).tz_localize('US/Eastern')
expected = Series(exp_values, name='xxx')
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize('method', ['ceil', 'round', 'floor'])
def test_dt_round_tz_ambiguous(self, method):
# GH 18946 round near DST
df1 = pd.DataFrame([
pd.to_datetime('2017-10-29 02:00:00+02:00', utc=True),
pd.to_datetime('2017-10-29 02:00:00+01:00', utc=True),
pd.to_datetime('2017-10-29 03:00:00+01:00', utc=True)
],
columns=['date'])
df1['date'] = df1['date'].dt.tz_convert('Europe/Madrid')
# infer
result = getattr(df1.date.dt, method)('H', ambiguous='infer')
expected = df1['date']
tm.assert_series_equal(result, expected)

# bool-array
result = getattr(df1.date.dt, method)(
'H', ambiguous=[True, False, False]
)
tm.assert_series_equal(result, expected)

# NaT
result = getattr(df1.date.dt, method)('H', ambiguous='NaT')
expected = df1['date'].copy()
expected.iloc[0:2] = pd.NaT
tm.assert_series_equal(result, expected)

# raise
with pytest.raises(pytz.AmbiguousTimeError):
getattr(df1.date.dt, method)('H', ambiguous='raise')

def test_dt_namespace_accessor_categorical(self):
# GH 19468
dti = DatetimeIndex(['20171111', '20181212']).repeat(2)
Expand Down