Skip to content

BUG: Fix handling of ambiguous or nonexistent start and end times in date_range #27088

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 28, 2019
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,7 @@ Timezones
- Bug in :func:`to_datetime` where an uninformative ``RuntimeError`` was raised when passing a naive :class:`Timestamp` with datetime strings with mixed UTC offsets (:issue:`25978`)
- Bug in :func:`to_datetime` with ``unit='ns'`` would drop timezone information from the parsed argument (:issue:`26168`)
- Bug in :func:`DataFrame.join` where joining a timezone aware index with a timezone aware column would result in a column of ``NaN`` (:issue:`26335`)
- Bug in :func:`date_range` where ambiguous or nonexistent start or end times were not handled by the ``ambiguous`` or ``nonexistent`` keywords respectively (:issue:`27088`)

Numeric
^^^^^^^
Expand Down
22 changes: 15 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,10 +433,12 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
if tz is not None:
# Localize the start and end arguments
start = _maybe_localize_point(
start, getattr(start, 'tz', None), start, freq, tz
start, getattr(start, 'tz', None), start, freq, tz,
ambiguous, nonexistent
)
end = _maybe_localize_point(
end, getattr(end, 'tz', None), end, freq, tz
end, getattr(end, 'tz', None), end, freq, tz,
ambiguous, nonexistent
)
if freq is not None:
# We break Day arithmetic (fixed 24 hour) here and opt for
Expand Down Expand Up @@ -2121,7 +2123,8 @@ def _maybe_normalize_endpoints(start, end, normalize):
return start, end, _normalized


def _maybe_localize_point(ts, is_none, is_not_none, freq, tz):
def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous,
nonexistent):
"""
Localize a start or end Timestamp to the timezone of the corresponding
start or end Timestamp
Expand All @@ -2133,6 +2136,8 @@ def _maybe_localize_point(ts, is_none, is_not_none, freq, tz):
is_not_none : argument that should not be None
freq : Tick, DateOffset, or None
tz : str, timezone object or None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you feel like it, typing the args here would be great (or doing it in a follow-up is fine too).

ambiguous: str, localization behavior for ambiguous times
nonexistent: str, localization behavior for nonexistent times

Returns
-------
Expand All @@ -2141,10 +2146,13 @@ def _maybe_localize_point(ts, is_none, is_not_none, freq, tz):
# Make sure start and end are timezone localized if:
# 1) freq = a Timedelta-like frequency (Tick)
# 2) freq = None i.e. generating a linspaced range
if isinstance(freq, Tick) or freq is None:
localize_args = {'tz': tz, 'ambiguous': False}
else:
localize_args = {'tz': None}
if is_none is None and is_not_none is not None:
# Note: We can't ambiguous='infer' a singular ambiguous time; however,
# we have historically defaulted ambiguous=False
ambiguous = ambiguous if ambiguous != 'infer' else False
localize_args = {'ambiguous': ambiguous, 'nonexistent': nonexistent,
'tz': None}
if isinstance(freq, Tick) or freq is None:
localize_args['tz'] = tz
ts = ts.tz_localize(**localize_args)
return ts
29 changes: 23 additions & 6 deletions pandas/tests/indexes/datetimes/test_timezones.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,12 +541,9 @@ def test_dti_construction_ambiguous_endpoint(self, tz):
# construction with an ambiguous end-point
# GH#11626

# FIXME: This next block fails to raise; it was taken from an older
# version of this test that had an indention mistake that caused it
# to not get executed.
# with pytest.raises(pytz.AmbiguousTimeError):
# date_range("2013-10-26 23:00", "2013-10-27 01:00",
# tz="Europe/London", freq="H")
with pytest.raises(pytz.AmbiguousTimeError):
date_range("2013-10-26 23:00", "2013-10-27 01:00",
tz="Europe/London", freq="H")

times = date_range("2013-10-26 23:00", "2013-10-27 01:00", freq="H",
tz=tz, ambiguous='infer')
Expand All @@ -561,6 +558,26 @@ def test_dti_construction_ambiguous_endpoint(self, tz):
assert times[-1] == Timestamp('2013-10-27 01:00:00+0000',
tz=tz, freq="H")

# Each case supplies a timezone, a ``nonexistent`` option, and the value the
# last element of the generated range is expected to equal.
@pytest.mark.parametrize('tz, option, expected', [
['US/Pacific', 'shift_forward', "2019-03-10 03:00"],
['dateutil/US/Pacific', 'shift_forward', "2019-03-10 03:00"],
['US/Pacific', 'shift_backward', "2019-03-10 01:00"],
# This combination is marked as an expected failure (see the xfail reason).
pytest.param('dateutil/US/Pacific', 'shift_backward',
"2019-03-10 01:00",
marks=pytest.mark.xfail(reason="GH 24329")),
['US/Pacific', timedelta(hours=1), "2019-03-10 03:00"]
])
def test_dti_construction_nonexistent_endpoint(self, tz, option, expected):
# construction with a nonexistent end-point

# Without a ``nonexistent`` option, building the range is expected to raise.
with pytest.raises(pytz.NonExistentTimeError):
date_range("2019-03-10 00:00", "2019-03-10 02:00",
tz="US/Pacific", freq="H")

# With ``nonexistent=option`` the same endpoints should build a range whose
# last element equals the parametrized ``expected`` timestamp.
times = date_range("2019-03-10 00:00", "2019-03-10 02:00", freq="H",
tz=tz, nonexistent=option)
assert times[-1] == Timestamp(expected, tz=tz, freq="H")

def test_dti_tz_localize_bdate_range(self):
dr = pd.bdate_range('1/1/2009', '1/1/2010')
dr_utc = pd.bdate_range('1/1/2009', '1/1/2010', tz=pytz.utc)
Expand Down