Skip to content

Commit fc1fe83

Browse files
mroeschke authored and jreback committed
BUG: Indexing with UTC offset string no longer ignored (#25263)
1 parent 5ae9b48 commit fc1fe83

File tree

6 files changed

+129
-45
lines changed

6 files changed

+129
-45
lines changed

doc/source/user_guide/timeseries.rst

+10
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,16 @@ We are stopping on the included end-point as it is part of the index:
633633
dft2 = dft2.swaplevel(0, 1).sort_index()
634634
dft2.loc[idx[:, '2013-01-05'], :]
635635
636+
.. versionadded:: 0.25.0
637+
638+
Slicing with string indexing also honors the UTC offset given in the date string.
639+
640+
.. ipython:: python
641+
642+
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
643+
df
644+
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
645+
636646
.. _timeseries.slice_vs_exact_match:
637647

638648
Slice vs. Exact Match

doc/source/whatsnew/v0.25.0.rst

+32-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,37 @@ Other Enhancements
3131
Backwards incompatible API changes
3232
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
3333

34-
- :meth:`Timestamp.strptime` will now raise a NotImplementedError (:issue:`25016`)
34+
.. _whatsnew_0250.api_breaking.utc_offset_indexing:
35+
36+
Indexing with date strings with UTC offsets
37+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
38+
39+
Indexing a :class:`DataFrame` or :class:`Series` that has a :class:`DatetimeIndex` using a
40+
date string with a UTC offset would previously ignore the UTC offset. Now, the UTC offset
41+
is respected in indexing. (:issue:`24076`, :issue:`16785`)
42+
43+
*Previous Behavior*:
44+
45+
.. code-block:: ipython
46+
47+
In [1]: df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
48+
49+
In [2]: df
50+
Out[2]:
51+
0
52+
2019-01-01 00:00:00-08:00 0
53+
54+
In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00']
55+
Out[3]:
56+
0
57+
2019-01-01 00:00:00-08:00 0
58+
59+
*New Behavior*:
60+
61+
.. ipython:: python
62+
63+
df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific'))
64+
df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00']
3565

3666
.. _whatsnew_0250.api.other:
3767

@@ -40,7 +70,7 @@ Other API Changes
4070

4171
- :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`)
4272
- ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`)
43-
-
73+
- :meth:`Timestamp.strptime` will now raise a ``NotImplementedError`` (:issue:`25016`)
4474
-
4575

4676
.. _whatsnew_0250.deprecations:

pandas/core/indexes/base.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
import numpy as np
77

88
from pandas._libs import (
9-
Timedelta, algos as libalgos, index as libindex, join as libjoin, lib,
10-
tslibs)
9+
algos as libalgos, index as libindex, join as libjoin, lib)
1110
from pandas._libs.lib import is_datetime_array
11+
from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp
12+
from pandas._libs.tslibs.timezones import tz_compare
1213
import pandas.compat as compat
1314
from pandas.compat import range, set_function_name, u
1415
from pandas.compat.numpy import function as nv
@@ -447,7 +448,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
447448
try:
448449
return DatetimeIndex(subarr, copy=copy,
449450
name=name, **kwargs)
450-
except tslibs.OutOfBoundsDatetime:
451+
except OutOfBoundsDatetime:
451452
pass
452453

453454
elif inferred.startswith('timedelta'):
@@ -4867,6 +4868,20 @@ def slice_locs(self, start=None, end=None, step=None, kind=None):
48674868
# If it's a reverse slice, temporarily swap bounds.
48684869
start, end = end, start
48694870

4871+
# GH 16785: If start and end happen to be date strings with UTC offsets
4872+
# attempt to parse and check that the offsets are the same
4873+
if (isinstance(start, (compat.string_types, datetime))
4874+
and isinstance(end, (compat.string_types, datetime))):
4875+
try:
4876+
ts_start = Timestamp(start)
4877+
ts_end = Timestamp(end)
4878+
except (ValueError, TypeError):
4879+
pass
4880+
else:
4881+
if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
4882+
raise ValueError("Both dates must have the "
4883+
"same UTC offset")
4884+
48704885
start_slice = None
48714886
if start is not None:
48724887
start_slice = self.get_slice_bound(start, 'left', kind)

pandas/core/indexes/datetimes.py

+41-39
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,8 @@
3232
from pandas.core.ops import get_op_result_name
3333
import pandas.core.tools.datetimes as tools
3434

35-
from pandas.tseries import offsets
3635
from pandas.tseries.frequencies import Resolution, to_offset
37-
from pandas.tseries.offsets import CDay, prefix_mapping
36+
from pandas.tseries.offsets import CDay, Nano, prefix_mapping
3837

3938

4039
def _new_DatetimeIndex(cls, d):
@@ -852,54 +851,57 @@ def _parsed_string_to_bounds(self, reso, parsed):
852851
lower, upper: pd.Timestamp
853852
854853
"""
854+
valid_resos = {'year', 'month', 'quarter', 'day', 'hour', 'minute',
855+
'second', 'minute', 'second', 'microsecond'}
856+
if reso not in valid_resos:
857+
raise KeyError
855858
if reso == 'year':
856-
return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz),
857-
Timestamp(datetime(parsed.year, 12, 31, 23,
858-
59, 59, 999999), tz=self.tz))
859+
start = Timestamp(parsed.year, 1, 1)
860+
end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999)
859861
elif reso == 'month':
860862
d = ccalendar.get_days_in_month(parsed.year, parsed.month)
861-
return (Timestamp(datetime(parsed.year, parsed.month, 1),
862-
tz=self.tz),
863-
Timestamp(datetime(parsed.year, parsed.month, d, 23,
864-
59, 59, 999999), tz=self.tz))
863+
start = Timestamp(parsed.year, parsed.month, 1)
864+
end = Timestamp(parsed.year, parsed.month, d, 23, 59, 59, 999999)
865865
elif reso == 'quarter':
866866
qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead
867867
d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month
868-
return (Timestamp(datetime(parsed.year, parsed.month, 1),
869-
tz=self.tz),
870-
Timestamp(datetime(parsed.year, qe, d, 23, 59,
871-
59, 999999), tz=self.tz))
868+
start = Timestamp(parsed.year, parsed.month, 1)
869+
end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999)
872870
elif reso == 'day':
873-
st = datetime(parsed.year, parsed.month, parsed.day)
874-
return (Timestamp(st, tz=self.tz),
875-
Timestamp(Timestamp(st + offsets.Day(),
876-
tz=self.tz).value - 1))
871+
start = Timestamp(parsed.year, parsed.month, parsed.day)
872+
end = start + timedelta(days=1) - Nano(1)
877873
elif reso == 'hour':
878-
st = datetime(parsed.year, parsed.month, parsed.day,
879-
hour=parsed.hour)
880-
return (Timestamp(st, tz=self.tz),
881-
Timestamp(Timestamp(st + offsets.Hour(),
882-
tz=self.tz).value - 1))
874+
start = Timestamp(parsed.year, parsed.month, parsed.day,
875+
parsed.hour)
876+
end = start + timedelta(hours=1) - Nano(1)
883877
elif reso == 'minute':
884-
st = datetime(parsed.year, parsed.month, parsed.day,
885-
hour=parsed.hour, minute=parsed.minute)
886-
return (Timestamp(st, tz=self.tz),
887-
Timestamp(Timestamp(st + offsets.Minute(),
888-
tz=self.tz).value - 1))
878+
start = Timestamp(parsed.year, parsed.month, parsed.day,
879+
parsed.hour, parsed.minute)
880+
end = start + timedelta(minutes=1) - Nano(1)
889881
elif reso == 'second':
890-
st = datetime(parsed.year, parsed.month, parsed.day,
891-
hour=parsed.hour, minute=parsed.minute,
892-
second=parsed.second)
893-
return (Timestamp(st, tz=self.tz),
894-
Timestamp(Timestamp(st + offsets.Second(),
895-
tz=self.tz).value - 1))
882+
start = Timestamp(parsed.year, parsed.month, parsed.day,
883+
parsed.hour, parsed.minute, parsed.second)
884+
end = start + timedelta(seconds=1) - Nano(1)
896885
elif reso == 'microsecond':
897-
st = datetime(parsed.year, parsed.month, parsed.day,
898-
parsed.hour, parsed.minute, parsed.second,
899-
parsed.microsecond)
900-
return (Timestamp(st, tz=self.tz), Timestamp(st, tz=self.tz))
901-
else:
902-
raise KeyError
886+
start = Timestamp(parsed.year, parsed.month, parsed.day,
887+
parsed.hour, parsed.minute, parsed.second,
888+
parsed.microsecond)
889+
end = start + timedelta(microseconds=1) - Nano(1)
890+
# GH 24076
891+
# If an incoming date string contained a UTC offset, need to localize
892+
# the parsed date to this offset first before aligning with the index's
893+
# timezone
894+
if parsed.tzinfo is not None:
895+
if self.tz is None:
896+
raise ValueError("The index must be timezone aware "
897+
"when indexing with a date string with a "
898+
"UTC offset")
899+
start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz)
900+
end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz)
901+
elif self.tz is not None:
902+
start = start.tz_localize(self.tz)
903+
end = end.tz_localize(self.tz)
904+
return start, end
903905

904906
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
905907
is_monotonic = self.is_monotonic

pandas/tests/indexes/datetimes/test_datetime.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def test_stringified_slice_with_tz(self):
102102
# GH#2658
103103
import datetime
104104
start = datetime.datetime.now()
105-
idx = date_range(start=start, freq="1d", periods=10)
105+
idx = date_range(start=start, freq="1d", periods=10, tz='US/Eastern')
106106
df = DataFrame(lrange(10), index=idx)
107107
df["2013-01-14 23:44:34.437768-05:00":] # no exception here
108108

pandas/tests/indexes/datetimes/test_partial_slicing.py

+27
Original file line numberDiff line numberDiff line change
@@ -396,3 +396,30 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected):
396396
result = op(df.A, datetimelike)
397397
expected = Series(expected, name='A')
398398
tm.assert_series_equal(result, expected)
399+
400+
@pytest.mark.parametrize('start', [
401+
'2018-12-02 21:50:00+00:00', pd.Timestamp('2018-12-02 21:50:00+00:00'),
402+
pd.Timestamp('2018-12-02 21:50:00+00:00').to_pydatetime()
403+
])
404+
@pytest.mark.parametrize('end', [
405+
'2018-12-02 21:52:00+00:00', pd.Timestamp('2018-12-02 21:52:00+00:00'),
406+
pd.Timestamp('2018-12-02 21:52:00+00:00').to_pydatetime()
407+
])
408+
def test_getitem_with_datestring_with_UTC_offset(self, start, end):
409+
# GH 24076
410+
idx = pd.date_range(start='2018-12-02 14:50:00-07:00',
411+
end='2018-12-02 14:50:00-07:00', freq='1min')
412+
df = pd.DataFrame(1, index=idx, columns=['A'])
413+
result = df[start:end]
414+
expected = df.iloc[0:3, :]
415+
tm.assert_frame_equal(result, expected)
416+
417+
# GH 16785
418+
start = str(start)
419+
end = str(end)
420+
with pytest.raises(ValueError, match="Both dates must"):
421+
df[start:end[:-4] + '1:00']
422+
423+
with pytest.raises(ValueError, match="The index must be timezone"):
424+
df = df.tz_localize(None)
425+
df[start:end]

0 commit comments

Comments
 (0)