From d2716fd042119eef93fa140c6f2f600cd03bf647 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Feb 2019 19:11:05 -0800 Subject: [PATCH 01/13] BUG: Indexing with UTC offset string not longer ignored --- pandas/core/indexes/base.py | 23 ++++++-- pandas/core/indexes/datetimes.py | 73 ++++++++++++-------------- pandas/tests/indexing/test_datetime.py | 18 +++++++ 3 files changed, 71 insertions(+), 43 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cf813f4c3030b..68ecc54aec582 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5,10 +5,11 @@ import numpy as np -from pandas._libs import ( - Timedelta, algos as libalgos, index as libindex, join as libjoin, lib, - tslibs) +from pandas._libs import (algos as libalgos, index as libindex, + join as libjoin, lib) from pandas._libs.lib import is_datetime_array +from pandas._libs.tslibs import Timedelta, Timestamp, OutOfBoundsDatetime +from pandas._libs.tslibs.timezones import tz_compare import pandas.compat as compat from pandas.compat import range, set_function_name, u from pandas.compat.numpy import function as nv @@ -447,7 +448,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) - except tslibs.OutOfBoundsDatetime: + except OutOfBoundsDatetime: pass elif inferred.startswith('timedelta'): @@ -4866,6 +4867,20 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): # If it's a reverse slice, temporarily swap bounds. 
start, end = end, start + # GH 16785: If start and end happen to be date strings with UTC offsets + # attempt to parse and check that the offsets are the same + if (isinstance(start, compat.string_types) and + isinstance(end, compat.string_types)): + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except ValueError: + pass + else: + if not tz_compare(ts_start.tz, ts_end.tz): + raise ValueError("Both date strings must have the same " + "UTC offset") + start_slice = None if start is not None: start_slice = self.get_slice_bound(start, 'left', kind) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index df91c71cfe238..8005e9db66a8b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -32,9 +32,8 @@ from pandas.core.ops import get_op_result_name import pandas.core.tools.datetimes as tools -from pandas.tseries import offsets from pandas.tseries.frequencies import Resolution, to_offset -from pandas.tseries.offsets import CDay, prefix_mapping +from pandas.tseries.offsets import CDay, prefix_mapping, Nano def _new_DatetimeIndex(cls, d): @@ -826,54 +825,50 @@ def _parsed_string_to_bounds(self, reso, parsed): lower, upper: pd.Timestamp """ + valid_resos = {'year', 'month', 'quarter', 'day', 'hour', 'minute', + 'second', 'minute', 'second', 'microsecond'} + if reso not in valid_resos: + raise KeyError if reso == 'year': - return (Timestamp(datetime(parsed.year, 1, 1), tz=self.tz), - Timestamp(datetime(parsed.year, 12, 31, 23, - 59, 59, 999999), tz=self.tz)) + start = Timestamp(parsed.year, 1, 1) + end = Timestamp(parsed.year, 12, 31, 23, 59, 59, 999999) elif reso == 'month': d = ccalendar.get_days_in_month(parsed.year, parsed.month) - return (Timestamp(datetime(parsed.year, parsed.month, 1), - tz=self.tz), - Timestamp(datetime(parsed.year, parsed.month, d, 23, - 59, 59, 999999), tz=self.tz)) + start = Timestamp(parsed.year, parsed.month, 1) + end = Timestamp(parsed.year, parsed.month, d, 23, 59, 
59, 999999) elif reso == 'quarter': qe = (((parsed.month - 1) + 2) % 12) + 1 # two months ahead d = ccalendar.get_days_in_month(parsed.year, qe) # at end of month - return (Timestamp(datetime(parsed.year, parsed.month, 1), - tz=self.tz), - Timestamp(datetime(parsed.year, qe, d, 23, 59, - 59, 999999), tz=self.tz)) + start = Timestamp(parsed.year, parsed.month, 1) + end = Timestamp(parsed.year, qe, d, 23, 59, 59, 999999) elif reso == 'day': - st = datetime(parsed.year, parsed.month, parsed.day) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Day(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day) + end = start + timedelta(days=1) - Nano(1) elif reso == 'hour': - st = datetime(parsed.year, parsed.month, parsed.day, - hour=parsed.hour) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Hour(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour) + end = start + timedelta(hours=1) - Nano(1) elif reso == 'minute': - st = datetime(parsed.year, parsed.month, parsed.day, - hour=parsed.hour, minute=parsed.minute) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Minute(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour, parsed.minute) + end = start + timedelta(minutes=1) - Nano(1) elif reso == 'second': - st = datetime(parsed.year, parsed.month, parsed.day, - hour=parsed.hour, minute=parsed.minute, - second=parsed.second) - return (Timestamp(st, tz=self.tz), - Timestamp(Timestamp(st + offsets.Second(), - tz=self.tz).value - 1)) + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour, parsed.minute, parsed.second) + end = start + timedelta(seconds=1) - Nano(1) elif reso == 'microsecond': - st = datetime(parsed.year, parsed.month, parsed.day, - parsed.hour, parsed.minute, parsed.second, - parsed.microsecond) - return (Timestamp(st, tz=self.tz), Timestamp(st, 
tz=self.tz)) - else: - raise KeyError + start = Timestamp(parsed.year, parsed.month, parsed.day, + parsed.hour, parsed.minute, parsed.second, + parsed.microsecond) + end = start + timedelta(microseconds=1) - Nano(1) + if parsed.tzinfo is not None: + if self.tz is None: + raise ValueError("The index must be timezone aware " + "when indexing with a date string with a " + "UTC offset") + start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) + end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) + return start, end def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): is_monotonic = self.is_monotonic diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 11fb90ebd9bb9..a3d5af78b220a 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -2,6 +2,7 @@ from dateutil import tz import numpy as np +import pytest import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range @@ -313,3 +314,20 @@ def test_loc_setitem_with_existing_dst(self): columns=['value'], dtype=object) tm.assert_frame_equal(result, expected) + + def test_getitem_with_datestring_with_UTC_offset(self): + # GH 24076 + idx = pd.date_range(start='2018-12-02 14:50:00-07:00', + end='2018-12-03 15:00:00-07:00', freq='1min') + df = pd.DataFrame(1, index=idx, columns=['A']) + result = df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] + expected = df.iloc[0:3, :] + tm.assert_frame_equal(result, expected) + + # GH 16785 + with pytest.raises(ValueError, match="Both date strings"): + df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+01:00'] + + with pytest.raises(ValueError, match="The index must be timezone"): + df = df.tz_localize(None) + df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] From f444b92caac79f18b777c19ab6415cde61cfe050 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Feb 2019 19:13:09 -0800 Subject: [PATCH 02/13] Add whatsnew --- 
doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 4032dc20b2e19..577332f1994a9 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -131,7 +131,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug when a date string with a UTC offset would get ignored during indexing. (:issue:`24076`, :issue:`16785`) - - From 6994e77b51aff89d54bf8ba8072422c7db74e6ad Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Feb 2019 22:27:45 -0800 Subject: [PATCH 03/13] Address failures --- pandas/core/indexes/datetimes.py | 3 +++ pandas/tests/indexes/datetimes/test_datetime.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 8005e9db66a8b..cf9f01b8e2e4d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -868,6 +868,9 @@ def _parsed_string_to_bounds(self, reso, parsed): "UTC offset") start = start.tz_localize(parsed.tzinfo).tz_convert(self.tz) end = end.tz_localize(parsed.tzinfo).tz_convert(self.tz) + elif self.tz is not None: + start = start.tz_localize(self.tz) + end = end.tz_localize(self.tz) return start, end def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True): diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e1ba0e1708442..a3ee5fe39769f 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -102,7 +102,7 @@ def test_stringified_slice_with_tz(self): # GH#2658 import datetime start = datetime.datetime.now() - idx = date_range(start=start, freq="1d", periods=10) + idx = date_range(start=start, freq="1d", periods=10, tz='US/Eastern') df = DataFrame(lrange(10), index=idx) df["2013-01-14 23:44:34.437768-05:00":] # no exception here From 
f02e66999a7650aef82248cb226addaca8927baa Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Feb 2019 23:03:11 -0800 Subject: [PATCH 04/13] isort --- pandas/core/indexes/base.py | 6 +++--- pandas/core/indexes/datetimes.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 68ecc54aec582..1da82172eb956 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5,10 +5,10 @@ import numpy as np -from pandas._libs import (algos as libalgos, index as libindex, - join as libjoin, lib) +from pandas._libs import ( + algos as libalgos, index as libindex, join as libjoin, lib) from pandas._libs.lib import is_datetime_array -from pandas._libs.tslibs import Timedelta, Timestamp, OutOfBoundsDatetime +from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp from pandas._libs.tslibs.timezones import tz_compare import pandas.compat as compat from pandas.compat import range, set_function_name, u diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cf9f01b8e2e4d..c1e97b3a64052 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -33,7 +33,7 @@ import pandas.core.tools.datetimes as tools from pandas.tseries.frequencies import Resolution, to_offset -from pandas.tseries.offsets import CDay, prefix_mapping, Nano +from pandas.tseries.offsets import CDay, Nano, prefix_mapping def _new_DatetimeIndex(cls, d): From 807a1f8286fa878aa5e84314fdf40fbc91275520 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Feb 2019 10:04:48 -0800 Subject: [PATCH 05/13] Add comment and move test --- pandas/core/indexes/datetimes.py | 4 ++++ .../indexes/datetimes/test_partial_slicing.py | 18 ++++++++++++++++++ pandas/tests/indexing/test_datetime.py | 18 ------------------ 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 
c1e97b3a64052..7256685c84525 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -861,6 +861,10 @@ def _parsed_string_to_bounds(self, reso, parsed): parsed.hour, parsed.minute, parsed.second, parsed.microsecond) end = start + timedelta(microseconds=1) - Nano(1) + # GH 24076 + # If an incoming date string contained a UTC offset, need to localize + # the parsed date to this offset first before aligning with the index's + # timezone if parsed.tzinfo is not None: if self.tz is None: raise ValueError("The index must be timezone aware " diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index a0c9d9f02385c..9c78458062de1 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -396,3 +396,21 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected): result = op(df.A, datetimelike) expected = Series(expected, name='A') tm.assert_series_equal(result, expected) + + + def test_getitem_with_datestring_with_UTC_offset(self): + # GH 24076 + idx = pd.date_range(start='2018-12-02 14:50:00-07:00', + end='2018-12-03 15:00:00-07:00', freq='1min') + df = pd.DataFrame(1, index=idx, columns=['A']) + result = df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] + expected = df.iloc[0:3, :] + tm.assert_frame_equal(result, expected) + + # GH 16785 + with pytest.raises(ValueError, match="Both date strings"): + df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+01:00'] + + with pytest.raises(ValueError, match="The index must be timezone"): + df = df.tz_localize(None) + df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index a3d5af78b220a..11fb90ebd9bb9 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -2,7 +2,6 @@ from dateutil import tz 
import numpy as np -import pytest import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range @@ -314,20 +313,3 @@ def test_loc_setitem_with_existing_dst(self): columns=['value'], dtype=object) tm.assert_frame_equal(result, expected) - - def test_getitem_with_datestring_with_UTC_offset(self): - # GH 24076 - idx = pd.date_range(start='2018-12-02 14:50:00-07:00', - end='2018-12-03 15:00:00-07:00', freq='1min') - df = pd.DataFrame(1, index=idx, columns=['A']) - result = df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] - expected = df.iloc[0:3, :] - tm.assert_frame_equal(result, expected) - - # GH 16785 - with pytest.raises(ValueError, match="Both date strings"): - df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+01:00'] - - with pytest.raises(ValueError, match="The index must be timezone"): - df = df.tz_localize(None) - df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] From 71b093ec1711ffdc771e5694f184d53d6d07a2d9 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Feb 2019 10:10:03 -0800 Subject: [PATCH 06/13] lint --- pandas/tests/indexes/datetimes/test_partial_slicing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 9c78458062de1..a1357837a3b98 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -397,7 +397,6 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected): expected = Series(expected, name='A') tm.assert_series_equal(result, expected) - def test_getitem_with_datestring_with_UTC_offset(self): # GH 24076 idx = pd.date_range(start='2018-12-02 14:50:00-07:00', From 3bda5fa4d3d1ac904afa026349aedd4960d036e3 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Feb 2019 15:53:19 -0800 Subject: [PATCH 07/13] Add test with mixed timestamp:string slice --- pandas/core/indexes/base.py | 19 
++++++++---------- .../indexes/datetimes/test_partial_slicing.py | 20 +++++++++++++------ 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1da82172eb956..f154e8c213f61 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4869,17 +4869,14 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): # GH 16785: If start and end happen to be date strings with UTC offsets # attempt to parse and check that the offsets are the same - if (isinstance(start, compat.string_types) and - isinstance(end, compat.string_types)): - try: - ts_start = Timestamp(start) - ts_end = Timestamp(end) - except ValueError: - pass - else: - if not tz_compare(ts_start.tz, ts_end.tz): - raise ValueError("Both date strings must have the same " - "UTC offset") + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except (ValueError, TypeError): + pass + else: + if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): + raise ValueError("Both dates must have the same UTC offset") start_slice = None if start is not None: diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index a1357837a3b98..9c4c043a009b4 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -397,19 +397,27 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected): expected = Series(expected, name='A') tm.assert_series_equal(result, expected) - def test_getitem_with_datestring_with_UTC_offset(self): + @pytest.mark.parametrize('start', [ + '2018-12-02 21:50:00+00:00', pd.Timestamp('2018-12-02 21:50:00+00:00') + ]) + @pytest.mark.parametrize('end', [ + '2018-12-02 21:52:00+00:00', pd.Timestamp('2018-12-02 21:52:00+00:00') + ]) + def test_getitem_with_datestring_with_UTC_offset(self, start, end): # GH 24076 idx = pd.date_range(start='2018-12-02 
14:50:00-07:00', - end='2018-12-03 15:00:00-07:00', freq='1min') + end='2018-12-02 14:50:00-07:00', freq='1min') df = pd.DataFrame(1, index=idx, columns=['A']) - result = df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] + result = df[start:end] expected = df.iloc[0:3, :] tm.assert_frame_equal(result, expected) # GH 16785 - with pytest.raises(ValueError, match="Both date strings"): - df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+01:00'] + start = str(start) + end = str(end) + with pytest.raises(ValueError, match="Both dates must"): + df[start:end[:-4] + '1:00'] with pytest.raises(ValueError, match="The index must be timezone"): df = df.tz_localize(None) - df['2018-12-02 21:50:00+00:00':'2018-12-02 21:52:00+00:00'] + df[start:end] From c10bcff9c507d6b5f80e3b6396c74a599eb80934 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Feb 2019 16:30:35 -0800 Subject: [PATCH 08/13] Don't check if one end is None --- pandas/core/indexes/base.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f154e8c213f61..1d259f16df2c6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4869,14 +4869,16 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): # GH 16785: If start and end happen to be date strings with UTC offsets # attempt to parse and check that the offsets are the same - try: - ts_start = Timestamp(start) - ts_end = Timestamp(end) - except (ValueError, TypeError): - pass - else: - if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): - raise ValueError("Both dates must have the same UTC offset") + if start is not None and end is not None: + try: + ts_start = Timestamp(start) + ts_end = Timestamp(end) + except (ValueError, TypeError): + pass + else: + if not tz_compare(ts_start.tzinfo, ts_end.tzinfo): + raise ValueError("Both dates must have the " + "same UTC offset") start_slice = None if start is not None: From 
6363f058bf038ad0e2f1ff8092dbda553c8a945a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Feb 2019 17:05:54 -0800 Subject: [PATCH 09/13] Be explicit with endpoints to check --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1d259f16df2c6..4e8602a9abd1b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4869,7 +4869,8 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): # GH 16785: If start and end happen to be date strings with UTC offsets # attempt to parse and check that the offsets are the same - if start is not None and end is not None: + if (isinstance(start, (compat.string_types, datetime)) + and isinstance(end, (compat.string_types, datetime))): try: ts_start = Timestamp(start) ts_end = Timestamp(end) From 9678ebb197235a46f832f0ad56663e9344e6ecb8 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Feb 2019 18:11:32 -0800 Subject: [PATCH 10/13] Add datetime args in test --- pandas/tests/indexes/datetimes/test_partial_slicing.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 9c4c043a009b4..64693324521b3 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -398,10 +398,12 @@ def test_selection_by_datetimelike(self, datetimelike, op, expected): tm.assert_series_equal(result, expected) @pytest.mark.parametrize('start', [ - '2018-12-02 21:50:00+00:00', pd.Timestamp('2018-12-02 21:50:00+00:00') + '2018-12-02 21:50:00+00:00', pd.Timestamp('2018-12-02 21:50:00+00:00'), + pd.Timestamp('2018-12-02 21:50:00+00:00').to_pydatetime() ]) @pytest.mark.parametrize('end', [ - '2018-12-02 21:52:00+00:00', pd.Timestamp('2018-12-02 21:52:00+00:00') + '2018-12-02 21:52:00+00:00', 
pd.Timestamp('2018-12-02 21:52:00+00:00'), + pd.Timestamp('2018-12-02 21:52:00+00:00').to_pydatetime() ]) def test_getitem_with_datestring_with_UTC_offset(self, start, end): # GH 24076 From 6ac45986e4823d10a5e5f4e6d91ff39d57a17a1e Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 19 Feb 2019 16:26:42 -0800 Subject: [PATCH 11/13] Document change in own section and timeseries.rst --- doc/source/user_guide/timeseries.rst | 8 +++++++ doc/source/whatsnew/v0.25.0.rst | 34 +++++++++++++++++++++++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 23f1aabd69ff3..2a570f9d02b70 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -633,6 +633,14 @@ We are stopping on the included end-point as it is part of the index: dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +Slicing with string indexing also honors UTC offset. + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] + .. _timeseries.slice_vs_exact_match: Slice vs. Exact Match diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 35a07677c475e..f5356c60114df 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -28,6 +28,38 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0240.api_breaking.utc_offset_indexing: + +Indexing with date strings with UTC offsets +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Indexing a :class:`DataFrame` or :class:`Series` with a :class:`DatetimeIndex` with a +date string with a UTC offset would previously ignore the UTC offset. Now, the UTC offset +is respected in indexing. (:issue:`24076`, :issue:`16785`) + +*Previous Behavior*: + +.. 
code-block:: ipython + + In [1]: df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + + In [2]: df + Out[2]: + 0 + 2019-01-01 00:00:00-08:00 0 + + In [3]: df['2019-01-01 00:00:00+04:00':'2019-01-01 01:00:00+04:00'] + Out[3]: + 0 + 2019-01-01 00:00:00-08:00 0 + +*New Behavior*: + +.. ipython:: python + + df = pd.DataFrame([0], index=pd.DatetimeIndex(['2019-01-01'], tz='US/Pacific')) + df['2019-01-01 12:00:00+04:00':'2019-01-01 13:00:00+04:00'] + .. _whatsnew_0250.api.other: Other API Changes @@ -134,7 +166,7 @@ Interval Indexing ^^^^^^^^ -- Bug when a date string with a UTC offset would get ignored during indexing. (:issue:`24076`, :issue:`16785`) +- - - From 138ac7c2e059fbde78df8b07e4b1d774a901dd91 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 20 Feb 2019 09:49:48 -0800 Subject: [PATCH 12/13] add versionadded tag to timeseries.rst --- doc/source/user_guide/timeseries.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 2a570f9d02b70..4e2c428415926 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -633,6 +633,8 @@ We are stopping on the included end-point as it is part of the index: dft2 = dft2.swaplevel(0, 1).sort_index() dft2.loc[idx[:, '2013-01-05'], :] +.. versionadded:: 0.25.0 + Slicing with string indexing also honors UTC offset. .. 
ipython:: python From bb4814aa1ff5c5032e89e1e30202dbef40128995 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 23 Feb 2019 14:14:51 -0800 Subject: [PATCH 13/13] fix formatting --- doc/source/whatsnew/v0.25.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index fc5193e60abc6..5716bea7ce694 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -29,7 +29,7 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. _whatsnew_0240.api_breaking.utc_offset_indexing: +.. _whatsnew_0250.api_breaking.utc_offset_indexing: Indexing with date strings with UTC offsets ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -68,7 +68,7 @@ Other API Changes - :class:`DatetimeTZDtype` will now standardize pytz timezones to a common timezone instance (:issue:`24713`) - ``Timestamp`` and ``Timedelta`` scalars now implement the :meth:`to_numpy` method as aliases to :meth:`Timestamp.to_datetime64` and :meth:`Timedelta.to_timedelta64`, respectively. (:issue:`24653`) -- :meth:`Timestamp.strptime` will now rise a NotImplementedError (:issue:`25016`) +- :meth:`Timestamp.strptime` will now raise a ``NotImplementedError`` (:issue:`25016`) - .. _whatsnew_0250.deprecations: