From dd5cf537ce29a23afc6898f47d5717849697a9fb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 30 Aug 2018 20:08:53 -0700 Subject: [PATCH 1/3] Warn before dropping tzinfo, fix incorrect TDI/DTI indexing --- doc/source/whatsnew/v0.24.0.txt | 4 +- pandas/_libs/tslibs/timestamps.pyx | 5 + pandas/core/indexes/datetimes.py | 14 ++- pandas/core/indexes/timedeltas.py | 4 +- pandas/tests/indexes/datetimes/test_astype.py | 106 ++++++++++-------- .../tests/indexes/datetimes/test_indexing.py | 8 ++ .../tests/indexes/timedeltas/test_indexing.py | 7 ++ .../tests/scalar/timestamp/test_timestamp.py | 8 ++ 8 files changed, 108 insertions(+), 48 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index be16806cb4d1e..44ad3e6a3090c 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -485,6 +485,7 @@ Datetimelike API Changes - :class:`DateOffset` objects are now immutable. Attempting to alter one of these will now raise ``AttributeError`` (:issue:`21341`) - :class:`PeriodIndex` subtraction of another ``PeriodIndex`` will now return an object-dtype :class:`Index` of :class:`DateOffset` objects instead of raising a ``TypeError`` (:issue:`20049`) - :func:`cut` and :func:`qcut` now returns a :class:`DatetimeIndex` or :class:`TimedeltaIndex` bins when the input is datetime or timedelta dtype respectively and ``retbins=True`` (:issue:`19891`) +- :meth:`DatetimeIndex.to_period` and :meth:`Timestamp.to_period` will issue a warning when timezone information will be lost (:issue:`21333`) .. _whatsnew_0240.api.other: @@ -582,6 +583,7 @@ Datetimelike - Bug in :class:`DataFrame` comparisons against ``Timestamp``-like objects failing to raise ``TypeError`` for inequality checks with mismatched types (:issue:`8932`,:issue:`22163`) - Bug in :class:`DataFrame` with mixed dtypes including ``datetime64[ns]`` incorrectly raising ``TypeError`` on equality comparisons (:issue:`13128`,:issue:`22163`) - Bug in :meth:`DataFrame.eq` comparison against ``NaT`` incorrectly returning ``True`` or ``NaN`` (:issue:`15697`,:issue:`22163`) +- Bug in :class:`DatetimeIndex` incorrectly allowing indexing with ``Timedelta`` object (:issue:`20464`) Timedelta ^^^^^^^^^ @@ -590,7 +592,7 @@ Timedelta - Bug in multiplying a :class:`Series` with numeric dtype against a ``timedelta`` object (:issue:`22390`) - Bug in :class:`Series` with numeric dtype when adding or subtracting an an array or ``Series`` with ``timedelta64`` dtype (:issue:`22390`) - Bug in :class:`Index` with numeric dtype when multiplying or dividing an array with dtype ``timedelta64`` (:issue:`22390`) -- +- Bug in :class:`TimedeltaIndex` incorrectly allowing indexing with ``Timestamp`` object (:issue:`20464`) - - diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 3ab1396c0fe38..53c396f2acaa0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -737,6 +737,11 @@ class Timestamp(_Timestamp): """ from pandas import Period + if self.tz is not None: + # GH#21333 + warnings.warn("Converting to Period representation will " + "drop timezone information.") + if freq is None: freq = self.freq diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 019aad4941d26..faf26f1baed7e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -2,7 +2,7 @@ from __future__ import division import operator import warnings -from datetime import time, datetime +from datetime import time, datetime, timedelta import numpy as np from pytz import utc @@ -727,6 +727,10 @@ def to_period(self, freq=None): """ from pandas.core.indexes.period import PeriodIndex + if self.tz is not None: + warnings.warn("Converting to PeriodIndex representation will " + "drop timezone information.") + if freq is None: freq = self.freqstr or self.inferred_freq @@ -737,7 +741,7 @@ def to_period(self, freq=None): freq = get_period_alias(freq) - return PeriodIndex(self.values, name=self.name, freq=freq, tz=self.tz) + return PeriodIndex(self.values, name=self.name, freq=freq) def snap(self, freq='S'): """ @@ -1201,6 +1205,12 @@ def get_loc(self, key, method=None, tolerance=None): key = Timestamp(key, tz=self.tz) return Index.get_loc(self, key, method, tolerance) + if isinstance(key, timedelta): + # GH#20464 + raise TypeError("Cannot index {cls} with {other}" + .format(cls=type(self).__name__, + other=type(key).__name__)) + if isinstance(key, time): if method is not None: raise NotImplementedError('cannot yet lookup inexact labels ' diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 9f14d4cfd5863..d9344ec6e55cb 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,5 +1,6 @@ """ implement the TimedeltaIndex """ import operator +from datetime import datetime import numpy as np from pandas.core.dtypes.common import ( @@ -487,7 +488,8 @@ def get_loc(self, key, method=None, tolerance=None): ------- loc : int """ - if is_list_like(key): + if is_list_like(key) or (isinstance(key, datetime) and key is not NaT): + # GH#20464 for datetime case raise TypeError if isna(key): diff --git a/pandas/tests/indexes/datetimes/test_astype.py b/pandas/tests/indexes/datetimes/test_astype.py index 64b8f48f6a4e1..429e8c810a3b2 100644 --- a/pandas/tests/indexes/datetimes/test_astype.py +++ b/pandas/tests/indexes/datetimes/test_astype.py @@ -246,7 +246,9 @@ def setup_method(self, method): def test_to_period_millisecond(self): index = self.index - period = index.to_period(freq='L') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq='L') assert 2 == len(period) assert period[0] == Period('2007-01-01 10:11:12.123Z', 'L') assert period[1] == Period('2007-01-01 10:11:13.789Z', 'L') @@ -254,7 +256,9 @@ def test_to_period_millisecond(self): def test_to_period_microsecond(self): index = self.index - period = index.to_period(freq='U') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + period = index.to_period(freq='U') assert 2 == len(period) assert period[0] == Period('2007-01-01 10:11:12.123456Z', 'U') assert period[1] == Period('2007-01-01 10:11:13.789123Z', 'U') @@ -266,81 +270,95 @@ def test_to_period_tz_pytz(self): ts = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') - result = ts.to_period()[0] - expected = ts[0].to_period() + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) - ts = date_range('1/1/2000', '4/1/2000', tz=UTC) + ts = date_range('1/1/2000', '4/1/2000', tz=UTC) - result = ts.to_period()[0] - expected = ts[0].to_period() + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - result = ts.to_period()[0] - expected = ts[0].to_period() + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) + + def test_to_period_tz_warning(self): + # GH#21333 make sure a warning is issued when timezone + # info is lost + dti = date_range('1/1/2000', '4/1/2000', tz='US/Eastern') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + dti.to_period() def test_to_period_tz_explicit_pytz(self): xp = date_range('1/1/2000', '4/1/2000').to_period() ts = date_range('1/1/2000', '4/1/2000', tz=pytz.timezone('US/Eastern')) - result = ts.to_period()[0] - expected = ts[0].to_period() + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) - ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) + ts = date_range('1/1/2000', '4/1/2000', tz=pytz.utc) - result = ts.to_period()[0] - expected = ts[0].to_period() + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - result = ts.to_period()[0] - expected = ts[0].to_period() + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) def test_to_period_tz_dateutil(self): xp = date_range('1/1/2000', '4/1/2000').to_period() ts = date_range('1/1/2000', '4/1/2000', tz='dateutil/US/Eastern') - result = ts.to_period()[0] - expected = ts[0].to_period() + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) - ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) + ts = date_range('1/1/2000', '4/1/2000', tz=dateutil.tz.tzutc()) - result = ts.to_period()[0] - expected = ts[0].to_period() + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) - ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) + ts = date_range('1/1/2000', '4/1/2000', tz=tzlocal()) - result = ts.to_period()[0] - expected = ts[0].to_period() + result = ts.to_period()[0] + expected = ts[0].to_period() - assert result == expected - tm.assert_index_equal(ts.to_period(), xp) + assert result == expected + tm.assert_index_equal(ts.to_period(), xp) def test_to_period_nofreq(self): idx = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-04']) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 8cffa035721b0..018194de5e131 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -586,3 +586,11 @@ def test_reasonable_keyerror(self): with pytest.raises(KeyError) as excinfo: index.get_loc('1/1/2000') assert '2000' in str(excinfo.value) + + def test_timedelta_invalid_key(self): + # GH#20464 + dti = pd.date_range('1970-01-01', periods=10) + with pytest.raises(TypeError): + dti.get_loc(pd.Timedelta(0)) + with pytest.raises(TypeError): + dti.get_loc(pd.Timedelta(1)) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index 08992188265bd..b83c66940333d 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -41,6 +41,13 @@ def test_getitem(self): tm.assert_index_equal(result, expected) assert result.freq == expected.freq + def test_timestamp_invalid_key(self): + # GH#20464 + tdi = pd.timedelta_range(0, periods=10) + with pytest.raises(TypeError): + tdi.get_loc(pd.Timestamp('1970-01-01')) + with pytest.raises(TypeError): + tdi.get_loc(pd.Timestamp('1970-01-02')) class TestWhere(object): # placeholder for symmetry with DatetimeIndex and PeriodIndex tests diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 58146cae587fe..872c510094a4f 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -929,3 +929,11 @@ def test_to_datetime_bijective(self): with tm.assert_produces_warning(exp_warning, check_stacklevel=False): assert (Timestamp(Timestamp.min.to_pydatetime()).value / 1000 == Timestamp.min.value / 1000) + + def test_to_period_tz_warning(self): + # GH#21333 make sure a warning is issued when timezone + # info is lost + ts = Timestamp('2009-04-15 16:17:18', tz='US/Eastern') + with tm.assert_produces_warning(UserWarning): + # warning that timezone info will be lost + ts.to_period('D') From 4d34e149269f8f9e811cd1ad036ee7fd999b7187 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 30 Aug 2018 21:27:10 -0700 Subject: [PATCH 2/3] Flake8 fixup --- pandas/tests/indexes/timedeltas/test_indexing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index b83c66940333d..a66b7036f2499 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -49,6 +49,7 @@ def test_timestamp_invalid_key(self): with pytest.raises(TypeError): tdi.get_loc(pd.Timestamp('1970-01-02')) + class TestWhere(object): # placeholder for symmetry with DatetimeIndex and PeriodIndex tests pass From 0c5e65280b209ad94da9e3230b151835fc08856b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 4 Sep 2018 08:49:53 -0700 Subject: [PATCH 3/3] explicit warnings, parametrize tests --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- pandas/core/indexes/datetimes.py | 4 ++-- pandas/core/indexes/timedeltas.py | 5 ++++- pandas/tests/indexes/datetimes/test_indexing.py | 14 ++++++++++---- pandas/tests/indexes/timedeltas/test_indexing.py | 11 ++++++----- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 53c396f2acaa0..52343593d1cc1 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -740,7 +740,8 @@ class Timestamp(_Timestamp): if self.tz is not None: # GH#21333 warnings.warn("Converting to Period representation will " - "drop timezone information.") + "drop timezone information.", + UserWarning) if freq is None: freq = self.freq diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index faf26f1baed7e..d12f2e61635b8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -729,7 +729,7 @@ def to_period(self, freq=None): if self.tz is not None: warnings.warn("Converting to PeriodIndex representation will " - "drop timezone information.") + "drop timezone information.", UserWarning) if freq is None: freq = self.freqstr or self.inferred_freq @@ -1205,7 +1205,7 @@ def get_loc(self, key, method=None, tolerance=None): key = Timestamp(key, tz=self.tz) return Index.get_loc(self, key, method, tolerance) - if isinstance(key, timedelta): + elif isinstance(key, timedelta): # GH#20464 raise TypeError("Cannot index {cls} with {other}" .format(cls=type(self).__name__, diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index d9344ec6e55cb..f49219ccbaa1d 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -489,7 +489,10 @@ def get_loc(self, key, method=None, tolerance=None): loc : int """ if is_list_like(key) or (isinstance(key, datetime) and key is not NaT): - # GH#20464 for datetime case + # GH#20464 datetime check here is to ensure we don't allow + # datetime objects to be incorrectly treated as timedelta + # objects; NaT is a special case because it plays a double role + # as Not-A-Timedelta raise TypeError if isna(key): diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 018194de5e131..601a7b13e370a 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -587,10 +587,16 @@ def test_reasonable_keyerror(self): index.get_loc('1/1/2000') assert '2000' in str(excinfo.value) - def test_timedelta_invalid_key(self): + @pytest.mark.parametrize('key', [pd.Timedelta(0), + pd.Timedelta(1), + timedelta(0)]) + def test_timedelta_invalid_key(self, key): # GH#20464 dti = pd.date_range('1970-01-01', periods=10) with pytest.raises(TypeError): - dti.get_loc(pd.Timedelta(0)) - with pytest.raises(TypeError): - dti.get_loc(pd.Timedelta(1)) + dti.get_loc(key) + + def test_get_loc_nat(self): + # GH#20464 + index = DatetimeIndex(['1/3/2000', 'NaT']) + assert index.get_loc(pd.NaT) == 1 diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index a66b7036f2499..8ba2c81f429d8 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -1,4 +1,4 @@ -from datetime import timedelta +from datetime import datetime, timedelta import pytest import numpy as np @@ -41,13 +41,14 @@ def test_getitem(self): tm.assert_index_equal(result, expected) assert result.freq == expected.freq - def test_timestamp_invalid_key(self): + @pytest.mark.parametrize('key', [pd.Timestamp('1970-01-01'), + pd.Timestamp('1970-01-02'), + datetime(1970, 1, 1)]) + def test_timestamp_invalid_key(self, key): # GH#20464 tdi = pd.timedelta_range(0, periods=10) with pytest.raises(TypeError): - tdi.get_loc(pd.Timestamp('1970-01-01')) - with pytest.raises(TypeError): - tdi.get_loc(pd.Timestamp('1970-01-02')) + tdi.get_loc(key) class TestWhere(object):