-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Assorted DatetimeIndex bugfixes #24157
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 25 commits
092b3b9
fbcc04b
ac52857
121c373
ef66bb9
91738d3
bb0d065
ff3a5c0
e54159d
44e0126
12e0f4e
28bf2de
06d0a8e
81c7d0f
97f976f
9e55d97
c7f280f
af303c7
8ece686
30f01a0
9617b85
3731098
df05c88
8f39b23
e958ce6
97cb6b3
36d37e4
3fc1c19
88f7094
e178bb9
50f6b7e
b927925
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -351,6 +351,9 @@ def __getitem__(self, key): | |
freq = key.step * self.freq | ||
else: | ||
freq = self.freq | ||
elif key is Ellipsis: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# GH#21282 avoid losing `freq` attribute | ||
freq = self.freq | ||
|
||
attribs['freq'] = freq | ||
|
||
|
@@ -547,9 +550,19 @@ def _validate_frequency(cls, index, freq, **kwargs): | |
if index.size == 0 or inferred == freq.freqstr: | ||
return None | ||
|
||
on_freq = cls._generate_range(start=index[0], end=None, | ||
periods=len(index), freq=freq, **kwargs) | ||
if not np.array_equal(index.asi8, on_freq.asi8): | ||
try: | ||
on_freq = cls._generate_range(start=index[0], end=None, | ||
periods=len(index), freq=freq, | ||
**kwargs) | ||
if not np.array_equal(index.asi8, on_freq.asi8): | ||
raise ValueError | ||
except ValueError as e: | ||
if "non-fixed" in str(e): | ||
# non-fixed frequencies are not meaningful for timedelta64; | ||
# we retain that error message | ||
raise e | ||
# GH#11587 if index[0] is NaT _generate_range will raise | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is too cryptic, pls make more readable if you don't actually know what's happening here |
||
# ValueError, which we re-raise with a targeted error message | ||
raise ValueError('Inferred frequency {infer} from passed values ' | ||
'does not conform to passed frequency {passed}' | ||
.format(infer=inferred, passed=freq.freqstr)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,9 +14,9 @@ | |
from pandas.util._decorators import Appender | ||
|
||
from pandas.core.dtypes.common import ( | ||
_INT64_DTYPE, _NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, | ||
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, | ||
is_period_dtype, is_string_dtype, is_timedelta64_dtype) | ||
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype, | ||
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype, | ||
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype) | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries | ||
from pandas.core.dtypes.missing import isna | ||
|
@@ -264,6 +264,8 @@ def _generate_range(cls, start, end, periods, freq, tz=None, | |
if closed is not None: | ||
raise ValueError("Closed has to be None if not both of start" | ||
"and end are defined") | ||
if start is NaT or end is NaT: | ||
raise ValueError("Neither `start` nor `end` can be NaT") | ||
|
||
left_closed, right_closed = dtl.validate_endpoints(closed) | ||
|
||
|
@@ -1652,6 +1654,13 @@ def maybe_convert_dtype(data, copy): | |
raise TypeError("Passing PeriodDtype data is invalid. " | ||
"Use `data.to_timestamp()` instead") | ||
|
||
elif is_categorical_dtype(data): | ||
# GH#18664 preserve tz in going DTI->Categorical->DTI | ||
# TODO: cases where we need to do another pass through this func, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is the TODO case here, what is an example? do you have an xfailed test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. example is Categorical(TimedeltaIndex) where we should do another pass through maybe_convert_dtype in order to issue the FutureWarning. I'm assuming there are other corner cases here that will need to be caught/tested. |
||
# e.g. the categories are timedelta64s | ||
data = data.categories.take(data.codes, fill_value=NaT) | ||
copy = False | ||
|
||
elif is_extension_type(data) and not is_datetime64tz_dtype(data): | ||
# Includes categorical | ||
# TODO: We have no tests for these | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,12 +14,42 @@ | |
from pandas import ( | ||
DatetimeIndex, Index, Timestamp, date_range, datetime, offsets, | ||
to_datetime) | ||
from pandas.core.arrays import period_array | ||
from pandas.core.arrays import ( | ||
DatetimeArrayMixin as DatetimeArray, period_array) | ||
import pandas.util.testing as tm | ||
|
||
|
||
class TestDatetimeIndex(object): | ||
|
||
@pytest.mark.parametrize('dt_cls', [DatetimeIndex, DatetimeArray]) | ||
def test_freq_validation_with_nat(self, dt_cls): | ||
# GH#11587 make sure we get a useful error message when generate_range | ||
# raises | ||
msg = ("Inferred frequency None from passed values does not conform " | ||
"to passed frequency D") | ||
with pytest.raises(ValueError, match=msg): | ||
dt_cls([pd.NaT, pd.Timestamp('2011-01-01')], freq='D') | ||
with pytest.raises(ValueError, match=msg): | ||
dt_cls([pd.NaT, pd.Timestamp('2011-01-01').value], | ||
freq='D') | ||
|
||
def test_categorical_preserves_tz(self): | ||
# GH#18664 retain tz when going DTI-->Categorical-->DTI | ||
# TODO: parametrize over DatetimeIndex/DatetimeArray | ||
# once CategoricalIndex(DTA) works | ||
|
||
dti = pd.DatetimeIndex( | ||
[pd.NaT, '2015-01-01', '1999-04-06 15:14:13', '2015-01-01'], | ||
tz='US/Eastern') | ||
|
||
ci = pd.CategoricalIndex(dti) | ||
carr = pd.Categorical(dti) | ||
cser = pd.Series(ci) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you parametrize There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. will take a look There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not without adding casting code that makes the verbosity a wash. Once Categorical(DatetimeArray) works (currently raises bc DatetimeArray doesn't have _constructor) then this test can be parametrized over DatetimeIndex/DatetimeArray |
||
for obj in [ci, carr, cser]: | ||
result = pd.DatetimeIndex(obj) | ||
tm.assert_index_equal(result, dti) | ||
|
||
def test_dti_with_period_data_raises(self): | ||
# GH#23675 | ||
data = pd.PeriodIndex(['2016Q1', '2016Q2'], freq='Q') | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
from datetime import date, datetime, timedelta | ||
import functools | ||
import operator | ||
import warnings | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prob going to fail the build as this is not needed |
||
|
||
from dateutil.easter import easter | ||
import numpy as np | ||
|
@@ -2487,6 +2488,9 @@ def generate_range(start=None, end=None, periods=None, | |
|
||
""" | ||
if time_rule is not None: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you need to change any existing tests to account for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, tests.indexes.datetimes.test_date_range test_generate and test_generate_cday There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this sufficiently internal that we don't need to do a deprecation cycle? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, let's just drop it entirely (fix the tests). I don't think There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will do. OK with moving this to arrays.datetimes while we're at it? |
||
warnings.warn("`time_rule` is deprecated and will be removed in a " | ||
"future version. Use `offset` instead.", | ||
FutureWarning, stacklevel=2) | ||
from pandas.tseries.frequencies import get_offset | ||
|
||
offset = get_offset(time_rule) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
these don't jive with what's at the top of the PR. missing 2 notes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what about #11587 ?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does it merit a note? All that is changed is an error message. But yes, this does close 11587, I'll update this above.