-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: make DatetimeIndex._simple_new actually simple #32282
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 32 commits
6389872
2329008
91a0937
8924c55
b55e8b1
d04f3b5
3a2c228
b6ffc2a
5b0bac8
7405488
13eb9a7
aadedfd
9f220f5
1451163
e8ad19a
e8483f8
f82f389
5f22827
9a5360a
1d9cea3
1acb5a3
d92ff5f
34b4dec
242695c
50d1c6c
8a90f45
0e7ffe1
5cba722
6efd065
893d0aa
d659a23
864b604
743a003
82c5645
e91814d
10a2ff2
c8c3ebb
711987e
e74ebc0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,17 +7,13 @@ | |
|
||
from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts | ||
from pandas._libs.tslibs import fields, parsing, timezones | ||
from pandas._typing import Label | ||
from pandas.util._decorators import cache_readonly | ||
|
||
from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar | ||
from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
from pandas.core.dtypes.missing import is_valid_nat_for_dtype | ||
|
||
from pandas.core.arrays.datetimes import ( | ||
DatetimeArray, | ||
tz_to_dtype, | ||
validate_tz_from_dtype, | ||
) | ||
from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype | ||
import pandas.core.common as com | ||
from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name | ||
from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin | ||
|
@@ -36,7 +32,16 @@ def _new_DatetimeIndex(cls, d): | |
if "data" in d and not isinstance(d["data"], DatetimeIndex): | ||
# Avoid need to verify integrity by calling simple_new directly | ||
data = d.pop("data") | ||
result = cls._simple_new(data, **d) | ||
if not isinstance(data, DatetimeArray): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what is all this about? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. backward compat for old pickles There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you make this nicer. it is very hard to grok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls comment the cases |
||
tz = d.pop("tz") | ||
freq = d.pop("freq") | ||
dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq) | ||
else: | ||
dta = data | ||
for key in ["tz", "freq"]: | ||
if key in d: | ||
assert d.pop(key) == getattr(dta, key) | ||
result = cls._simple_new(dta, **d) | ||
else: | ||
with warnings.catch_warnings(): | ||
# TODO: If we knew what was going in to **d, we might be able to | ||
|
@@ -235,33 +240,19 @@ def __new__( | |
return subarr | ||
|
||
@classmethod | ||
def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): | ||
def _simple_new(cls, values: DatetimeArray, name: Label = None): | ||
""" | ||
We require the we have a dtype compat for the values | ||
jbrockmendel marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if we are passed a non-dtype compat, then coerce using the constructor | ||
""" | ||
if isinstance(values, DatetimeArray): | ||
if tz: | ||
tz = validate_tz_from_dtype(dtype, tz) | ||
dtype = DatetimeTZDtype(tz=tz) | ||
elif dtype is None: | ||
dtype = _NS_DTYPE | ||
|
||
values = DatetimeArray(values, freq=freq, dtype=dtype) | ||
tz = values.tz | ||
freq = values.freq | ||
values = values._data | ||
|
||
dtype = tz_to_dtype(tz) | ||
dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype) | ||
assert isinstance(dtarr, DatetimeArray) | ||
assert isinstance(values, DatetimeArray), type(values) | ||
|
||
result = object.__new__(cls) | ||
result._data = dtarr | ||
result._data = values | ||
result.name = name | ||
result._no_setting_name = False | ||
# For groupby perf. See note in indexes/base about _index_data | ||
result._index_data = dtarr._data | ||
result._index_data = values._data | ||
result._reset_identity() | ||
return result | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1155,7 +1155,12 @@ def apply_index(self, i): | |
shifted = liboffsets.shift_months(i.asi8, self.n, self._day_opt) | ||
# TODO: going through __new__ raises on call to _validate_frequency; | ||
# are we passing incorrect freq? | ||
return type(i)._simple_new(shifted, freq=i.freq, dtype=i.dtype) | ||
dta = i | ||
if not isinstance(i._data, np.ndarray): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. WAT? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. compat for getting either a DTA or a DTI, we already do this in a couple places. I think we can do it just once by putting it in the |
||
# DTA | ||
dta = i._data | ||
|
||
return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) | ||
|
||
|
||
class MonthEnd(MonthOffset): | ||
|
@@ -1885,9 +1890,12 @@ def apply_index(self, dtindex): | |
) | ||
# TODO: going through __new__ raises on call to _validate_frequency; | ||
# are we passing incorrect freq? | ||
return type(dtindex)._simple_new( | ||
shifted, freq=dtindex.freq, dtype=dtindex.dtype | ||
) | ||
dta = dtindex | ||
if not isinstance(dtindex._data, np.ndarray): | ||
# DTA | ||
dta = dtindex._data | ||
|
||
return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) | ||
|
||
|
||
class BQuarterEnd(QuarterOffset): | ||
|
@@ -1971,9 +1979,11 @@ def apply_index(self, dtindex): | |
) | ||
# TODO: going through __new__ raises on call to _validate_frequency; | ||
# are we passing incorrect freq? | ||
return type(dtindex)._simple_new( | ||
shifted, freq=dtindex.freq, dtype=dtindex.dtype | ||
) | ||
dta = dtindex | ||
if not isinstance(dtindex._data, np.ndarray): | ||
# DTA | ||
dta = dtindex._data | ||
return type(dta)._simple_new(shifted, freq=dta.freq, dtype=dta.dtype) | ||
|
||
def is_on_offset(self, dt: datetime) -> bool: | ||
if self.normalize and not _is_normalized(dt): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could change _set_freq to return self to make this idiom more clear (also _set_freq should be marked in the doc-string as a mutating function)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Holding off on this since _set_freq needs a bigger rethink, xref #31218.
All other comments addressed, i think