Skip to content

Make _freq/freq/tz/_tz/dtype/_dtype/offset/_offset all inherit reliably #24517

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 1, 2019
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,8 +1138,8 @@ def _time_shift(self, periods, freq=None):
freq = frequencies.to_offset(freq)
offset = periods * freq
result = self + offset
if hasattr(self, 'tz'):
result._tz = self.tz
if getattr(self, 'tz', None):
result._dtype = self._dtype
return result

if periods == 0:
Expand Down
22 changes: 15 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
# Constructors

_attributes = ["freq", "tz"]
_tz = None
_dtype = None
_freq = None

@classmethod
Expand All @@ -231,8 +231,13 @@ def _simple_new(cls, values, freq=None, tz=None):
result = object.__new__(cls)
result._data = values
result._freq = freq
tz = timezones.maybe_get_tz(tz)
result._tz = timezones.tz_standardize(tz)
if tz is None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it cheap to detect when you need to call maybe_get_tz & tz_standarize? ideally these could be called in the DatetimeTZDtype constructor / alternatively could have a dedicate constructor,

DateteimeTZDtype.construct_from_tztype, and will pave the way for a unified dtype as well.

dtype = _NS_DTYPE
else:
tz = timezones.maybe_get_tz(tz)
tz = timezones.tz_standardize(tz)
dtype = DatetimeTZDtype('ns', tz)
result._dtype = dtype
return result

def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
Expand Down Expand Up @@ -399,9 +404,12 @@ def dtype(self):
If the values are tz-aware, then the ``DatetimeTZDtype``
is returned.
"""
if self.tz is None:
return _NS_DTYPE
return DatetimeTZDtype('ns', self.tz)
return self._dtype

@property
def _tz(self):
# TODO: Can we get rid of the private version of this?
return getattr(self._dtype, "tz", None)

@property
def tz(self):
Expand All @@ -411,7 +419,7 @@ def tz(self):
Returns
-------
datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
Returns None when the array is tz-naive.
Returns None when the array is tz-naive.
"""
# GH 18595
return self._tz
Expand Down
98 changes: 60 additions & 38 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,10 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):

dtarr = DatetimeArray._simple_new(values, freq=freq, tz=tz)
result = object.__new__(cls)
result._data = dtarr._data
result._freq = dtarr.freq
result._tz = dtarr.tz
result._eadata = dtarr
result.name = name
# For groupby perf. See note in indexes/base about _index_data
# TODO: make sure this is updated correctly if edited
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This TODO is for _index_data? In theory that shouldn't happen, since DatetimeIndex is immutable.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In _libs.reduction there is a line:

object.__setattr__(cached_ityp, '_index_data', islider.buf)

which makes me wary. Is this just never relevant for DTI?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's the intent of all this, to directly mutate the buffer in place. The .reset method undoes all this stuff. Nobody else should be messing with it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@TomAugspurger I think all your other comments have been addressed, not sure about this one. Should this TODO comment be removed? Some other action taken?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah this todo isn't necessary AFAICT.

Also, I really don't think we should have inverted the relationship between the eadata and data attributes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just because that change isn't going to last through #24024, so I think it was unnecessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, I really don't think we should have inverted the relationship between the eadata and data attributes. [...] so I think it was unnecessary.

We can't have both 1) _eadata being a property that depends on self.freq and 2) freq be a property that depends on _eadata.

Definitely agree that 24024 should return things to the old pattern.

result._index_data = result._data
result._reset_identity()
return result
Expand All @@ -345,19 +344,6 @@ def _values(self):
else:
return self.values

@property
def tz(self):
# GH 18595
return self._tz

@tz.setter
def tz(self, value):
# GH 3746: Prevent localizing or converting the index by setting tz
raise AttributeError("Cannot directly set timezone. Use tz_localize() "
"or tz_convert() as appropriate")

tzinfo = tz

@property
def size(self):
# TODO: Remove this when we have a DatetimeTZArray
Expand Down Expand Up @@ -416,15 +402,18 @@ def __setstate__(self, state):
data = np.empty(nd_state[1], dtype=nd_state[2])
np.ndarray.__setstate__(data, nd_state)

freq = own_state[1]
tz = timezones.tz_standardize(own_state[2])
dtarr = DatetimeArray._simple_new(data, freq=freq, tz=tz)

self.name = own_state[0]
self._freq = own_state[1]
self._tz = timezones.tz_standardize(own_state[2])

else: # pragma: no cover
data = np.empty(state)
np.ndarray.__setstate__(data, state)
dtarr = DatetimeArray(data)

self._data = data
self._eadata = dtarr
self._reset_identity()

else:
Expand Down Expand Up @@ -502,7 +491,14 @@ def union(self, other):
else:
result = Index.union(this, other)
if isinstance(result, DatetimeIndex):
result._tz = timezones.tz_standardize(this.tz)
if this.tz is not None:
# TODO: can this be simplified? It used to just be
# `result._tz = timezones.tz_standardize(this.tz)`
tz = timezones.tz_standardize(this.tz)
if result.tz is None:
result = result.tz_localize(tz)
else:
result = result.tz_convert(tz)
if (result.freq is None and
(this.freq is not None or other.freq is not None)):
result.freq = to_offset(result.inferred_freq)
Expand Down Expand Up @@ -532,9 +528,13 @@ def union_many(self, others):
else:
tz = this.tz
this = Index.union(this, other)
if isinstance(this, DatetimeIndex):
this._tz = timezones.tz_standardize(tz)

if isinstance(this, DatetimeIndex) and tz is not None:
# TODO: can this be simplified? it used to just be
# `this._tz = tz_standardize(tz)`
if this.tz is None:
this = this.tz_localize(timezones.tz_standardize(tz))
else:
this = this.tz_convert(timezones.tz_standardize(tz))
return this

def _can_fast_union(self, other):
Expand Down Expand Up @@ -1129,9 +1129,43 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None):
# Wrapping DatetimeArray

@property
def _eadata(self):
return DatetimeArray._simple_new(self._data,
tz=self.tz, freq=self.freq)
def _data(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that _data is a property anywhere else. Just an attribute that's set in _simple_new.

Actually... how does this even work? If you don't have a setter (which I don't see) then simple_new should fail.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah I see you set eadata.

It doesn't really matter since we're removing it soon anyway, but I'd prefer that _eadata always reference _data. That makes keeps all the index classes consistent that ._data is the actual array, not a property.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that _data is a property anywhere else. Just an attribute that's set in _simple_new.

That's right. In this PR we set _eadata inside _simple_new and make _data a property returning _eadata._data. The freq/tz passthrough doesn't work with _eadata as a property (as in master), so the only question is whether to also set _data in _simple_new. I chose to make it a property to prevent any shenanigans where they become untied.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think https://github.com/pandas-dev/pandas/pull/24024/files#diff-26a6d2ca7adfca586aabbb1c9dd8bf36R74 is what we want for eadata & freq (and if we can do it here, instead of that PR, then that's best).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The freq/tz passthrough doesn't work with _eadata as a property (as in master),

Why's that? I suppose I could see why setting doesn't work, since IIUC we create a new DateteimArray on each invocation of _eadata.

Should we just hold off on these changes til #24024 then?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, of course it won't work, since we call .freq when creating the _eadata instance

return self._eadata._data

@property
def _freq(self):
return self._eadata._freq

@_freq.setter
def _freq(self, value):
# Validation will be handled by _eadata setter
self._eadata._freq = value

@property
def freq(self):
return self._freq

@freq.setter
def freq(self, value):
# validation is handled by _eadata setter
self._eadata.freq = value

@property
def _tz(self):
return self._eadata._tz

@property
def tz(self):
# GH#18595
return self._tz

@tz.setter
def tz(self, value):
# GH 3746: Prevent localizing or converting the index by setting tz
raise AttributeError("Cannot directly set timezone. Use tz_localize() "
"or tz_convert() as appropriate")

tzinfo = tz

# Compat for frequency inference, see GH#23789
_is_monotonic_increasing = Index.is_monotonic_increasing
Expand Down Expand Up @@ -1168,18 +1202,6 @@ def offset(self, value):
warnings.warn(msg, FutureWarning, stacklevel=2)
self.freq = value

@property
def freq(self):
return self._freq

@freq.setter
def freq(self, value):
if value is not None:
# let DatetimeArray to validation
self._eadata.freq = value

self._freq = to_offset(value)

def __getitem__(self, key):
result = self._eadata.__getitem__(key)
if is_scalar(result):
Expand Down
31 changes: 1 addition & 30 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
from pandas._libs.tslibs import NaT, iNaT, resolution
from pandas._libs.tslibs.period import (
DIFFERENT_FREQ, IncompatibleFrequency, Period)
from pandas.util._decorators import (
Appender, Substitution, cache_readonly, deprecate_kwarg)
from pandas.util._decorators import Appender, Substitution, cache_readonly

from pandas.core.dtypes.common import (
is_bool_dtype, is_datetime64_any_dtype, is_float, is_float_dtype,
Expand Down Expand Up @@ -472,34 +471,6 @@ def _int64index(self):
# ------------------------------------------------------------------------
# Index Methods

@deprecate_kwarg(old_arg_name='n', new_arg_name='periods')
def shift(self, periods):
"""
Shift index by desired number of increments.

This method is for shifting the values of period indexes
by a specified time increment.

Parameters
----------
periods : int, default 1
Number of periods (or increments) to shift by,
can be positive or negative.

.. versionchanged:: 0.24.0

Returns
-------
pandas.PeriodIndex
Shifted index.

See Also
--------
DatetimeIndex.shift : Shift values of DatetimeIndex.
"""
i8values = self._data._time_shift(periods)
return self._simple_new(i8values, name=self.name, freq=self.freq)

def _coerce_scalar_to_index(self, item):
"""
we need to coerce a scalar to a compat for our index type
Expand Down
40 changes: 23 additions & 17 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,11 +235,12 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
freq = to_offset(freq)
tdarr = TimedeltaArray._simple_new(values, freq=freq)
result = object.__new__(cls)
result._data = tdarr._data
result._freq = tdarr._freq
result._eadata = tdarr
result.name = name
# For groupby perf. See note in indexes/base about _index_data
result._index_data = result._data
# TODO: make sure this is updated correctly if edited
result._index_data = tdarr._data

result._reset_identity()
return result

Expand Down Expand Up @@ -279,8 +280,25 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
# Wrapping TimedeltaArray

@property
def _eadata(self):
return TimedeltaArray._simple_new(self._data, freq=self.freq)
def _data(self):
return self._eadata._data

@property
def _freq(self):
return self._eadata._freq

@_freq.setter
def _freq(self, value):
self._eadata._freq = value

@property
def freq(self):
return self._freq

@freq.setter
def freq(self, value):
# Validation will be done in _eadata setter
self._eadata.freq = value

__mul__ = _make_wrapped_arith_op("__mul__")
__rmul__ = _make_wrapped_arith_op("__rmul__")
Expand Down Expand Up @@ -316,18 +334,6 @@ def __getitem__(self, key):
return result
return type(self)(result, name=self.name)

@property
def freq(self): # TODO: get via eadata
return self._freq

@freq.setter
def freq(self, value): # TODO: get via eadata
if value is not None:
# dispatch to TimedeltaArray to validate frequency
self._eadata.freq = value

self._freq = to_offset(value)

# -------------------------------------------------------------------

@Appender(_index_shared_docs['astype'])
Expand Down