-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: Simplify Datetimelike constructor dispatching #23140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f13cc58
4188ec7
7804f1b
a4775f4
8ee34fa
78943c1
aa71383
eae8389
e871733
7840f91
ec50b0b
eb7a6b6
32c6391
c903917
b97ec96
11db555
147de57
7c4d281
b90f421
dc4f474
46d5e64
b5827c7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -11,6 +11,7 @@ | |
from pandas._libs.tslibs.period import ( | ||
Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) | ||
|
||
from pandas.util._decorators import deprecate_kwarg | ||
from pandas.errors import NullFrequencyError, PerformanceWarning | ||
from pandas import compat | ||
|
||
|
@@ -39,7 +40,6 @@ | |
from pandas.core.algorithms import checked_add_with_arr | ||
|
||
from .base import ExtensionOpsMixin | ||
from pandas.util._decorators import deprecate_kwarg | ||
|
||
|
||
def _make_comparison_op(cls, op): | ||
|
@@ -143,6 +143,10 @@ def asi8(self): | |
# ------------------------------------------------------------------ | ||
# Array-like Methods | ||
|
||
@property | ||
def ndim(self): | ||
return len(self.shape) | ||
|
||
@property | ||
def shape(self): | ||
return (len(self),) | ||
|
@@ -151,6 +155,10 @@ def shape(self): | |
def size(self): | ||
return np.prod(self.shape) | ||
|
||
@property | ||
def nbytes(self): | ||
return self._ndarray_values.nbytes | ||
|
||
def __len__(self): | ||
return len(self._data) | ||
|
||
|
@@ -211,6 +219,10 @@ def astype(self, dtype, copy=True): | |
# ------------------------------------------------------------------ | ||
# Null Handling | ||
|
||
def isna(self): | ||
# EA Interface | ||
return self._isnan | ||
|
||
@property # NB: override with cache_readonly in immutable subclasses | ||
def _isnan(self): | ||
""" return if each value is nan""" | ||
|
@@ -332,6 +344,10 @@ def _validate_frequency(cls, index, freq, **kwargs): | |
# Frequency validation is not meaningful for Period Array/Index | ||
return None | ||
|
||
# DatetimeArray may pass `ambiguous`, nothing else will be accepted | ||
# by cls._generate_range below | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why wouldn’t you just pop the kwarg for key and pass it directly? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, actually that ends up being appreciably more verbose. We have to do separate cls._generate_range calls for TimedeltaArray vs DatetimeArray. |
||
assert all(key == 'ambiguous' for key in kwargs) | ||
|
||
inferred = index.inferred_freq | ||
if index.size == 0 or inferred == freq.freqstr: | ||
return None | ||
|
@@ -595,9 +611,12 @@ def _time_shift(self, periods, freq=None): | |
|
||
start = self[0] + periods * self.freq | ||
end = self[-1] + periods * self.freq | ||
attribs = self._get_attributes_dict() | ||
|
||
# Note: in the DatetimeTZ case, _generate_range will infer the | ||
# appropriate timezone from `start` and `end`, so tz does not need | ||
# to be passed explicitly. | ||
return self._generate_range(start=start, end=end, periods=None, | ||
**attribs) | ||
freq=self.freq) | ||
|
||
@classmethod | ||
def _add_datetimelike_methods(cls): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
|
||
from pandas.core.dtypes.common import ( | ||
is_integer_dtype, is_float_dtype, is_period_dtype, is_timedelta64_dtype, | ||
is_object_dtype, | ||
is_datetime64_dtype, _TD_DTYPE) | ||
from pandas.core.dtypes.dtypes import PeriodDtype | ||
from pandas.core.dtypes.generic import ABCSeries | ||
|
@@ -122,18 +123,30 @@ def freq(self, value): | |
|
||
_attributes = ["freq"] | ||
|
||
def __new__(cls, values, freq=None, **kwargs): | ||
def __new__(cls, values, freq=None, dtype=None, **kwargs): | ||
|
||
if freq is not None: | ||
# coerce freq to freq object, otherwise it can be coerced | ||
# elementwise, which is slow | ||
freq = Period._maybe_convert_freq(freq) | ||
|
||
freq = dtl.validate_dtype_freq(dtype, freq) | ||
|
||
if is_period_dtype(values): | ||
# PeriodArray, PeriodIndex | ||
if freq is not None and values.freq != freq: | ||
raise IncompatibleFrequency(freq, values.freq) | ||
freq = values.freq | ||
freq = dtl.validate_dtype_freq(values.dtype, freq) | ||
values = values.asi8 | ||
|
||
elif is_datetime64_dtype(values): | ||
# TODO: what if it has tz? | ||
values = dt64arr_to_periodarr(values, freq) | ||
|
||
elif is_object_dtype(values) or isinstance(values, (list, tuple)): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. shouldn't this be is_list_like? (for the isinstance check) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is specifically for object dtype (actually, I need to add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. specifically what happens if other non ndarray list likes hit this path? do they need handling? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. They do need handling, but we're not there yet. The thought process for implementing these constructors piece-by-piece is a) The DatetimeIndex/TimedeltaIndex/PeriodIndex constructors are overgrown; let's avoid that in the Array subclasses. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Other question: where was this handled previously? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's hard for me to say what's better in the abstract. From the WIP PeriodArray PR, I found that having to think carefully about what type of data I had forced some clarity in the code. I liked having to explicitly reach for that Regardless, I think our two goals with the array constructors should be
If you think we're likely to end up in a situation where being able to pass an array of objects to the main There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am a bit puzzled why you would handle lists and ndarrays differently (Tom and Joris); these are clearly doing the same thing, and we have very similar handling for list-likes throughout pandas. Separating these is a non-starter - even having a separate constructor is also not very friendly. pandas does inference on construction, which is one of its big selling points. Trying to change this, especially at the micro level, is a huge mental disconnect. If you want to propose something like that, please do it in other issues. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I don't think we are. But, my only argument was
If that's not persuasive then I'm not going to argue against handling them in the init. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
+1
+1
+1
Yes, I think we should be pretty forgiving about what gets accepted into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
It's not about lists vs arrays, it's about arrays of Period objects vs arrays of ordinal integers, which is something very different.
Being forgiving is exactly what led to the complex Period/DatetimeIndex constructors. I think we should not make the same choice for our Array classes. I personally also think it makes the code clearer to even separate those two concepts (basically what we also did with IntegerArray), but maybe let's open an issue to further discuss that instead of here in a hidden review comment thread? (I can only open one later today.) |
||
# e.g. array([Period(...), Period(...), NaT]) | ||
values = np.array(values, dtype=object) | ||
if freq is None: | ||
freq = libperiod.extract_freq(values) | ||
values = libperiod.extract_ordinals(values, freq) | ||
|
||
return cls._simple_new(values, freq=freq, **kwargs) | ||
|
||
@classmethod | ||
|
@@ -176,11 +189,13 @@ def _from_ordinals(cls, values, freq=None, **kwargs): | |
|
||
@classmethod | ||
def _generate_range(cls, start, end, periods, freq, fields): | ||
periods = dtl.validate_periods(periods) | ||
|
||
if freq is not None: | ||
freq = Period._maybe_convert_freq(freq) | ||
|
||
field_count = len(fields) | ||
if com.count_not_none(start, end) > 0: | ||
if start is not None or end is not None: | ||
if field_count > 0: | ||
raise ValueError('Can either instantiate from fields ' | ||
'or endpoints, but not both') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it needed to have the
_isnan
concept on the arrays? We use it in some internal methods on the Index class, but for Arrays it seems to me additional complexity compared to simply defining isna
appropriately on each Array? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Discussed elsewhere; can we mark as resolved?