Skip to content

WIP: make _holder changeover #24540

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
9 changes: 7 additions & 2 deletions pandas/_libs/src/ujson/python/objToJSON.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ static PyObject *get_values(PyObject *obj) {
PRINTMARK();

if (values && !PyArray_CheckExact(values)) {

if (PyObject_HasAttrString(values, "to_numpy")) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here about what this is doing, and . TODO to see if we can remove the .values below

values = PyObject_CallMethod(values, "to_numpy", NULL);
}

if (PyObject_HasAttrString(values, "values")) {
PyObject *subvals = get_values(values);
PyErr_Clear();
Expand Down Expand Up @@ -279,8 +284,8 @@ static PyObject *get_values(PyObject *obj) {
repr = PyString_FromString("<unknown dtype>");
}

PyErr_Format(PyExc_ValueError, "%s or %s are not JSON serializable yet",
PyString_AS_STRING(repr), PyString_AS_STRING(typeRepr));
PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet",
repr, typeRepr);
Py_DECREF(repr);
Py_DECREF(typeRepr);

Expand Down
15 changes: 11 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCPandasArray
from pandas.core.dtypes.missing import isna

from pandas.core import ops
Expand Down Expand Up @@ -240,11 +240,14 @@ def _simple_new(cls, values, freq=None, tz=None):
result._dtype = dtype
return result

def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False,
def __init__(self, values, freq=None, tz=None, dtype=None, copy=False,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The init should be values, freq, dtype, copy to match PeriodArray.

dayfirst=False, yearfirst=False, ambiguous='raise'):
return cls._from_sequence(
result = type(self)._from_sequence(
values, freq=freq, tz=tz, dtype=dtype, copy=copy,
dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous)
self._data = result._data
self._freq = result._freq
self._dtype = result._dtype

@classmethod
def _from_sequence(cls, data, dtype=None, copy=False,
Expand Down Expand Up @@ -523,7 +526,9 @@ def _ndarray_values(self):

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
if isna(fill_value):
if isna(fill_value) or fill_value == iNaT:
# FIXME: shouldn't allow iNaT through here; see discussion
# in GH#24024
fill_value = iNaT
elif isinstance(fill_value, (datetime, np.datetime64)):
self._assert_tzawareness_compat(fill_value)
Expand Down Expand Up @@ -1568,6 +1573,8 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
copy = False
elif isinstance(data, ABCSeries):
data = data._values
elif isinstance(data, ABCPandasArray):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you know if this is necessary? Why not fall through to the usual sequence handling?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there was a test in tests.series.test_missing that ended up calling lib.infer_dtype and raising an AttributeError because PandasDType doesn't have a base attribute

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That probably indicates a bug that this is hiding. Shouldn't all non ndarray sequences be converted to ndarrays before calling infer_dtype?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i seem to recall something about extension dtypes being registered in such a way that infer_dtype could pick up on them. will take a look

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jbrockmendel
infer_dtype does not know about extension dtypes per se (and cannot because the point is to figure out whats hiding behind a dtype (mostly object), so dispatching based on dtype is not possible).

The only ones that are directly implemented are Period and Interval, but no other EA type, including Datetime64TZ.

data = data.to_numpy()

if hasattr(data, "freq"):
# i.e. DatetimeArray/Index
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@

from pandas.core.dtypes.dtypes import (
CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, ExtensionDtype,
IntervalDtype, PandasExtensionDtype, PeriodDtype, _pandas_registry,
registry)
IntervalDtype, PandasExtensionDtype, PeriodDtype, registry)
from pandas.core.dtypes.generic import (
ABCCategorical, ABCCategoricalIndex, ABCDateOffset, ABCDatetimeIndex,
ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries, ABCSparseArray,
Expand Down Expand Up @@ -1984,7 +1983,7 @@ def pandas_dtype(dtype):
return dtype

# registered extension types
result = _pandas_registry.find(dtype) or registry.find(dtype)
result = registry.find(dtype)
if result is not None:
return result

Expand Down
14 changes: 11 additions & 3 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
is_extension_array_dtype, is_interval_dtype, is_object_dtype,
is_period_dtype, is_sparse, is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCDatetimeIndex, ABCPeriodIndex, ABCRangeIndex, ABCSparseDataFrame,
ABCTimedeltaIndex)
ABCDatetimeArray, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex,
ABCRangeIndex, ABCSparseDataFrame, ABCTimedeltaIndex)

from pandas import compat

Expand Down Expand Up @@ -471,7 +471,15 @@ def _concat_datetimetz(to_concat, name=None):
all inputs must be DatetimeIndex
it is used in DatetimeIndex.append also
"""
return to_concat[0]._concat_same_dtype(to_concat, name=name)
# Right now, internals will pass a List[DatetimeArray] here
# for reductions like quantile. I would like to disentangle
# all this before we get here.
sample = to_concat[0]

if isinstance(sample, ABCIndexClass):
return sample._concat_same_dtype(to_concat, name=name)
elif isinstance(sample, ABCDatetimeArray):
return sample._concat_same_type(to_concat)


def _concat_index_same_dtype(indexes, klass=None):
Expand Down
16 changes: 5 additions & 11 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,8 +479,8 @@ def _is_boolean(self):
return is_bool_dtype(self.categories)


class DatetimeTZDtype(PandasExtensionDtype):

@register_extension_dtype
class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can now move PandasExtensionDtype to subclass ExtensionDtype, see the note in pandas.core.dtypes.base for _DtypeMixin

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good as a followup item I think. IIRC I created an issue when I had to create PandasExtensionDtype in the first place.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, i agree, do we have an issue about fixing this?

"""
A np.dtype duck-typed class, suitable for holding a custom datetime with tz
dtype.
Expand All @@ -493,6 +493,7 @@ class DatetimeTZDtype(PandasExtensionDtype):
str = '|M8[ns]'
num = 101
base = np.dtype('M8[ns]')
na_value = NaT
_metadata = ('unit', 'tz')
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}
Expand Down Expand Up @@ -570,8 +571,8 @@ def construct_array_type(cls):
-------
type
"""
from pandas import DatetimeIndex
return DatetimeIndex
from pandas.core.arrays import DatetimeArrayMixin
return DatetimeArrayMixin

@classmethod
def construct_from_string(cls, string):
Expand Down Expand Up @@ -885,10 +886,3 @@ def is_dtype(cls, dtype):
else:
return False
return super(IntervalDtype, cls).is_dtype(dtype)


# TODO(Extension): remove the second registry once all internal extension
# dtypes are real extension dtypes.
_pandas_registry = Registry()

_pandas_registry.register(DatetimeTZDtype)
2 changes: 2 additions & 0 deletions pandas/core/dtypes/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def _check(cls, inst):
("extension",
"categorical",
"periodarray",
"datetimearray",
"timedeltaarray",
"npy_extension",
))
ABCPandasArray = create_pandas_abc_type("ABCPandasArray",
Expand Down
Loading