diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 9662c59dddf4c..d0caeb3333548 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -228,6 +228,11 @@ static PyObject *get_values(PyObject *obj) { PRINTMARK(); if (values && !PyArray_CheckExact(values)) { + + if (PyObject_HasAttrString(values, "to_numpy")) { + values = PyObject_CallMethod(values, "to_numpy", NULL); + } + if (PyObject_HasAttrString(values, "values")) { PyObject *subvals = get_values(values); PyErr_Clear(); @@ -279,8 +284,8 @@ static PyObject *get_values(PyObject *obj) { repr = PyString_FromString(""); } - PyErr_Format(PyExc_ValueError, "%s or %s are not JSON serializable yet", - PyString_AS_STRING(repr), PyString_AS_STRING(typeRepr)); + PyErr_Format(PyExc_ValueError, "%R or %R are not JSON serializable yet", + repr, typeRepr); Py_DECREF(repr); Py_DECREF(typeRepr); diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index ab5621d857e89..89fc90589a6e4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -414,6 +414,17 @@ def _formatter(self, boxed=False): # ---------------------------------------------------------------- # Array-Like / EA-Interface Methods + @property + def transpose(self): + # no-op because we are always 1-D + return self + + T = transpose + + def ravel(self, method=None): + # no-op because we are always 1-D + return self + @property def nbytes(self): return self._data.nbytes diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3f32b7b7dcea9..8fefbefc09eef 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -19,7 +19,7 @@ is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.generic 
import ABCIndexClass, ABCSeries +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries, ABCPandasArray from pandas.core.dtypes.missing import isna from pandas.core import ops @@ -240,11 +240,14 @@ def _simple_new(cls, values, freq=None, tz=None): result._dtype = dtype return result - def __new__(cls, values, freq=None, tz=None, dtype=None, copy=False, - dayfirst=False, yearfirst=False, ambiguous='raise'): - return cls._from_sequence( + def __init__(self, values, freq=None, tz=None, dtype=None, copy=False, + dayfirst=False, yearfirst=False, ambiguous='raise'): + result = type(self)._from_sequence( values, freq=freq, tz=tz, dtype=dtype, copy=copy, dayfirst=dayfirst, yearfirst=yearfirst, ambiguous=ambiguous) + self._data = result._data + self._freq = result._freq + self._dtype = result._dtype @classmethod def _from_sequence(cls, data, dtype=None, copy=False, @@ -523,7 +526,9 @@ def _ndarray_values(self): @Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__) def _validate_fill_value(self, fill_value): - if isna(fill_value): + if isna(fill_value) or fill_value == iNaT: + # FIXME: shouldn't allow iNaT through here; see discussion + # in GH#24024 fill_value = iNaT elif isinstance(fill_value, (datetime, np.datetime64)): self._assert_tzawareness_compat(fill_value) @@ -1568,6 +1573,8 @@ def sequence_to_dt64ns(data, dtype=None, copy=False, copy = False elif isinstance(data, ABCSeries): data = data._values + elif isinstance(data, ABCPandasArray): + data = data.to_numpy() if hasattr(data, "freq"): # i.e. 
DatetimeArray/Index diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e1141c6b6b3a8..293ce7d8e4aca 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -10,8 +10,7 @@ from pandas.core.dtypes.dtypes import ( CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, ExtensionDtype, - IntervalDtype, PandasExtensionDtype, PeriodDtype, _pandas_registry, - registry) + IntervalDtype, PandasExtensionDtype, PeriodDtype, registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCCategoricalIndex, ABCDateOffset, ABCDatetimeIndex, ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries, ABCSparseArray, @@ -1984,7 +1983,7 @@ def pandas_dtype(dtype): return dtype # registered extension types - result = _pandas_registry.find(dtype) or registry.find(dtype) + result = registry.find(dtype) if result is not None: return result diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0501889d743d4..e6967ed2a4d3d 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -12,8 +12,8 @@ is_extension_array_dtype, is_interval_dtype, is_object_dtype, is_period_dtype, is_sparse, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( - ABCDatetimeIndex, ABCPeriodIndex, ABCRangeIndex, ABCSparseDataFrame, - ABCTimedeltaIndex) + ABCDatetimeArray, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, + ABCRangeIndex, ABCSparseDataFrame, ABCTimedeltaIndex) from pandas import compat @@ -471,7 +471,15 @@ def _concat_datetimetz(to_concat, name=None): all inputs must be DatetimeIndex it is used in DatetimeIndex.append also """ - return to_concat[0]._concat_same_dtype(to_concat, name=name) + # Right now, internals will pass a List[DatetimeArray] here + # for reductions like quantile. I would like to disentangle + # all this before we get here. 
+ sample = to_concat[0] + + if isinstance(sample, ABCIndexClass): + return sample._concat_same_dtype(to_concat, name=name) + elif isinstance(sample, ABCDatetimeArray): + return sample._concat_same_type(to_concat) def _concat_index_same_dtype(indexes, klass=None): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index e0d0cf3393dd5..b4a46ef049809 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -479,8 +479,8 @@ def _is_boolean(self): return is_bool_dtype(self.categories) -class DatetimeTZDtype(PandasExtensionDtype): - +@register_extension_dtype +class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom datetime with tz dtype. @@ -493,6 +493,7 @@ class DatetimeTZDtype(PandasExtensionDtype): str = '|M8[ns]' num = 101 base = np.dtype('M8[ns]') + na_value = NaT _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P.+), (?P.+)\]") _cache = {} @@ -570,8 +571,8 @@ def construct_array_type(cls): ------- type """ - from pandas import DatetimeIndex - return DatetimeIndex + from pandas.core.arrays import DatetimeArrayMixin + return DatetimeArrayMixin @classmethod def construct_from_string(cls, string): @@ -885,10 +886,3 @@ def is_dtype(cls, dtype): else: return False return super(IntervalDtype, cls).is_dtype(dtype) - - -# TODO(Extension): remove the second registry once all internal extension -# dtypes are real extension dtypes. 
-_pandas_registry = Registry() - -_pandas_registry.register(DatetimeTZDtype) diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index bbc447d6fa0da..5c659b35bd0cc 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -67,6 +67,8 @@ def _check(cls, inst): ("extension", "categorical", "periodarray", + "datetimearray", + "timedeltaarray", "npy_extension", )) ABCPandasArray = create_pandas_abc_type("ABCPandasArray", diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index c9ed2521676ad..fca789030f9a4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -35,11 +35,11 @@ import pandas.core.algorithms as algos from pandas.core.arrays import ( - Categorical, DatetimeArrayMixin as DatetimeArray, ExtensionArray) + Categorical, DatetimeArrayMixin as DatetimeArray, ExtensionArray, + TimedeltaArrayMixin as TimedeltaArray) from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.indexes.datetimes import DatetimeIndex -from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.indexing import check_setitem_lengths from pandas.core.internals.arrays import extract_array import pandas.core.missing as missing @@ -2169,7 +2169,7 @@ class DatetimeLikeBlockMixin(object): @property def _holder(self): - return DatetimeIndex + return DatetimeArray @property def _na_value(self): @@ -2179,15 +2179,33 @@ def _na_value(self): def fill_value(self): return tslibs.iNaT + def to_dense(self): + # TODO(DatetimeBlock): remove + return np.asarray(self.values) + def get_values(self, dtype=None): """ return object dtype as boxed values, such as Timestamps/Timedelta """ if is_object_dtype(dtype): - return lib.map_infer(self.values.ravel(), - self._box_func).reshape(self.values.shape) + values = self.values + + if self.ndim > 1: + values = values.ravel() + + values = lib.map_infer(values, self._box_func) + + if self.ndim > 1: + values = 
values.reshape(self.values.shape) + + return values + return self.values + @property + def asi8(self): + return self.values.view('i8') + class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): __slots__ = () @@ -2198,13 +2216,15 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): def __init__(self, values, placement, ndim=None): if values.dtype != _TD_DTYPE: values = conversion.ensure_timedelta64ns(values) - + if isinstance(values, TimedeltaArray): + values = values._data + assert isinstance(values, np.ndarray), type(values) super(TimeDeltaBlock, self).__init__(values, placement=placement, ndim=ndim) @property def _holder(self): - return TimedeltaIndex + return TimedeltaArray @property def _box_func(self): @@ -2299,6 +2319,9 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, dtype=object) return rvalues + def external_values(self, dtype=None): + return np.asarray(self.values.astype("timedelta64[ns]", copy=False)) + class BoolBlock(NumericBlock): __slots__ = () @@ -2771,6 +2794,10 @@ def _maybe_coerce_values(self, values): """ if values.dtype != _NS_DTYPE: values = conversion.ensure_datetime64ns(values) + if isinstance(values, DatetimeArray): + values = values._data + + assert isinstance(values, np.ndarray), type(values) return values def _astype(self, dtype, **kwargs): @@ -2857,15 +2884,17 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None, """ convert to our native types format, slicing if desired """ values = self.values + i8values = self.asi8 + if slicer is not None: - values = values[..., slicer] + i8values = i8values[..., slicer] from pandas.io.formats.format import _get_format_datetime64_from_values format = _get_format_datetime64_from_values(values, date_format) result = tslib.format_array_from_datetime( - values.view('i8').ravel(), tz=getattr(self.values, 'tz', None), - format=format, na_rep=na_rep).reshape(values.shape) + i8values.ravel(), tz=getattr(self.values, 'tz', None), + format=format, 
na_rep=na_rep).reshape(i8values.shape) return np.atleast_2d(result) def should_store(self, value): @@ -2885,12 +2914,16 @@ def set(self, locs, values, check=False): self.values[locs] = values + def external_values(self): + return np.asarray(self.values.astype('datetime64[ns]', copy=False)) -class DatetimeTZBlock(NonConsolidatableMixIn, DatetimeBlock): + +class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): """ implement a datetime64 block with a tz attribute """ __slots__ = () _concatenator = staticmethod(_concat._concat_datetime) is_datetimetz = True + is_extension = True def __init__(self, values, placement, ndim=2, dtype=None): # XXX: This will end up calling _maybe_coerce_values twice @@ -2905,6 +2938,10 @@ def __init__(self, values, placement, ndim=2, dtype=None): super(DatetimeTZBlock, self).__init__(values, placement=placement, ndim=ndim) + @property + def _holder(self): + return DatetimeArray + def _maybe_coerce_values(self, values, dtype=None): """Input validation for values passed to __init__. Ensure that we have datetime64TZ, coercing if necessary. @@ -2926,7 +2963,8 @@ def _maybe_coerce_values(self, values, dtype=None): if dtype is not None: if isinstance(dtype, compat.string_types): dtype = DatetimeTZDtype.construct_from_string(dtype) - values = values._shallow_copy(tz=dtype.tz) + values = type(values)(values, dtype=dtype) + # TODO: shouldn't this be done via tz_convert? 
if values.tz is None: raise ValueError("cannot create a DatetimeTZBlock without a tz") @@ -2936,8 +2974,8 @@ def _maybe_coerce_values(self, values, dtype=None): @property def is_view(self): """ return a boolean if I am possibly a view """ - # check the ndarray values of the DatetimeIndex values - return self.values.values.base is not None + # check the ndarray values of the DatetimeArray values + return self.values._data.base is not None def copy(self, deep=True): """ copy constructor """ @@ -2946,18 +2984,40 @@ def copy(self, deep=True): values = values.copy(deep=True) return self.make_block_same_class(values) - def external_values(self): - """ we internally represent the data as a DatetimeIndex, but for - external compat with ndarray, export as a ndarray of Timestamps + def get_values(self, dtype=None): """ - return self.values.astype('datetime64[ns]').values + Returns an ndarray of values. - def get_values(self, dtype=None): + Parameters + ---------- + dtype : np.dtype + Only `object`-like dtypes are respected here (not sure + why). + + Returns + ------- + values : ndarray + When ``dtype=object``, then an object-dtype ndarray of + boxed values is returned. Otherwise, an M8[ns] ndarray + is returned. + + DatetimeArray is always 1-d. ``get_values`` will reshape + the return value to be the same dimensionality as the + block. + """ # return object dtype as Timestamps with the zones + values = self.values if is_object_dtype(dtype): - return lib.map_infer( - self.values.ravel(), self._box_func).reshape(self.values.shape) - return self.values + values = values._box_values(values._data) + + values = np.asarray(values) + + if self.ndim == 2: + # Ensure that our shape is correct for DataFrame. + # ExtensionArrays are always 1-D, even in a DataFrame when + # the analogous NumPy-backed column would be a 2-D ndarray.
+ values = values.reshape(1, -1) + return values def _slice(self, slicer): """ return a slice of my values """ @@ -2982,13 +3042,19 @@ def _try_coerce_args(self, values, other): base-type values, base-type other """ # asi8 is a view, needs copy - values = _block_shape(values.asi8, ndim=self.ndim) + values = _block_shape(values.view("i8"), ndim=self.ndim) if isinstance(other, ABCSeries): other = self._holder(other) if isinstance(other, bool): raise TypeError + elif is_datetime64_dtype(other): + # add the tz back + # FIXME: we shouldn't be ravelling at this point, but otherwise + # this raises on tests.frame.test_quantile.test_quantile_box + other = self._holder(other.ravel(), dtype=self.dtype) + elif (is_null_datelike_scalar(other) or (lib.is_scalar(other) and isna(other))): other = tslibs.iNaT @@ -3023,7 +3089,8 @@ def _try_coerce_result(self, result): result = result.reshape(np.prod(result.shape)) # GH#24096 new values invalidates a frequency - result = self.values._shallow_copy(result, freq=None) + result = self._holder._simple_new(result, freq=None, + tz=self.values.tz) return result @@ -3031,32 +3098,6 @@ def _try_coerce_result(self, result): def _box_func(self): return lambda x: tslibs.Timestamp(x, tz=self.dtype.tz) - def shift(self, periods, axis=0, fill_value=None): - """ shift the block by periods """ - - # think about moving this to the DatetimeIndex.
This is a non-freq - # (number of periods) shift ### - - N = len(self) - indexer = np.zeros(N, dtype=int) - if periods > 0: - indexer[periods:] = np.arange(N - periods) - else: - indexer[:periods] = np.arange(-periods, N) - - new_values = self.values.asi8.take(indexer) - - if isna(fill_value): - fill_value = tslibs.iNaT - if periods > 0: - new_values[:periods] = fill_value - else: - new_values[periods:] = fill_value - - new_values = self.values._shallow_copy(new_values) - return [self.make_block_same_class(new_values, - placement=self.mgr_locs)] - def diff(self, n, axis=0): """1st discrete difference @@ -3086,14 +3127,45 @@ def diff(self, n, axis=0): return [TimeDeltaBlock(new_values, placement=self.mgr_locs.indexer)] def concat_same_type(self, to_concat, placement=None): - """ - Concatenate list of single blocks of the same type. - """ - values = self._concatenator([blk.values for blk in to_concat], - axis=self.ndim - 1) - # not using self.make_block_same_class as values can be non-tz dtype - return make_block( - values, placement=placement or slice(0, len(values), 1)) + # need to handle concat([tz1, tz2]) here, since DatetimeArray + # only handles cases where all the tzs are the same. + # Instead of placing the condition here, it could also go into the + # is_uniform_join_units check, but I'm not sure what is better + + if len({x.dtype for x in to_concat}) > 1: + values = _concat._concat_datetime([x.values for x in to_concat]) + placement = placement or slice(0, len(values), 1) + + if self.ndim > 1: + values = np.atleast_2d(values) + return ObjectBlock(values, ndim=self.ndim, placement=placement) + return super(DatetimeTZBlock, self).concat_same_type(to_concat, + placement) + + def fillna(self, value, limit=None, inplace=False, downcast=None): + # We support filling a DatetimeTZ with a `value` whose timezone + # is different by coercing to object. 
+ try: + return super(DatetimeTZBlock, self).fillna( + value, limit, inplace, downcast + ) + except (ValueError, TypeError): + # different timezones, or a non-tz + return self.astype(object).fillna( + value, limit=limit, inplace=inplace, downcast=downcast + ) + + def setitem(self, indexer, value): + # https://github.com/pandas-dev/pandas/issues/24020 + # Need a dedicated setitem until #24020 (type promotion in setitem + # for extension arrays) is designed and implemented. + try: + return super(DatetimeTZBlock, self).setitem(indexer, value) + except (ValueError, TypeError): + newb = make_block(self.values.astype(object), + placement=self.mgr_locs, + klass=ObjectBlock,) + return newb.setitem(indexer, value) # ----------------------------------------------------------------- @@ -3115,8 +3187,16 @@ def get_block_type(values, dtype=None): dtype = dtype or values.dtype vtype = dtype.type - if is_categorical(values): + if is_sparse(dtype): + # Need this first(ish) so that Sparse[datetime] is sparse + cls = ExtensionBlock + elif is_categorical(values): cls = CategoricalBlock + elif issubclass(vtype, np.datetime64): + assert not is_datetime64tz_dtype(values) + cls = DatetimeBlock + elif is_datetime64tz_dtype(values): + cls = DatetimeTZBlock elif is_interval_dtype(dtype) or is_period_dtype(dtype): cls = ObjectValuesExtensionBlock elif is_extension_array_dtype(values): @@ -3128,11 +3208,6 @@ def get_block_type(values, dtype=None): cls = TimeDeltaBlock elif issubclass(vtype, np.complexfloating): cls = ComplexBlock - elif issubclass(vtype, np.datetime64): - assert not is_datetime64tz_dtype(values) - cls = DatetimeBlock - elif is_datetime64tz_dtype(values): - cls = DatetimeTZBlock elif issubclass(vtype, np.integer): cls = IntBlock elif dtype == np.bool_: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index b18b966406bbb..b3c893c7d84be 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ 
-589,7 +589,7 @@ def sanitize_array(data, index, dtype=None, copy=False, # everything else in this block must also handle ndarray's, # becuase we've unwrapped PandasArray into an ndarray. - if dtype is not None and not data.dtype.is_dtype(dtype): + if dtype is not None: subarr = data.astype(dtype) if copy: diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 7cab52ddda87f..3c966b036aac8 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1539,17 +1539,20 @@ def wrapper(left, right): raise TypeError("{typ} cannot perform the operation " "{op}".format(typ=type(left).__name__, op=str_rep)) - elif (is_extension_array_dtype(left) or - (is_extension_array_dtype(right) and not is_scalar(right))): - # GH#22378 disallow scalar to exclude e.g. "category", "Int64" - return dispatch_to_extension_op(op, left, right) - elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left): + # Give dispatch_to_index_op a chance for tests like + # test_dt64_series_add_intlike, which the index dispatching handles + # specifically. result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex) return construct_result(left, result, index=left.index, name=res_name, dtype=result.dtype) + elif (is_extension_array_dtype(left) or + (is_extension_array_dtype(right) and not is_scalar(right))): + # GH#22378 disallow scalar to exclude e.g. 
"category", "Int64" + return dispatch_to_extension_op(op, left, right) + elif is_timedelta64_dtype(left): result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex) return construct_result(left, result, diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 77dc04e9453a9..baffea3cd21d2 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -9,7 +9,7 @@ from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype, registry, _pandas_registry) + IntervalDtype, CategoricalDtype, registry) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, @@ -804,14 +804,6 @@ def test_registry(dtype): assert dtype in registry.dtypes -@pytest.mark.parametrize('dtype', [ - DatetimeTZDtype, -]) -def test_pandas_registry(dtype): - assert dtype not in registry.dtypes - assert dtype in _pandas_registry.dtypes - - @pytest.mark.parametrize('dtype, expected', [ ('int64', None), ('interval', IntervalDtype()), @@ -824,13 +816,6 @@ def test_registry_find(dtype, expected): assert registry.find(dtype) == expected -@pytest.mark.parametrize('dtype, expected', [ - ('datetime64[ns, US/Eastern]', DatetimeTZDtype('ns', 'US/Eastern')), -]) -def test_pandas_registry_find(dtype, expected): - assert _pandas_registry.find(dtype) == expected - - @pytest.mark.parametrize('dtype, expected', [ (str, False), (int, False), diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index d58b7ddf29123..bd50584406312 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -157,6 +157,12 @@ def astype(self, dtype, copy=True): # NumPy has issues when all the dicts are the same length. # np.array([UserDict(...), UserDict(...)]) fails, # but np.array([{...}, {...}]) works, so cast. 
+ + # needed to add this check for the Series constructor + if isinstance(dtype, type(self.dtype)) and dtype == self.dtype: + if copy: + return self.copy() + return self return np.array([dict(x) for x in self], dtype=dtype, copy=copy) def unique(self): diff --git a/pandas/tests/extension/test_common.py b/pandas/tests/extension/test_common.py index 2bc4bf5df2298..db3f3b80bca6b 100644 --- a/pandas/tests/extension/test_common.py +++ b/pandas/tests/extension/test_common.py @@ -77,14 +77,6 @@ def test_astype_no_copy(): assert arr is not result -@pytest.mark.parametrize('dtype', [ - dtypes.DatetimeTZDtype('ns', 'US/Central'), -]) -def test_is_not_extension_array_dtype(dtype): - assert not isinstance(dtype, dtypes.ExtensionDtype) - assert not is_extension_array_dtype(dtype) - - @pytest.mark.parametrize('dtype', [ dtypes.CategoricalDtype(), dtypes.IntervalDtype(), diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py new file mode 100644 index 0000000000000..7c4491d6edbcf --- /dev/null +++ b/pandas/tests/extension/test_datetime.py @@ -0,0 +1,237 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray +from pandas.tests.extension import base + + +@pytest.fixture(params=["US/Central"]) +def dtype(request): + return DatetimeTZDtype(unit="ns", tz=request.param) + + +@pytest.fixture +def data(dtype): + data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), + dtype=dtype) + return data + + +@pytest.fixture +def data_missing(dtype): + return DatetimeArray( + np.array(['NaT', '2000-01-01'], dtype='datetime64[ns]'), + dtype=dtype + ) + + +@pytest.fixture +def data_for_sorting(dtype): + a = pd.Timestamp('2000-01-01') + b = pd.Timestamp('2000-01-02') + c = pd.Timestamp('2000-01-03') + return DatetimeArray(np.array([b, c, a], dtype='datetime64[ns]'), + dtype=dtype) + + +@pytest.fixture +def 
data_missing_for_sorting(dtype): + a = pd.Timestamp('2000-01-01') + b = pd.Timestamp('2000-01-02') + return DatetimeArray(np.array([b, 'NaT', a], dtype='datetime64[ns]'), + dtype=dtype) + + +@pytest.fixture +def data_for_grouping(dtype): + """ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + a = pd.Timestamp('2000-01-01') + b = pd.Timestamp('2000-01-02') + c = pd.Timestamp('2000-01-03') + na = 'NaT' + return DatetimeArray(np.array([b, b, na, na, a, a, b, c], + dtype='datetime64[ns]'), + dtype=dtype) + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return a is pd.NaT and a is b + return cmp + + +@pytest.fixture +def na_value(): + return pd.NaT + + +# ---------------------------------------------------------------------------- +class BaseDatetimeTests(object): + pass + + +# ---------------------------------------------------------------------------- +# Tests +class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): + pass + + +class TestMethods(BaseDatetimeTests, base.BaseMethodsTests): + @pytest.mark.skip(reason="Incorrect expected") + def test_value_counts(self, all_data, dropna): + pass + + def test_combine_add(self, data_repeated): + # Timestamp.__add__(Timestamp) not defined + pass + + +class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): + + def test_array_interface(self, data): + if data.tz: + # np.asarray(DTA) is currently always tz-naive. 
+ pytest.skip("GH-23569") + else: + super(TestInterface, self).test_array_interface(data) + + +class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests): + implements = {'__sub__', '__rsub__'} + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], + exc=None) + else: + # ... but not the rest. + super(TestArithmeticOps, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) + + def test_add_series_with_extension_array(self, data): + # Datetime + Datetime not implemented + s = pd.Series(data) + msg = 'cannot add DatetimeArray(Mixin)? and DatetimeArray(Mixin)?' + with pytest.raises(TypeError, match=msg): + s + data + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], + exc=None) + else: + # ... but not the rest. + super(TestArithmeticOps, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) + + def test_error(self, data, all_arithmetic_operators): + pass + + @pytest.mark.xfail(reason="different implementation", strict=False) + def test_direct_arith_with_series_returns_not_implemented(self, data): + # Right now, we have trouble with this. Returning NotImplemented + # fails other tests like + # tests/arithmetic/test_datetime64::TestTimestampSeriesArithmetic:: + # test_dt64_series_add_intlike + return super( + TestArithmeticOps, + self + ).test_direct_arith_with_series_returns_not_implemented(data) + + +class TestCasting(BaseDatetimeTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests): + + def _compare_other(self, s, data, op_name, other): + # the base test is not appropriate for us. We raise on comparison + # with (some) integers, depending on the value.
+ pass + + @pytest.mark.xfail(reason="different implementation", strict=False) + def test_direct_arith_with_series_returns_not_implemented(self, data): + return super( + TestComparisonOps, + self + ).test_direct_arith_with_series_returns_not_implemented(data) + + +class TestMissing(BaseDatetimeTests, base.BaseMissingTests): + pass + + +class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests): + + @pytest.mark.skip(reason="We have DatetimeTZBlock") + def test_concat(self, data, in_frame): + pass + + def test_concat_mixed_dtypes(self, data): + # concat(Series[datetimetz], Series[category]) uses a + # plain np.array(values) on the DatetimeArray, which + # drops the tz. + super(TestReshaping, self).test_concat_mixed_dtypes(data) + + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, obj): + # GH-13287: can't use base test, since building the expected fails. + data = DatetimeArray._from_sequence(['2000', '2001', '2002', '2003'], + tz='US/Central') + index = pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), + names=['a', 'b']) + + if obj == "series": + ser = pd.Series(data, index=index) + expected = pd.DataFrame({ + "A": data.take([0, 1]), + "B": data.take([2, 3]) + }, index=pd.Index(['a', 'b'], name='b')) + expected.columns.name = 'a' + + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + expected = pd.DataFrame( + {("A", "A"): data.take([0, 1]), + ("A", "B"): data.take([2, 3]), + ("B", "A"): data.take([0, 1]), + ("B", "B"): data.take([2, 3])}, + index=pd.Index(['a', 'b'], name='b') + ) + expected.columns.names = [None, 'a'] + + result = ser.unstack(0) + self.assert_equal(result, expected) + + +class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): + pass diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index 
a21d0104b0d04..b877ed93f07a2 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -3244,8 +3244,8 @@ def test_setitem(self): # are copies) b1 = df._data.blocks[1] b2 = df._data.blocks[2] - assert b1.values.equals(b2.values) - assert id(b1.values.values.base) != id(b2.values.values.base) + tm.assert_extension_array_equal(b1.values, b2.values) + assert id(b1.values._data.base) != id(b2.values._data.base) # with nan df2 = df.copy() diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 26cd39c4b807c..f7eb7e8443f55 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -11,8 +11,12 @@ from distutils.version import LooseVersion import itertools from pandas import (Index, MultiIndex, DataFrame, DatetimeIndex, - Series, Categorical, TimedeltaIndex, SparseArray) + Series, Categorical, SparseArray) from pandas.compat import OrderedDict, lrange +from pandas.core.arrays import ( + DatetimeArrayMixin as DatetimeArray, + TimedeltaArrayMixin as TimedeltaArray, +) from pandas.core.internals import (SingleBlockManager, make_block, BlockManager) import pandas.core.algorithms as algos @@ -290,7 +294,7 @@ def test_make_block_same_class(self): block = create_block('M8[ns, US/Eastern]', [3]) with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): - block.make_block_same_class(block.values.values, + block.make_block_same_class(block.values, dtype=block.values.dtype) @@ -451,7 +455,7 @@ def test_copy(self, mgr): assert cp_blk.values.base is blk.values.base else: # DatetimeTZBlock has DatetimeIndex values - assert cp_blk.values.values.base is blk.values.values.base + assert cp_blk.values._data.base is blk.values._data.base cp = mgr.copy(deep=True) for blk, cp_blk in zip(mgr.blocks, cp.blocks): @@ -460,7 +464,7 @@ def test_copy(self, mgr): # some blocks it is an array (e.g. 
datetimetz), but was copied assert cp_blk.equals(blk) if not isinstance(cp_blk.values, np.ndarray): - assert cp_blk.values.values.base is not blk.values.values.base + assert cp_blk.values._data.base is not blk.values._data.base else: assert cp_blk.values.base is None and blk.values.base is None @@ -1258,9 +1262,9 @@ def test_binop_other(self, op, value, dtype): @pytest.mark.parametrize('typestr, holder', [ ('category', Categorical), - ('M8[ns]', DatetimeIndex), - ('M8[ns, US/Central]', DatetimeIndex), - ('m8[ns]', TimedeltaIndex), + ('M8[ns]', DatetimeArray), + ('M8[ns, US/Central]', DatetimeArray), + ('m8[ns]', TimedeltaArray), ('sparse', SparseArray), ]) def test_holder(typestr, holder): diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 0706cb12ac5d0..4bc8d41d11823 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1016,13 +1016,17 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): s = Series({'date': date, 'a': 1.0, 'b': 2.0}) df = DataFrame(columns=['c', 'd']) result = df.append(s, ignore_index=True) + # n.b. it's not clear to me that expected is correct here. + # It's possible that the `date` column should have + # datetime64[ns, tz] dtype for both result and expected. + # that would be more consistent with new columns having + # their own dtype (float for a and b, datetime64ns, tz for date). expected = DataFrame([[np.nan, np.nan, 1., 2., date]], - columns=['c', 'd', 'a', 'b', 'date']) + columns=['c', 'd', 'a', 'b', 'date'], + dtype=object) # These columns get cast to object after append - object_cols = ['c', 'd', 'date'] - expected.loc[:, object_cols] = expected.loc[:, object_cols].astype( - object - ) + expected['a'] = expected['a'].astype(float) + expected['b'] = expected['b'].astype(float) assert_frame_equal(result, expected)