diff --git a/doc/source/api/arrays.rst b/doc/source/api/arrays.rst index 5ecc5181af22c..8a66e1d87c1c6 100644 --- a/doc/source/api/arrays.rst +++ b/doc/source/api/arrays.rst @@ -136,7 +136,7 @@ Methods A collection of timestamps may be stored in a :class:`arrays.DatetimeArray`. For timezone-aware data, the ``.dtype`` of a ``DatetimeArray`` is a -:class:`DatetimeTZDtype`. For timezone-naive data, ``np.dtype("datetime64[ns]")`` +:class:`DatetimeTZDtype`. For timezone-naive data, :class:`DatetimeDtype` is used. If the data are tz-aware, then every value in the array must have the same timezone. @@ -145,6 +145,7 @@ If the data are tz-aware, then every value in the array must have the same timez :toctree: generated/ arrays.DatetimeArray + DatetimeDtype DatetimeTZDtype .. _api.arrays.timedelta: diff --git a/pandas/core/api.py b/pandas/core/api.py index afc929c39086c..3bb659a83ccae 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -21,7 +21,9 @@ CategoricalDtype, PeriodDtype, IntervalDtype, + DatetimeDtype, DatetimeTZDtype, + TimedeltaDtype, ) from pandas.core.arrays import Categorical, array from pandas.core.groupby import Grouper diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index a2d67efbecbba..f622ae96c6ef5 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -18,7 +18,7 @@ is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_type, is_float_dtype, is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype) -from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import DatetimeDtype, DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCPandasArray, ABCSeries) from pandas.core.dtypes.missing import isna @@ -334,6 +334,8 @@ def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False): # a tz-aware Timestamp (with a tz specific to its datetime) will # be incorrect(ish?) 
for the array as a whole dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) + else: + dtype = DatetimeDtype() self._data = values self._dtype = dtype @@ -1987,7 +1989,8 @@ def _validate_dt64_dtype(dtype): if dtype is not None: dtype = pandas_dtype(dtype) if ((isinstance(dtype, np.dtype) and dtype != _NS_DTYPE) - or not isinstance(dtype, (np.dtype, DatetimeTZDtype))): + or not isinstance(dtype, (np.dtype, DatetimeTZDtype, + DatetimeDtype))): raise ValueError("Unexpected value for 'dtype': '{dtype}'. " "Must be 'datetime64[ns]' or DatetimeTZDtype'." .format(dtype=dtype)) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index be1a7097b0e0d..e1cc86ba9245b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -19,7 +19,7 @@ is_integer_dtype, is_list_like, is_object_dtype, is_scalar, is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, pandas_dtype) -from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype, TimedeltaDtype from pandas.core.dtypes.generic import ( ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex) from pandas.core.dtypes.missing import isna @@ -127,7 +127,7 @@ def _box_func(self): @property def dtype(self): - return _TD_DTYPE + return self._dtype # ---------------------------------------------------------------- # Constructors @@ -160,16 +160,8 @@ def __init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): # nanosecond UTC (or tz-naive) unix timestamps values = values.view(_TD_DTYPE) - if values.dtype != _TD_DTYPE: - raise TypeError(_BAD_DTYPE.format(dtype=values.dtype)) - - try: - dtype_mismatch = dtype != _TD_DTYPE - except TypeError: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) - else: - if dtype_mismatch: - raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + _validate_td64_dtype(values.dtype) + dtype = _validate_td64_dtype(dtype) if freq == "infer": msg = ( @@ -192,21 +184,19 @@ def 
__init__(self, values, dtype=_TD_DTYPE, freq=None, copy=False): @classmethod def _simple_new(cls, values, freq=None, dtype=_TD_DTYPE): - assert dtype == _TD_DTYPE, dtype + dtype = _validate_td64_dtype(dtype) assert isinstance(values, np.ndarray), type(values) result = object.__new__(cls) result._data = values.view(_TD_DTYPE) result._freq = to_offset(freq) - result._dtype = _TD_DTYPE + result._dtype = dtype return result @classmethod def _from_sequence(cls, data, dtype=_TD_DTYPE, copy=False, freq=None, unit=None): - if dtype != _TD_DTYPE: - raise ValueError("Only timedelta64[ns] dtype is valid.") - + _validate_td64_dtype(dtype) freq, freq_infer = dtl.maybe_infer_freq(freq) data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit) @@ -1015,3 +1005,40 @@ def _generate_regular_range(start, end, periods, offset): data = np.arange(b, e, stride, dtype=np.int64) return data + + +def _validate_td64_dtype(dtype): + """ + Validate a dtype for TimedeltaArray. + + Parameters + ---------- + dtype : Union[str, numpy.dtype, Timedelta] + Only np.dtype("m8[ns]") is allowed numpy dtypes. 
+ + Returns + ------- + TimedeltaDtype + """ + if isinstance(dtype, compat.string_types): + try: + dtype = np.dtype(dtype) + except TypeError: + # not a Numpy dtype + pass + + if isinstance(dtype, np.dtype): + if dtype != _TD_DTYPE: + raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + + dtype = TimedeltaDtype() + + elif isinstance(dtype, compat.string_types): + if dtype != "ns": + raise TypeError(_BAD_DTYPE.format(dtype=dtype)) + dtype = TimedeltaDtype(dtype) + + if dtype != TimedeltaDtype(): + raise ValueError("Only timedelta64[ns] dtype is valid") + + return dtype diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index e9bf0f87088db..cc7ac05aee4ee 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -4,12 +4,12 @@ import numpy as np from pandas._libs import algos, lib -from pandas._libs.tslibs import conversion +from pandas._libs.tslibs import Timedelta, Timestamp, conversion from pandas.compat import PY3, PY36, string_types from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, - PandasExtensionDtype, PeriodDtype, registry) + PandasExtensionDtype, PeriodDtype, TimedeltaDtype, registry) from pandas.core.dtypes.generic import ( ABCCategorical, ABCDateOffset, ABCDatetimeIndex, ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries) @@ -426,9 +426,24 @@ def is_datetime64_dtype(arr_or_dtype): True >>> is_datetime64_dtype([1, 2, 3]) False + >>> is_datetime64_dtype(pd.DatetimeDtype()) + True + >>> is_datetime64_dtype(pd.DatetimeTZDtype(tz="CET")) + False """ + # It's somewhat tricky to support both of the following: + # 1. is_datetime64_dtype(DatetimeDtype()) == True + # 2. is_datetime64_dtype(DatetimeTZDtype()) == False + # because both use `Timestamp` as the `type`. + # So we look at the `dtype` to see if there's a `.tz` attached. 
+ dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + try: + dtype = pandas_dtype(dtype) + except (ValueError, TypeError): + dtype = None - return _is_dtype_type(arr_or_dtype, classes(np.datetime64)) + return (_is_dtype_type(arr_or_dtype, classes(np.datetime64, Timestamp)) + and getattr(dtype, 'tz', None) is None) def is_datetime64tz_dtype(arr_or_dtype): @@ -497,7 +512,7 @@ def is_timedelta64_dtype(arr_or_dtype): False """ - return _is_dtype_type(arr_or_dtype, classes(np.timedelta64)) + return _is_dtype_type(arr_or_dtype, classes(np.timedelta64, Timedelta)) def is_period_dtype(arr_or_dtype): @@ -1192,7 +1207,10 @@ def is_timedelta64_ns_dtype(arr_or_dtype): >>> is_timedelta64_ns_dtype(np.array([1, 2], dtype=np.timedelta64)) False """ - return _is_dtype(arr_or_dtype, lambda dtype: dtype == _TD_DTYPE) + def condition(dtype): + return isinstance(dtype, TimedeltaDtype) or dtype == _TD_DTYPE + + return _is_dtype(arr_or_dtype, condition) def is_datetime_or_timedelta_dtype(arr_or_dtype): @@ -1229,9 +1247,11 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype): >>> is_datetime_or_timedelta_dtype(np.array([], dtype=np.datetime64)) True """ - - return _is_dtype_type( - arr_or_dtype, classes(np.datetime64, np.timedelta64)) + dtype = getattr(arr_or_dtype, 'dtype', arr_or_dtype) + return (_is_dtype_type( + arr_or_dtype, classes(np.datetime64, np.timedelta64, + Timestamp, Timedelta)) + and getattr(dtype, 'tz', None) is None) def _is_unorderable_exception(e): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index f84471c3b04e8..291816522d38a 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -3,10 +3,10 @@ import warnings import numpy as np -import pytz from pandas._libs.interval import Interval -from pandas._libs.tslibs import NaT, Period, Timestamp, timezones +from pandas._libs.tslibs import NaT, Period, Timedelta, Timestamp, timezones +from pandas._libs.tslibs.timezones import tz_compare from pandas.core.dtypes.generic 
import ABCCategoricalIndex, ABCIndexClass @@ -573,9 +573,7 @@ def _is_boolean(self): return is_bool_dtype(self.categories) -@register_extension_dtype -class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): - +class _DatetimeDtypeBase(PandasExtensionDtype, ExtensionDtype): """ A np.dtype duck-typed class, suitable for holding a custom datetime with tz dtype. @@ -583,15 +581,110 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype): THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.datetime64[ns] """ + _match = None # type: typing.re.Pattern type = Timestamp kind = 'M' str = '|M8[ns]' num = 101 base = np.dtype('M8[ns]') na_value = NaT + + @property + def unit(self): + """The precision of the datetime data.""" + return self._unit + + @classmethod + def construct_array_type(cls): + """ + Return the array type associated with this dtype + + Returns + ------- + type + """ + from pandas.core.arrays import DatetimeArray + return DatetimeArray + + @classmethod + def construct_from_string(cls, string): + """ + Construct a DatetimeTZDtype from a string. + + Parameters + ---------- + string : str + The string alias for this DatetimeTZDtype. + Should be formatted like ``datetime64[ns, <tz>]``, + where ``<tz>`` is the timezone name. + + Examples + -------- + >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') + datetime64[ns, UTC] + """ + if isinstance(string, compat.string_types): + msg = "Could not construct DatetimeTZDtype from '{}'" + try: + match = cls._match.match(string) + if match: + d = match.groupdict() + return cls(**d) + except Exception: + # TODO(py3): Change this pass to `raise TypeError(msg) from e` + pass + raise TypeError(msg.format(string)) + + raise TypeError("Could not construct DatetimeTZDtype") + + @property + def name(self): + """A string representation of the dtype.""" + return str(self) + + def __hash__(self): + # make myself hashable + # TODO: update this. 
+ return hash(str(self)) + + def __eq__(self, other): + if isinstance(other, compat.string_types): + return other == self.name + + return (isinstance(other, type(self)) and + self.unit == other.unit and + # TODO: figure out why this was needed + # failure in test_get_loc_datetimelike_overlapping + # is something not being normalized? + tz_compare(getattr(self, 'tz', None), + getattr(other, 'tz', None))) + + +class DatetimeDtype(_DatetimeDtypeBase): + # This does not register itself as an ExtensionDtype. + # We found it easier to let pandas_dtype('M8[ns]') continue + # to be np.dtype('M8[ns]') + # Registering as an extension dtype caused issues in places + # like is_dtype_equal, is_datetime*, etc. + _metadata = ('unit',) + _match = re.compile(r"(datetime64|M8)\[ns\]") + + def __init__(self, unit="ns"): + assert unit == "ns" + self._unit = unit + + def __unicode__(self): + return 'datetime64[ns]' + + def __getstate__(self): + # override PandasExtensionDtype.__getstate__ + return self.__dict__ + + +@register_extension_dtype +class DatetimeTZDtype(_DatetimeDtypeBase): _metadata = ('unit', 'tz') _match = re.compile(r"(datetime64|M8)\[(?P.+), (?P.+)\]") - _cache = {} def __init__(self, unit="ns", tz=None): """ @@ -639,92 +732,86 @@ def __init__(self, unit="ns", tz=None): if tz: tz = timezones.maybe_get_tz(tz) - elif tz is not None: - raise pytz.UnknownTimeZoneError(tz) - elif tz is None: + else: raise TypeError("A 'tz' is required.") self._unit = unit self._tz = tz - @property - def unit(self): - """The precision of the datetime data.""" - return self._unit - @property def tz(self): """The timezone.""" return self._tz - @classmethod - def construct_array_type(cls): - """ - Return the array type associated with this dtype + def __unicode__(self): + return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) - Returns - ------- - type - """ - from pandas.core.arrays import DatetimeArray - return DatetimeArray + def __setstate__(self, state): + # for pickle compat. 
+ self._tz = state['tz'] + self._unit = state['unit'] - @classmethod - def construct_from_string(cls, string): - """ - Construct a DatetimeTZDtype from a string. - Parameters - ---------- - string : str - The string alias for this DatetimeTZDtype. - Should be formatted like ``datetime64[ns, <tz>]``, - where ``<tz>`` is the timezone name. +class TimedeltaDtype(PandasExtensionDtype, ExtensionDtype): + _metadata = ('unit',) + _match = re.compile(r"(timedelta64|m8)\[(?P\w+)\]") + type = Timedelta + kind = 'm' + str = '|m8[ns]' + base = np.dtype('m8[ns]') + na_value = NaT - Examples - -------- - >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') - datetime64[ns, UTC] - """ - if isinstance(string, compat.string_types): - msg = "Could not construct DatetimeTZDtype from '{}'" - try: - match = cls._match.match(string) - if match: - d = match.groupdict() - return cls(unit=d['unit'], tz=d['tz']) - except Exception: - # TODO(py3): Change this pass to `raise TypeError(msg) from e` - pass - raise TypeError(msg.format(string)) + def __init__(self, unit="ns"): + if isinstance(unit, np.dtype): + if unit != np.dtype("m8[ns]"): + raise ValueError() + unit = "ns" + if unit != "ns": + raise ValueError() - raise TypeError("Could not construct DatetimeTZDtype") + self._unit = unit def __unicode__(self): - return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) + return "timedelta64[ns]" + + def __eq__(self, other): + # TODO: Allow comparison with numpy dtypes? 
+ if isinstance(other, compat.string_types): + return other == self.name or other == 'm8[ns]' + return super(TimedeltaDtype, self).__eq__(other) + + def __hash__(self): + return super(PandasExtensionDtype, self).__hash__() + + def __getstate__(self): + # override PandasExtensionDtype.__getstate__ + return self.__dict__ + + @property + def unit(self): + return self._unit @property def name(self): - """A string representation of the dtype.""" return str(self) - def __hash__(self): - # make myself hashable - # TODO: update this. - return hash(str(self)) - - def __eq__(self, other): - if isinstance(other, compat.string_types): - return other == self.name + @classmethod + def construct_array_type(cls): + from pandas.arrays import TimedeltaArray + return TimedeltaArray - return (isinstance(other, DatetimeTZDtype) and - self.unit == other.unit and - str(self.tz) == str(other.tz)) + @classmethod + def construct_from_string(cls, string): + try: + match = cls._match.match(string) + if match: + d = match.groupdict() + return cls(d['unit']) + except Exception: + pass - def __setstate__(self, state): - # for pickle compat. 
- self._tz = state['tz'] - self._unit = state['unit'] + msg = "Could not construct a TimedeltaArray from '{}'" + raise TypeError(msg.format(string)) @register_extension_dtype diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 309fb3b841461..84e9cecf104b9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -70,7 +70,9 @@ is_iterator, is_sequence, is_named_tuple) -from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex +from pandas.core.dtypes.generic import ( + ABCSeries, ABCIndexClass, ABCMultiIndex, ABCDatetimeArray +) from pandas.core.dtypes.missing import isna, notna from pandas.core import algorithms @@ -3643,8 +3645,15 @@ def reindexer(value): value = cast_scalar_to_array(len(self.index), value) value = maybe_cast_to_datetime(value, infer_dtype) + if isinstance(value, ABCDatetimeArray) and not value.tz: + # Ensure that tz-naive data are only stored internally as ndarray. + # This should be unnecessary when sanitize_array and + # maybe_cast_to_datetime are cleaned up. 
+ value = value._data + # return internal types directly if is_extension_type(value) or is_extension_array_dtype(value): + return value # broadcast across multiple columns if necessary diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5a9bf6c2c6263..f34b06254fce0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -312,7 +312,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, (dtype is not None and is_timedelta64_dtype(dtype))): from pandas import TimedeltaIndex result = TimedeltaIndex(data, copy=copy, name=name, **kwargs) - if dtype is not None and _o_dtype == dtype: + if dtype is not None and is_object_dtype(dtype): return Index(result.to_pytimedelta(), dtype=_o_dtype) else: return result diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a4e058160e567..8e8a196a3db5c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -359,7 +359,9 @@ def __array__(self, dtype=None): @property def dtype(self): - return self._data.dtype + if self.tz: + return self._data.dtype + return _NS_DTYPE @property def tz(self): diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index cbe5ae198838f..b9c8f136eb216 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -6,6 +6,7 @@ from pandas._libs import ( NaT, Timedelta, index as libindex, join as libjoin, lib) +from pandas._libs.properties import cache_readonly import pandas.compat as compat from pandas.util._decorators import Appender, Substitution @@ -17,7 +18,8 @@ from pandas.core.accessor import delegate_names from pandas.core.arrays import datetimelike as dtl -from pandas.core.arrays.timedeltas import TimedeltaArray, _is_convertible_to_td +from pandas.core.arrays.timedeltas import ( + TimedeltaArray, _is_convertible_to_td, _validate_td64_dtype) from pandas.core.base import _shared_docs import pandas.core.common as com from 
pandas.core.indexes.base import Index, _index_shared_docs @@ -235,9 +237,8 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): else: if freq is None: freq = values.freq + _validate_td64_dtype(dtype) assert isinstance(values, TimedeltaArray), type(values) - assert dtype == _TD_DTYPE, dtype - assert values.dtype == 'm8[ns]', values.dtype tdarr = TimedeltaArray._simple_new(values._data, freq=freq) result = object.__new__(cls) @@ -249,6 +250,10 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE): result._reset_identity() return result + @cache_readonly + def dtype(self): + return self._data.dtype.base + # ------------------------------------------------------------------- def __setstate__(self, state): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index df764aa4ba666..9e37e86cb0164 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -26,7 +26,8 @@ is_re, is_re_compilable, is_sparse, is_timedelta64_dtype, pandas_dtype) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.dtypes import ( - CategoricalDtype, ExtensionDtype, PandasExtensionDtype) + CategoricalDtype, DatetimeDtype, ExtensionDtype, PandasExtensionDtype, + TimedeltaDtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeIndex, ABCExtensionArray, ABCIndexClass, ABCSeries) @@ -593,6 +594,8 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, # convert dtypes if needed dtype = pandas_dtype(dtype) + if isinstance(dtype, DatetimeDtype): + dtype = _NS_DTYPE # astype processing if is_dtype_equal(self.dtype, dtype): if copy: @@ -1678,6 +1681,7 @@ class ExtensionBlock(NonConsolidatableMixIn, Block): def __init__(self, values, placement, ndim=None): values = self._maybe_coerce_values(values) + assert not isinstance(values.dtype, DatetimeDtype) super(ExtensionBlock, self).__init__(values, placement, ndim) def _maybe_coerce_values(self, values): @@ -2681,6 +2685,10 @@ 
def f(m, v, i): blocks = self.split_and_operate(None, f, False) else: values = f(None, self.values.ravel(), None) + + if isinstance(values, DatetimeArray) and not values.tz: + # ensure that we get a DatetimeBlock. + values = values._data blocks = [make_block(values, ndim=self.ndim, placement=self.mgr_locs)] @@ -3056,7 +3064,10 @@ def get_block_type(values, dtype=None): assert not is_datetime64tz_dtype(values) cls = DatetimeBlock elif is_datetime64tz_dtype(values): - cls = DatetimeTZBlock + if dtype.tz: + cls = DatetimeTZBlock + else: + cls = DatetimeBlock elif is_interval_dtype(dtype) or is_period_dtype(dtype): cls = ObjectValuesExtensionBlock elif is_extension_array_dtype(values): @@ -3085,6 +3096,13 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None, "in a future release.", DeprecationWarning) if klass is None: dtype = dtype or values.dtype + + if isinstance(dtype, (DatetimeDtype, TimedeltaDtype)): + # for DataFrame.__setitem__[scalar] + # this is... not great. + values, dtype = values._data, values.dtype + values = _block_shape(values, ndim) + klass = get_block_type(values, dtype) elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values): diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 7af347a141781..327b88c3b85c9 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -19,12 +19,14 @@ maybe_cast_to_datetime, maybe_cast_to_integer_array, maybe_castable, maybe_convert_platform, maybe_infer_to_datetimelike, maybe_upcast) from pandas.core.dtypes.common import ( - is_categorical_dtype, is_datetime64tz_dtype, is_dtype_equal, + _NS_DTYPE, is_categorical_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, is_extension_type, is_float_dtype, is_integer_dtype, is_iterator, is_list_like, is_object_dtype, pandas_dtype) +from pandas.core.dtypes.dtypes import DatetimeDtype from pandas.core.dtypes.generic import ( - ABCDataFrame, 
ABCDatetimeIndex, ABCIndexClass, ABCPandasArray, - ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex) + ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCIndexClass, + ABCPandasArray, ABCPeriodIndex, ABCSeries, ABCTimedeltaArray, + ABCTimedeltaIndex) from pandas.core.dtypes.missing import isna from pandas.core import algorithms, common as com @@ -540,6 +542,7 @@ def sanitize_array(data, index, dtype=None, copy=False, Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. """ + from pandas.core.dtypes.generic import ABCDatetimeArray if dtype is not None: dtype = pandas_dtype(dtype) @@ -598,6 +601,18 @@ def sanitize_array(data, index, dtype=None, copy=False, if copy: subarr = data.copy() + + # Ensure that we don't allow the following in Internals + # * DatetimeArray[DatetimeDtype] (tz-naive) + # * TimedeltaArray + # For the time being, we only want to allow storing those + # as DatetimeBlock and TimedeltaBlocks. + + if isinstance(subarr, ABCDatetimeArray) and not subarr.tz: + subarr = subarr._data + if isinstance(subarr, ABCTimedeltaArray): + subarr = subarr._data + return subarr elif isinstance(data, (list, tuple)) and len(data) > 0: @@ -624,6 +639,11 @@ def sanitize_array(data, index, dtype=None, copy=False, else: subarr = _try_cast(data, False, dtype, copy, raise_cast_failure) + if isinstance(subarr, ABCDatetimeArray) and not subarr.tz: + subarr = subarr._data + if isinstance(subarr, ABCTimedeltaArray): + subarr = subarr._data + # scalar like, GH if getattr(subarr, 'ndim', 0) == 0: if isinstance(data, list): # pragma: no cover @@ -638,6 +658,10 @@ def sanitize_array(data, index, dtype=None, copy=False, # need to possibly convert the value here value = maybe_cast_to_datetime(value, dtype) + if isinstance(dtype, DatetimeDtype): + # for scalar timestamp / nat + dtype = _NS_DTYPE + subarr = construct_1d_arraylike_from_scalar( value, len(index), dtype) @@ -678,6 +702,9 @@ def sanitize_array(data, index, dtype=None, copy=False, except 
IncompatibleFrequency: pass + if isinstance(data, ABCDatetimeArray): + assert data.tz + return subarr @@ -702,6 +729,8 @@ def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure): isinstance(subarr, np.ndarray))): subarr = construct_1d_object_array_from_listlike(subarr) elif not is_extension_type(subarr): + if isinstance(dtype, DatetimeDtype): + dtype = _NS_DTYPE subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) except (ValueError, TypeError): @@ -718,4 +747,19 @@ def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure): raise else: subarr = np.array(arr, dtype=object, copy=copy) + return subarr + + +def _ensure_dta_tda_ndarray(arr): + """Ensure that an ndarray is returned for specific cases. + + * tz-naive DatetimeArray + * TimedeltaArray + """ + if isinstance(arr, ABCDatetimeArray): + if not arr.tz: + arr = arr._data + if isinstance(arr, ABCTimedeltaArray): + arr = arr._data + return arr diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index cafd3a9915fa0..ea75384d76804 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -144,6 +144,7 @@ def f(values, axis=None, skipna=True, **kwds): def _bn_ok_dtype(dt, name): # Bottleneck chokes on datetime64 + # import pdb; pdb.set_trace() if (not is_object_dtype(dt) and not (is_datetime_or_timedelta_dtype(dt) or is_datetime64tz_dtype(dt))): diff --git a/pandas/core/series.py b/pandas/core/series.py index eb412add7bbbb..6e4f037a2bea3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -22,7 +22,7 @@ is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries, - ABCSparseArray, ABCSparseSeries) + ABCSparseArray, ABCSparseSeries, ABCTimedeltaArray) from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, notna, remove_na_arraylike) @@ -260,6 +260,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: data 
= sanitize_array(data, index, dtype, copy, raise_cast_failure=True) + if isinstance(data, ABCDatetimeArray): + assert data.tz + assert not isinstance(data, ABCTimedeltaArray) data = SingleBlockManager(data, index, fastpath=True) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 07cf358c765b3..8e3f116173dc6 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -48,7 +48,7 @@ class TestPDApi(Base): 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex', 'IntervalArray', 'CategoricalDtype', 'PeriodDtype', 'IntervalDtype', - 'DatetimeTZDtype', + 'DatetimeDtype', 'DatetimeTZDtype', 'TimedeltaDtype', 'Int8Dtype', 'Int16Dtype', 'Int32Dtype', 'Int64Dtype', 'UInt8Dtype', 'UInt16Dtype', 'UInt32Dtype', 'UInt64Dtype', ] diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 62e96fd39a759..ea194df7fea86 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -201,16 +201,29 @@ def test_is_datetime64_dtype(): assert not com.is_datetime64_dtype([1, 2, 3]) assert not com.is_datetime64_dtype(np.array([], dtype=int)) + tzd = DatetimeTZDtype(tz="CET") + assert not com.is_datetime64_dtype(tzd) + assert not com.is_datetime64_dtype( + pd.arrays.DatetimeArray._from_sequence(['2000'], dtype=tzd) + ) + assert com.is_datetime64_dtype(np.datetime64) assert com.is_datetime64_dtype(np.array([], dtype=np.datetime64)) + assert com.is_datetime64_dtype(pd.DatetimeDtype()) + assert com.is_datetime64_dtype( + pd.arrays.DatetimeArray._from_sequence(['2000']) + ) def test_is_datetime64tz_dtype(): assert not com.is_datetime64tz_dtype(object) assert not com.is_datetime64tz_dtype([1, 2, 3]) assert not com.is_datetime64tz_dtype(pd.DatetimeIndex([1, 2, 3])) - assert com.is_datetime64tz_dtype(pd.DatetimeIndex(['2000'], - tz="US/Eastern")) + idx = pd.DatetimeIndex(['2000'], tz="US/Eastern") + assert com.is_datetime64tz_dtype(idx) + assert com.is_datetime64tz_dtype(idx.dtype) + 
assert com.is_datetime64tz_dtype(idx._data) + assert com.is_datetime64tz_dtype(idx._data.dtype) def test_is_timedelta64_dtype(): @@ -225,7 +238,13 @@ def test_is_timedelta64_dtype(): assert com.is_timedelta64_dtype(np.timedelta64) assert com.is_timedelta64_dtype(pd.Series([], dtype="timedelta64[ns]")) - assert com.is_timedelta64_dtype(pd.to_timedelta(['0 days', '1 days'])) + + tdi = pd.to_timedelta(['0 days', '1 days']) + + assert com.is_timedelta64_dtype(tdi) + assert com.is_timedelta64_dtype(tdi.dtype) + assert com.is_timedelta64_dtype(tdi._data) + assert com.is_timedelta64_dtype(tdi._data.dtype) def test_is_period_dtype(): @@ -424,6 +443,18 @@ def test_is_datetime_or_timedelta_dtype(): assert com.is_datetime_or_timedelta_dtype( np.array([], dtype=np.datetime64)) + idx = pd.date_range("2000", periods=2) + assert com.is_datetime_or_timedelta_dtype(idx) + assert com.is_datetime_or_timedelta_dtype(idx.dtype) + assert com.is_datetime_or_timedelta_dtype(idx._data) + assert com.is_datetime_or_timedelta_dtype(idx._data.dtype) + + idx = pd.timedelta_range("1H", periods=2) + assert com.is_datetime_or_timedelta_dtype(idx) + assert com.is_datetime_or_timedelta_dtype(idx.dtype) + assert com.is_datetime_or_timedelta_dtype(idx._data) + assert com.is_datetime_or_timedelta_dtype(idx._data.dtype) + def test_is_numeric_v_string_like(): assert not com.is_numeric_v_string_like(1, 1) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index 0fe0a845f5129..55f1591bbb7f3 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -825,6 +825,62 @@ def test_update_dtype_errors(self, bad_dtype): dtype.update_dtype(bad_dtype) +class TestDatetimeDtype(Base): + def create(self): + return pd.DatetimeDtype() + + def test_equality(self): + a = pd.DatetimeDtype() + b = pd.DatetimeDtype() + assert a == b + + c = np.dtype("int8") + assert a != c + # TODO: equality to 'M8[ns]'? np.dtype('M8[ns]')? 
+ + def test_construct_from_string(self): + result = pd.DatetimeDtype.construct_from_string('datetime64[ns]') + expected = pd.DatetimeDtype() + assert result == expected + + def test_attrs(self): + assert self.dtype.unit == 'ns' + assert self.dtype.kind == 'M' + assert str(self.dtype) == 'datetime64[ns]' + assert self.dtype.base == np.dtype('M8[ns]') + assert self.dtype.type is pd.Timestamp + assert self.dtype.na_value is pd.NaT + assert self.dtype.construct_array_type() is pd.arrays.DatetimeArray + + +class TestTimedeltaDtype(Base): + def create(self): + return pd.TimedeltaDtype() + + def test_equality(self): + a = pd.TimedeltaDtype() + b = pd.TimedeltaDtype() + assert a == b + + c = np.dtype("int") + assert a != c + # TODO: equality to 'm8[ns]'? np.dtype('m8[ns]')? + + def test_construct_from_string(self): + result = pd.DatetimeDtype.construct_from_string('datetime64[ns]') + expected = pd.DatetimeDtype() + assert result == expected + + def test_attrs(self): + assert self.dtype.unit == 'ns' + assert self.dtype.kind == 'm' + assert str(self.dtype) == 'timedelta64[ns]' + assert self.dtype.base == np.dtype('m8[ns]') + assert self.dtype.type is pd.Timedelta + assert self.dtype.na_value is pd.NaT + assert self.dtype.construct_array_type() is pd.arrays.TimedeltaArray + + @pytest.mark.parametrize('dtype', [ CategoricalDtype, IntervalDtype, diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index 00ad35bf6a924..c06e688f6e95e 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -1,237 +1,29 @@ +""" +Extension Tests for *tz-naive* arrays.DatetimeArray. + +Currently, we only run the Dtype tests, as we do not allow a +tz-naive DatetimeArray inside internals. 
+""" import numpy as np import pytest -from pandas.core.dtypes.dtypes import DatetimeTZDtype - import pandas as pd -from pandas.core.arrays import DatetimeArray from pandas.tests.extension import base -@pytest.fixture(params=["US/Central"]) -def dtype(request): - return DatetimeTZDtype(unit="ns", tz=request.param) - - @pytest.fixture -def data(dtype): - data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), - dtype=dtype) - return data - - -@pytest.fixture -def data_missing(dtype): - return DatetimeArray( - np.array(['NaT', '2000-01-01'], dtype='datetime64[ns]'), - dtype=dtype - ) - - -@pytest.fixture -def data_for_sorting(dtype): - a = pd.Timestamp('2000-01-01') - b = pd.Timestamp('2000-01-02') - c = pd.Timestamp('2000-01-03') - return DatetimeArray(np.array([b, c, a], dtype='datetime64[ns]'), - dtype=dtype) - - -@pytest.fixture -def data_missing_for_sorting(dtype): - a = pd.Timestamp('2000-01-01') - b = pd.Timestamp('2000-01-02') - return DatetimeArray(np.array([b, 'NaT', a], dtype='datetime64[ns]'), - dtype=dtype) - - -@pytest.fixture -def data_for_grouping(dtype): - """ - Expected to be like [B, B, NA, NA, A, A, B, C] - - Where A < B < C and NA is missing - """ - a = pd.Timestamp('2000-01-01') - b = pd.Timestamp('2000-01-02') - c = pd.Timestamp('2000-01-03') - na = 'NaT' - return DatetimeArray(np.array([b, b, na, na, a, a, b, c], - dtype='datetime64[ns]'), - dtype=dtype) +def dtype(): + return pd.DatetimeDtype() @pytest.fixture -def na_cmp(): - def cmp(a, b): - return a is pd.NaT and a is b - return cmp - - -@pytest.fixture -def na_value(): - return pd.NaT - - -# ---------------------------------------------------------------------------- -class BaseDatetimeTests(object): - pass - - -# ---------------------------------------------------------------------------- -# Tests -class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): - pass - - -class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): - pass - - -class 
TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): - pass - - -class TestMethods(BaseDatetimeTests, base.BaseMethodsTests): - @pytest.mark.skip(reason="Incorrect expected") - def test_value_counts(self, all_data, dropna): - pass - - def test_combine_add(self, data_repeated): - # Timestamp.__add__(Timestamp) not defined - pass - - -class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): - - def test_array_interface(self, data): - if data.tz: - # np.asarray(DTA) is currently always tz-naive. - pytest.skip("GH-23569") - else: - super(TestInterface, self).test_array_interface(data) - - -class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests): - implements = {'__sub__', '__rsub__'} - - def test_arith_series_with_scalar(self, data, all_arithmetic_operators): - if all_arithmetic_operators in self.implements: - s = pd.Series(data) - self.check_opname(s, all_arithmetic_operators, s.iloc[0], - exc=None) - else: - # ... but not the rest. - super(TestArithmeticOps, self).test_arith_series_with_scalar( - data, all_arithmetic_operators - ) - - def test_add_series_with_extension_array(self, data): - # Datetime + Datetime not implemented - s = pd.Series(data) - msg = 'cannot add DatetimeArray and DatetimeArray' - with pytest.raises(TypeError, match=msg): - s + data - - def test_arith_series_with_array(self, data, all_arithmetic_operators): - if all_arithmetic_operators in self.implements: - s = pd.Series(data) - self.check_opname(s, all_arithmetic_operators, s.iloc[0], - exc=None) - else: - # ... but not the rest. - super(TestArithmeticOps, self).test_arith_series_with_scalar( - data, all_arithmetic_operators - ) - - def test_error(self, data, all_arithmetic_operators): - pass - - @pytest.mark.xfail(reason="different implementation", strict=False) - def test_direct_arith_with_series_returns_not_implemented(self, data): - # Right now, we have trouble with this. 
Returning NotImplemented - # fails other tests like - # tests/arithmetic/test_datetime64::TestTimestampSeriesArithmetic:: - # test_dt64_seris_add_intlike - return super( - TestArithmeticOps, - self - ).test_direct_arith_with_series_returns_not_implemented(data) - - -class TestCasting(BaseDatetimeTests, base.BaseCastingTests): - pass - - -class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests): - - def _compare_other(self, s, data, op_name, other): - # the base test is not appropriate for us. We raise on comparison - # with (some) integers, depending on the value. - pass - - @pytest.mark.xfail(reason="different implementation", strict=False) - def test_direct_arith_with_series_returns_not_implemented(self, data): - return super( - TestComparisonOps, - self - ).test_direct_arith_with_series_returns_not_implemented(data) - - -class TestMissing(BaseDatetimeTests, base.BaseMissingTests): - pass - - -class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests): - - @pytest.mark.skip(reason="We have DatetimeTZBlock") - def test_concat(self, data, in_frame): - pass - - def test_concat_mixed_dtypes(self, data): - # concat(Series[datetimetz], Series[category]) uses a - # plain np.array(values) on the DatetimeArray, which - # drops the tz. - super(TestReshaping, self).test_concat_mixed_dtypes(data) - - @pytest.mark.parametrize("obj", ["series", "frame"]) - def test_unstack(self, obj): - # GH-13287: can't use base test, since building the expected fails. 
- data = DatetimeArray._from_sequence(['2000', '2001', '2002', '2003'], - tz='US/Central') - index = pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), - names=['a', 'b']) - - if obj == "series": - ser = pd.Series(data, index=index) - expected = pd.DataFrame({ - "A": data.take([0, 1]), - "B": data.take([2, 3]) - }, index=pd.Index(['a', 'b'], name='b')) - expected.columns.name = 'a' - - else: - ser = pd.DataFrame({"A": data, "B": data}, index=index) - expected = pd.DataFrame( - {("A", "A"): data.take([0, 1]), - ("A", "B"): data.take([2, 3]), - ("B", "A"): data.take([0, 1]), - ("B", "B"): data.take([2, 3])}, - index=pd.Index(['a', 'b'], name='b') - ) - expected.columns.names = [None, 'a'] - - result = ser.unstack(0) - self.assert_equal(result, expected) - - -class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): - pass +def data(): + return pd.arrays.DatetimeArray(np.arange(0, 100, dtype='M8[ns]')) -class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): +class BaseDatetimeTests(object): pass -class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): +class TestDtype(BaseDatetimeTests, base.BaseDtypeTests): pass diff --git a/pandas/tests/extension/test_datetimetz.py b/pandas/tests/extension/test_datetimetz.py new file mode 100644 index 0000000000000..00ad35bf6a924 --- /dev/null +++ b/pandas/tests/extension/test_datetimetz.py @@ -0,0 +1,237 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd +from pandas.core.arrays import DatetimeArray +from pandas.tests.extension import base + + +@pytest.fixture(params=["US/Central"]) +def dtype(request): + return DatetimeTZDtype(unit="ns", tz=request.param) + + +@pytest.fixture +def data(dtype): + data = DatetimeArray(pd.date_range("2000", periods=100, tz=dtype.tz), + dtype=dtype) + return data + + +@pytest.fixture +def data_missing(dtype): + return DatetimeArray( + np.array(['NaT', '2000-01-01'], dtype='datetime64[ns]'), + dtype=dtype + ) + + 
+@pytest.fixture +def data_for_sorting(dtype): + a = pd.Timestamp('2000-01-01') + b = pd.Timestamp('2000-01-02') + c = pd.Timestamp('2000-01-03') + return DatetimeArray(np.array([b, c, a], dtype='datetime64[ns]'), + dtype=dtype) + + +@pytest.fixture +def data_missing_for_sorting(dtype): + a = pd.Timestamp('2000-01-01') + b = pd.Timestamp('2000-01-02') + return DatetimeArray(np.array([b, 'NaT', a], dtype='datetime64[ns]'), + dtype=dtype) + + +@pytest.fixture +def data_for_grouping(dtype): + """ + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + a = pd.Timestamp('2000-01-01') + b = pd.Timestamp('2000-01-02') + c = pd.Timestamp('2000-01-03') + na = 'NaT' + return DatetimeArray(np.array([b, b, na, na, a, a, b, c], + dtype='datetime64[ns]'), + dtype=dtype) + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return a is pd.NaT and a is b + return cmp + + +@pytest.fixture +def na_value(): + return pd.NaT + + +# ---------------------------------------------------------------------------- +class BaseDatetimeTests(object): + pass + + +# ---------------------------------------------------------------------------- +# Tests +class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests): + pass + + +class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests): + pass + + +class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests): + pass + + +class TestMethods(BaseDatetimeTests, base.BaseMethodsTests): + @pytest.mark.skip(reason="Incorrect expected") + def test_value_counts(self, all_data, dropna): + pass + + def test_combine_add(self, data_repeated): + # Timestamp.__add__(Timestamp) not defined + pass + + +class TestInterface(BaseDatetimeTests, base.BaseInterfaceTests): + + def test_array_interface(self, data): + if data.tz: + # np.asarray(DTA) is currently always tz-naive. 
+ pytest.skip("GH-23569") + else: + super(TestInterface, self).test_array_interface(data) + + +class TestArithmeticOps(BaseDatetimeTests, base.BaseArithmeticOpsTests): + implements = {'__sub__', '__rsub__'} + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], + exc=None) + else: + # ... but not the rest. + super(TestArithmeticOps, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) + + def test_add_series_with_extension_array(self, data): + # Datetime + Datetime not implemented + s = pd.Series(data) + msg = 'cannot add DatetimeArray and DatetimeArray' + with pytest.raises(TypeError, match=msg): + s + data + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if all_arithmetic_operators in self.implements: + s = pd.Series(data) + self.check_opname(s, all_arithmetic_operators, s.iloc[0], + exc=None) + else: + # ... but not the rest. + super(TestArithmeticOps, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) + + def test_error(self, data, all_arithmetic_operators): + pass + + @pytest.mark.xfail(reason="different implementation", strict=False) + def test_direct_arith_with_series_returns_not_implemented(self, data): + # Right now, we have trouble with this. Returning NotImplemented + # fails other tests like + # tests/arithmetic/test_datetime64::TestTimestampSeriesArithmetic:: + # test_dt64_seris_add_intlike + return super( + TestArithmeticOps, + self + ).test_direct_arith_with_series_returns_not_implemented(data) + + +class TestCasting(BaseDatetimeTests, base.BaseCastingTests): + pass + + +class TestComparisonOps(BaseDatetimeTests, base.BaseComparisonOpsTests): + + def _compare_other(self, s, data, op_name, other): + # the base test is not appropriate for us. We raise on comparison + # with (some) integers, depending on the value. 
+ pass + + @pytest.mark.xfail(reason="different implementation", strict=False) + def test_direct_arith_with_series_returns_not_implemented(self, data): + return super( + TestComparisonOps, + self + ).test_direct_arith_with_series_returns_not_implemented(data) + + +class TestMissing(BaseDatetimeTests, base.BaseMissingTests): + pass + + +class TestReshaping(BaseDatetimeTests, base.BaseReshapingTests): + + @pytest.mark.skip(reason="We have DatetimeTZBlock") + def test_concat(self, data, in_frame): + pass + + def test_concat_mixed_dtypes(self, data): + # concat(Series[datetimetz], Series[category]) uses a + # plain np.array(values) on the DatetimeArray, which + # drops the tz. + super(TestReshaping, self).test_concat_mixed_dtypes(data) + + @pytest.mark.parametrize("obj", ["series", "frame"]) + def test_unstack(self, obj): + # GH-13287: can't use base test, since building the expected fails. + data = DatetimeArray._from_sequence(['2000', '2001', '2002', '2003'], + tz='US/Central') + index = pd.MultiIndex.from_product(([['A', 'B'], ['a', 'b']]), + names=['a', 'b']) + + if obj == "series": + ser = pd.Series(data, index=index) + expected = pd.DataFrame({ + "A": data.take([0, 1]), + "B": data.take([2, 3]) + }, index=pd.Index(['a', 'b'], name='b')) + expected.columns.name = 'a' + + else: + ser = pd.DataFrame({"A": data, "B": data}, index=index) + expected = pd.DataFrame( + {("A", "A"): data.take([0, 1]), + ("A", "B"): data.take([2, 3]), + ("B", "A"): data.take([0, 1]), + ("B", "B"): data.take([2, 3])}, + index=pd.Index(['a', 'b'], name='b') + ) + expected.columns.names = [None, 'a'] + + result = ser.unstack(0) + self.assert_equal(result, expected) + + +class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): + pass + + +class TestGroupby(BaseDatetimeTests, base.BaseGroupbyTests): + pass + + +class TestPrinting(BaseDatetimeTests, base.BasePrintingTests): + pass diff --git a/pandas/tests/extension/test_timedelta.py b/pandas/tests/extension/test_timedelta.py new file mode 
100644 index 0000000000000..2fe67a76f3ad2 --- /dev/null +++ b/pandas/tests/extension/test_timedelta.py @@ -0,0 +1,29 @@ +""" +Extension Tests for arrays.TimedeltaArray. + +Currently, we only run the Dtype tests, as we do not allow a +TimedeltaArray inside internals. +""" +import numpy as np +import pytest + +import pandas as pd +from pandas.tests.extension import base + + +@pytest.fixture +def dtype(): + return pd.TimedeltaDtype() + + +@pytest.fixture +def data(): + return pd.arrays.TimedeltaArray(np.arange(0, 100, dtype='m8[ns]')) + + +class BaseTimedeltaTests(object): + pass + + +class TestDtype(BaseTimedeltaTests, base.BaseDtypeTests): + pass