diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9947866a76e3f..81e1d84880f60 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -278,6 +278,9 @@ def apply_standard(self): if ( self.result_type in ["reduce", None] and not self.dtypes.apply(is_extension_array_dtype).any() + # Disallow dtypes where setting _index_data will break + # ExtensionArray values, see GH#31182 + and not self.dtypes.apply(lambda x: x.kind in ["m", "M"]).any() # Disallow complex_internals since libreduction shortcut raises a TypeError and not self.agg_axis._has_complex_internals ): diff --git a/pandas/core/base.py b/pandas/core/base.py index 6ad237cbc7c51..05e3302abddbe 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1265,6 +1265,10 @@ def unique(self): if hasattr(values, "unique"): result = values.unique() + if self.dtype.kind in ["m", "M"] and isinstance(self, ABCSeries): + # GH#31182 Series._values returns EA, unpack for backward-compat + if getattr(self.dtype, "tz", None) is None: + result = np.asarray(result) else: result = unique1d(values) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d29954c677c5d..22901051ec345 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2151,6 +2151,10 @@ def get_values(self, dtype=None): return result.reshape(self.values.shape) return self.values + def internal_values(self): + # Override to return DatetimeArray and TimedeltaArray + return self.array_values() + class DatetimeBlock(DatetimeLikeBlockMixin, Block): __slots__ = () @@ -2284,6 +2288,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock): is_datetimetz = True is_extension = True + internal_values = Block.internal_values _can_hold_element = DatetimeBlock._can_hold_element to_native_types = DatetimeBlock.to_native_types fill_value = np.datetime64("NaT", "ns") diff --git a/pandas/core/series.py b/pandas/core/series.py index e9df0938d5f98..0aaa583885bc3 100644 --- a/pandas/core/series.py 
+++ b/pandas/core/series.py @@ -493,7 +493,8 @@ def _values(self): """ Return the internal repr of this data (defined by Block.interval_values). This are the values as stored in the Block (ndarray or ExtensionArray - depending on the Block class). + depending on the Block class), with datetime64[ns] and timedelta64[ns] + wrapped in ExtensionArrays to match Index._values behavior. Differs from the public ``.values`` for certain data types, because of historical backwards compatibility of the public attribute (e.g. period @@ -502,8 +503,9 @@ def _values(self): cases). Differs from ``.array`` in that this still returns the numpy array if - the Block is backed by a numpy array, while ``.array`` ensures to always - return an ExtensionArray. + the Block is backed by a numpy array (except for datetime64 and + timedelta64 dtypes), while ``.array`` ensures to always return an + ExtensionArray. Differs from ``._ndarray_values``, as that ensures to always return a numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if @@ -515,8 +517,9 @@ def _values(self): ----------- | ------------- | ------------- | ------------- | --------------- | Numeric | ndarray | ndarray | PandasArray | ndarray | Category | Categorical | Categorical | Categorical | ndarray[int] | - dt64[ns] | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns] | + dt64[ns] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] | dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns] | + td64[ns] | ndarray[m8ns] | TimedeltaArray| TimedeltaArray| ndarray[m8ns] | Period | ndarray[obj] | PeriodArray | PeriodArray | ndarray[int] | Nullable | EA | EA | EA | ndarray | diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a5332eaea0432..7abf810e6bcfc 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1583,7 +1583,7 @@ def 
test_string_na_nat_conversion(self, cache): for i in range(5): x = series[i] if isna(x): - expected[i] = iNaT + expected[i] = pd.NaT else: expected[i] = to_datetime(x, cache=cache) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 7400b049961d5..8d2058ffab643 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -316,7 +316,12 @@ def test_invalid_td64_reductions(self, opname): ) td = s.diff() - msg = "reduction operation '{op}' not allowed for this dtype" + msg = "|".join( + [ + "reduction operation '{op}' not allowed for this dtype", + r"cannot perform {op} with type timedelta64\[ns\]", + ] + ) msg = msg.format(op=opname) with pytest.raises(TypeError, match=msg): @@ -648,7 +653,13 @@ def test_ops_consistency_on_empty(self, method): # timedelta64[ns] tdser = Series([], dtype="m8[ns]") if method == "var": - with pytest.raises(TypeError, match="operation 'var' not allowed"): + msg = "|".join( + [ + "operation 'var' not allowed", + r"cannot perform var with type timedelta64\[ns\]", + ] + ) + with pytest.raises(TypeError, match=msg): getattr(tdser, method)() else: result = getattr(tdser, method)()