diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index cadc5615cd654..7159f422e3fd6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -1161,6 +1161,7 @@ Reshaping - Bug in :func:`to_datetime` raising an error when the input sequence contained unhashable items (:issue:`39756`) - Bug in :meth:`Series.explode` preserving the index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`) - Bug in :func:`to_datetime` raising a ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`) +- Bug in :meth:`Series.unstack` and :meth:`DataFrame.unstack` with object-dtype values containing timezone-aware datetime objects incorrectly raising ``TypeError`` (:issue:`41875`) - Bug in :meth:`DataFrame.melt` raising ``InvalidIndexError`` when :class:`DataFrame` has duplicate columns used as ``value_vars`` (:issue:`41951`) Sparse diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 93859eb11dd44..2f45cae46b32e 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -25,11 +25,13 @@ is_object_dtype, needs_i8_conversion, ) +from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.missing import notna import pandas.core.algorithms as algos from pandas.core.arrays import SparseArray from pandas.core.arrays.categorical import factorize_from_iterable +from pandas.core.construction import ensure_wrapped_if_datetimelike from pandas.core.frame import DataFrame from pandas.core.indexes.api import ( Index, @@ -233,15 +235,22 @@ def get_new_values(self, values, fill_value=None): if mask_all: dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) + name = np.dtype(dtype).name else: dtype, fill_value = maybe_promote(values.dtype, fill_value) - new_values = np.empty(result_shape, dtype=dtype) - new_values.fill(fill_value) + if isinstance(dtype, ExtensionDtype): + # GH#41875 + cls = dtype.construct_array_type() + new_values = cls._empty(result_shape, dtype=dtype) + new_values[:] = fill_value + name = dtype.name + else: + new_values = np.empty(result_shape, dtype=dtype) + new_values.fill(fill_value) + name = np.dtype(dtype).name new_mask = np.zeros(result_shape, dtype=bool) - name = np.dtype(dtype).name - # we need to convert to a basic dtype # and possibly coerce an input to our output dtype # e.g. ints -> floats @@ -267,6 +276,10 @@ def get_new_values(self, values, fill_value=None): # reconstruct dtype if needed if needs_i8_conversion(values.dtype): + # view as datetime64 so we can wrap in DatetimeArray and use + # DTA's view method + new_values = new_values.view("M8[ns]") + new_values = ensure_wrapped_if_datetimelike(new_values) new_values = new_values.view(values.dtype) return new_values, new_mask diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py index 3e6b1cbfb311c..8f241679d5108 100644 --- a/pandas/tests/extension/base/reshaping.py +++ b/pandas/tests/extension/base/reshaping.py @@ -4,6 +4,7 @@ import pytest from pandas.core.dtypes.common import ( + is_datetime64tz_dtype, is_interval_dtype, is_period_dtype, ) @@ -328,6 +329,9 @@ def test_unstack(self, data, index, obj): ) if obj == "series": # TODO: special cases belong in dtype-specific tests + if is_datetime64tz_dtype(data.dtype): + assert expected.dtypes.apply(is_datetime64tz_dtype).all() + expected = expected.astype(object) if is_period_dtype(data.dtype): assert expected.dtypes.apply(is_period_dtype).all() expected = expected.astype(object) diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py index bb8347f0a0122..54e31e05e8b0e 100644 --- a/pandas/tests/extension/test_datetime.py +++ b/pandas/tests/extension/test_datetime.py @@ -193,40 +193,6 @@ def test_concat_mixed_dtypes(self, data): # drops the tz. super().test_concat_mixed_dtypes(data) - @pytest.mark.parametrize("obj", ["series", "frame"]) - def test_unstack(self, obj): - # GH-13287: can't use base test, since building the expected fails. - dtype = DatetimeTZDtype(tz="US/Central") - data = DatetimeArray._from_sequence( - ["2000", "2001", "2002", "2003"], - dtype=dtype, - ) - index = pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"]) - - if obj == "series": - ser = pd.Series(data, index=index) - expected = pd.DataFrame( - {"A": data.take([0, 1]), "B": data.take([2, 3])}, - index=pd.Index(["a", "b"], name="b"), - ) - expected.columns.name = "a" - - else: - ser = pd.DataFrame({"A": data, "B": data}, index=index) - expected = pd.DataFrame( - { - ("A", "A"): data.take([0, 1]), - ("A", "B"): data.take([2, 3]), - ("B", "A"): data.take([0, 1]), - ("B", "B"): data.take([2, 3]), - }, - index=pd.Index(["a", "b"], name="b"), - ) - expected.columns.names = [None, "a"] - - result = ser.unstack(0) - self.assert_equal(result, expected) - class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests): pass