Backport PR pandas-dev#41875: BUG: unstack with object dtype of tzaware timestamps (pandas-dev#42062)

meeseeksmachine · jbrockmendel · web-flow · commit 571147f0bcf8 · 2021-06-17T11:48:16.000+01:00
Co-authored-by: jbrockmendel &lt;jbrockmendel@gmail.com&gt;
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -1161,6 +1161,7 @@ Reshaping
 - Bug in :func:`to_datetime` raising an error when the input sequence contained unhashable items (:issue:`39756`)
 - Bug in :meth:`Series.explode` preserving the index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`)
 - Bug in :func:`to_datetime` raising a ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`)
+- Bug in :meth:`Series.unstack` and :meth:`DataFrame.unstack` with object-dtype values containing timezone-aware datetime objects incorrectly raising ``TypeError`` (:issue:`41875`)
 - Bug in :meth:`DataFrame.melt` raising ``InvalidIndexError`` when :class:`DataFrame` has duplicate columns used as ``value_vars`` (:issue:`41951`)
 
 Sparse
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -25,11 +25,13 @@
     is_object_dtype,
     needs_i8_conversion,
 )
+from pandas.core.dtypes.dtypes import ExtensionDtype
 from pandas.core.dtypes.missing import notna
 
 import pandas.core.algorithms as algos
 from pandas.core.arrays import SparseArray
 from pandas.core.arrays.categorical import factorize_from_iterable
+from pandas.core.construction import ensure_wrapped_if_datetimelike
 from pandas.core.frame import DataFrame
 from pandas.core.indexes.api import (
     Index,
@@ -233,15 +235,22 @@ def get_new_values(self, values, fill_value=None):
         if mask_all:
             dtype = values.dtype
             new_values = np.empty(result_shape, dtype=dtype)
+            name = np.dtype(dtype).name
         else:
             dtype, fill_value = maybe_promote(values.dtype, fill_value)
-            new_values = np.empty(result_shape, dtype=dtype)
-            new_values.fill(fill_value)
+            if isinstance(dtype, ExtensionDtype):
+                # GH#41875
+                cls = dtype.construct_array_type()
+                new_values = cls._empty(result_shape, dtype=dtype)
+                new_values[:] = fill_value
+                name = dtype.name
+            else:
+                new_values = np.empty(result_shape, dtype=dtype)
+                new_values.fill(fill_value)
+                name = np.dtype(dtype).name
 
         new_mask = np.zeros(result_shape, dtype=bool)
 
-        name = np.dtype(dtype).name
-
         # we need to convert to a basic dtype
         # and possibly coerce an input to our output dtype
         # e.g. ints -> floats
@@ -267,6 +276,10 @@ def get_new_values(self, values, fill_value=None):
 
         # reconstruct dtype if needed
         if needs_i8_conversion(values.dtype):
+            # view as datetime64 so we can wrap in DatetimeArray and use
+            #  DTA's view method
+            new_values = new_values.view("M8[ns]")
+            new_values = ensure_wrapped_if_datetimelike(new_values)
             new_values = new_values.view(values.dtype)
 
         return new_values, new_mask
diff --git a/pandas/tests/extension/base/reshaping.py b/pandas/tests/extension/base/reshaping.py
@@ -4,6 +4,7 @@
 import pytest
 
 from pandas.core.dtypes.common import (
+    is_datetime64tz_dtype,
     is_interval_dtype,
     is_period_dtype,
 )
@@ -328,6 +329,9 @@ def test_unstack(self, data, index, obj):
             )
             if obj == "series":
                 # TODO: special cases belong in dtype-specific tests
+                if is_datetime64tz_dtype(data.dtype):
+                    assert expected.dtypes.apply(is_datetime64tz_dtype).all()
+                    expected = expected.astype(object)
                 if is_period_dtype(data.dtype):
                     assert expected.dtypes.apply(is_period_dtype).all()
                     expected = expected.astype(object)
diff --git a/pandas/tests/extension/test_datetime.py b/pandas/tests/extension/test_datetime.py
@@ -193,40 +193,6 @@ def test_concat_mixed_dtypes(self, data):
         # drops the tz.
         super().test_concat_mixed_dtypes(data)
 
-    @pytest.mark.parametrize("obj", ["series", "frame"])
-    def test_unstack(self, obj):
-        # GH-13287: can't use base test, since building the expected fails.
-        dtype = DatetimeTZDtype(tz="US/Central")
-        data = DatetimeArray._from_sequence(
-            ["2000", "2001", "2002", "2003"],
-            dtype=dtype,
-        )
-        index = pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"])
-
-        if obj == "series":
-            ser = pd.Series(data, index=index)
-            expected = pd.DataFrame(
-                {"A": data.take([0, 1]), "B": data.take([2, 3])},
-                index=pd.Index(["a", "b"], name="b"),
-            )
-            expected.columns.name = "a"
-
-        else:
-            ser = pd.DataFrame({"A": data, "B": data}, index=index)
-            expected = pd.DataFrame(
-                {
-                    ("A", "A"): data.take([0, 1]),
-                    ("A", "B"): data.take([2, 3]),
-                    ("B", "A"): data.take([0, 1]),
-                    ("B", "B"): data.take([2, 3]),
-                },
-                index=pd.Index(["a", "b"], name="b"),
-            )
-            expected.columns.names = [None, "a"]
-
-        result = ser.unstack(0)
-        self.assert_equal(result, expected)
-
 
 class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests):
     pass