Skip to content

Commit 571147f

Browse files
Backport PR pandas-dev#41875: BUG: unstack with object dtype of tzaware timestamps (pandas-dev#42062)
Co-authored-by: jbrockmendel <[email protected]>
1 parent 0da9209 commit 571147f

File tree

4 files changed

+22
-38
lines changed

4 files changed

+22
-38
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1161,6 +1161,7 @@ Reshaping
11611161
- Bug in :func:`to_datetime` raising an error when the input sequence contained unhashable items (:issue:`39756`)
11621162
- Bug in :meth:`Series.explode` preserving the index when ``ignore_index`` was ``True`` and values were scalars (:issue:`40487`)
11631163
- Bug in :func:`to_datetime` raising a ``ValueError`` when :class:`Series` contains ``None`` and ``NaT`` and has more than 50 elements (:issue:`39882`)
1164+
- Bug in :meth:`Series.unstack` and :meth:`DataFrame.unstack` with object-dtype values containing timezone-aware datetime objects incorrectly raising ``TypeError`` (:issue:`41875`)
11641165
- Bug in :meth:`DataFrame.melt` raising ``InvalidIndexError`` when :class:`DataFrame` has duplicate columns used as ``value_vars`` (:issue:`41951`)
11651166

11661167
Sparse

pandas/core/reshape/reshape.py

+17 -4
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,13 @@
2525
is_object_dtype,
2626
needs_i8_conversion,
2727
)
28+
from pandas.core.dtypes.dtypes import ExtensionDtype
2829
from pandas.core.dtypes.missing import notna
2930

3031
import pandas.core.algorithms as algos
3132
from pandas.core.arrays import SparseArray
3233
from pandas.core.arrays.categorical import factorize_from_iterable
34+
from pandas.core.construction import ensure_wrapped_if_datetimelike
3335
from pandas.core.frame import DataFrame
3436
from pandas.core.indexes.api import (
3537
Index,
@@ -233,15 +235,22 @@ def get_new_values(self, values, fill_value=None):
233235
if mask_all:
234236
dtype = values.dtype
235237
new_values = np.empty(result_shape, dtype=dtype)
238+
name = np.dtype(dtype).name
236239
else:
237240
dtype, fill_value = maybe_promote(values.dtype, fill_value)
238-
new_values = np.empty(result_shape, dtype=dtype)
239-
new_values.fill(fill_value)
241+
if isinstance(dtype, ExtensionDtype):
242+
# GH#41875
243+
cls = dtype.construct_array_type()
244+
new_values = cls._empty(result_shape, dtype=dtype)
245+
new_values[:] = fill_value
246+
name = dtype.name
247+
else:
248+
new_values = np.empty(result_shape, dtype=dtype)
249+
new_values.fill(fill_value)
250+
name = np.dtype(dtype).name
240251

241252
new_mask = np.zeros(result_shape, dtype=bool)
242253

243-
name = np.dtype(dtype).name
244-
245254
# we need to convert to a basic dtype
246255
# and possibly coerce an input to our output dtype
247256
# e.g. ints -> floats
@@ -267,6 +276,10 @@ def get_new_values(self, values, fill_value=None):
267276

268277
# reconstruct dtype if needed
269278
if needs_i8_conversion(values.dtype):
279+
# view as datetime64 so we can wrap in DatetimeArray and use
280+
# DTA's view method
281+
new_values = new_values.view("M8[ns]")
282+
new_values = ensure_wrapped_if_datetimelike(new_values)
270283
new_values = new_values.view(values.dtype)
271284

272285
return new_values, new_mask

pandas/tests/extension/base/reshaping.py

+4
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from pandas.core.dtypes.common import (
7+
is_datetime64tz_dtype,
78
is_interval_dtype,
89
is_period_dtype,
910
)
@@ -328,6 +329,9 @@ def test_unstack(self, data, index, obj):
328329
)
329330
if obj == "series":
330331
# TODO: special cases belong in dtype-specific tests
332+
if is_datetime64tz_dtype(data.dtype):
333+
assert expected.dtypes.apply(is_datetime64tz_dtype).all()
334+
expected = expected.astype(object)
331335
if is_period_dtype(data.dtype):
332336
assert expected.dtypes.apply(is_period_dtype).all()
333337
expected = expected.astype(object)

pandas/tests/extension/test_datetime.py

-34
Original file line number | Diff line number | Diff line change
@@ -193,40 +193,6 @@ def test_concat_mixed_dtypes(self, data):
193193
# drops the tz.
194194
super().test_concat_mixed_dtypes(data)
195195

196-
@pytest.mark.parametrize("obj", ["series", "frame"])
197-
def test_unstack(self, obj):
198-
# GH-13287: can't use base test, since building the expected fails.
199-
dtype = DatetimeTZDtype(tz="US/Central")
200-
data = DatetimeArray._from_sequence(
201-
["2000", "2001", "2002", "2003"],
202-
dtype=dtype,
203-
)
204-
index = pd.MultiIndex.from_product(([["A", "B"], ["a", "b"]]), names=["a", "b"])
205-
206-
if obj == "series":
207-
ser = pd.Series(data, index=index)
208-
expected = pd.DataFrame(
209-
{"A": data.take([0, 1]), "B": data.take([2, 3])},
210-
index=pd.Index(["a", "b"], name="b"),
211-
)
212-
expected.columns.name = "a"
213-
214-
else:
215-
ser = pd.DataFrame({"A": data, "B": data}, index=index)
216-
expected = pd.DataFrame(
217-
{
218-
("A", "A"): data.take([0, 1]),
219-
("A", "B"): data.take([2, 3]),
220-
("B", "A"): data.take([0, 1]),
221-
("B", "B"): data.take([2, 3]),
222-
},
223-
index=pd.Index(["a", "b"], name="b"),
224-
)
225-
expected.columns.names = [None, "a"]
226-
227-
result = ser.unstack(0)
228-
self.assert_equal(result, expected)
229-
230196

231197
class TestSetitem(BaseDatetimeTests, base.BaseSetitemTests):
232198
pass

0 commit comments

Comments
 (0)