Skip to content

Commit 0787b53

Browse files
authored
BUG: pandas.DataFrame().stack() raise an error, while expected is empty (#36185)
1 parent d203ef8 commit 0787b53

File tree

5 files changed

+38
-5
lines changed

5 files changed

+38
-5
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,8 @@ Reshaping
730730
- Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`)
731731
- Bug in :func:`concat` and :class:`DataFrame` constructor where input index names are not preserved in some cases (:issue:`13475`)
732732
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
733+
- Bug in :meth:`DataFrame.stack` where calling it on an empty :class:`DataFrame` would raise an error. It now returns an empty :class:`Series` with an empty :class:`MultiIndex` (:issue:`36113`)
734+
- Bug in :meth:`Series.unstack`. Unstacking a :class:`Series` whose index has only a single level now raises a ``ValueError`` (:issue:`36113`)
733735
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
734736
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
735737
- Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns are both multiindexed (:issue:`36360`)

pandas/core/reshape/reshape.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,7 @@ def _unstack_multiple(data, clocs, fill_value=None):
399399

400400

401401
def unstack(obj, level, fill_value=None):
402+
402403
if isinstance(level, (tuple, list)):
403404
if len(level) != 1:
404405
# _unstack_multiple only handles MultiIndexes,
@@ -416,6 +417,13 @@ def unstack(obj, level, fill_value=None):
416417
return _unstack_frame(obj, level, fill_value=fill_value)
417418
else:
418419
return obj.T.stack(dropna=False)
420+
elif not isinstance(obj.index, MultiIndex):
421+
# GH 36113
422+
# Give nicer error messages when unstacking a Series whose
423+
# Index is not a MultiIndex.
424+
raise ValueError(
425+
f"index must be a MultiIndex to unstack, {type(obj.index)} was passed"
426+
)
419427
else:
420428
if is_extension_array_dtype(obj.dtype):
421429
return _unstack_extension_series(obj, level, fill_value)
@@ -513,7 +521,7 @@ def factorize(index):
513521
verify_integrity=False,
514522
)
515523

516-
if frame._is_homogeneous_type:
524+
if not frame.empty and frame._is_homogeneous_type:
517525
# For homogeneous EAs, frame._values will coerce to object. So
518526
# we concatenate instead.
519527
dtypes = list(frame.dtypes._values)

pandas/core/sorting.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -610,7 +610,7 @@ def compress_group_index(group_index, sort: bool = True):
610610
if sort and len(obs_group_ids) > 0:
611611
obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids)
612612

613-
return comp_ids, obs_group_ids
613+
return ensure_int64(comp_ids), ensure_int64(obs_group_ids)
614614

615615

616616
def _reorder_by_uniques(uniques, labels):

pandas/tests/frame/test_stack_unstack.py

+26
Original file line numberDiff line numberDiff line change
@@ -1175,6 +1175,32 @@ def test_stack_timezone_aware_values():
11751175
tm.assert_series_equal(result, expected)
11761176

11771177

1178+
@pytest.mark.parametrize("dropna", [True, False])
1179+
def test_stack_empty_frame(dropna):
1180+
# GH 36113
1181+
expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64)
1182+
result = DataFrame(dtype=np.float64).stack(dropna=dropna)
1183+
tm.assert_series_equal(result, expected)
1184+
1185+
1186+
@pytest.mark.parametrize("dropna", [True, False])
1187+
@pytest.mark.parametrize("fill_value", [None, 0])
1188+
def test_stack_unstack_empty_frame(dropna, fill_value):
1189+
# GH 36113
1190+
result = (
1191+
DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value)
1192+
)
1193+
expected = DataFrame(dtype=np.int64)
1194+
tm.assert_frame_equal(result, expected)
1195+
1196+
1197+
def test_unstack_single_index_series():
1198+
# GH 36113
1199+
msg = r"index must be a MultiIndex to unstack.*"
1200+
with pytest.raises(ValueError, match=msg):
1201+
Series(dtype=np.int64).unstack()
1202+
1203+
11781204
def test_unstacking_multi_index_df():
11791205
# see gh-30740
11801206
df = DataFrame(

pandas/tests/reshape/test_pivot.py

-3
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import IS64
8-
97
import pandas as pd
108
from pandas import (
119
Categorical,
@@ -2102,7 +2100,6 @@ def test_pivot_duplicates(self):
21022100
with pytest.raises(ValueError, match="duplicate entries"):
21032101
data.pivot("a", "b", "c")
21042102

2105-
@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
21062103
def test_pivot_empty(self):
21072104
df = DataFrame(columns=["a", "b", "c"])
21082105
result = df.pivot("a", "b", "c")

0 commit comments

Comments
 (0)