diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6f046d3a9379d..f9e348b04e318 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -727,6 +727,8 @@ Reshaping - Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`) - Bug in :func:`concat` and :class:`DataFrame` constructor where input index names are not preserved in some cases (:issue:`13475`) - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) +- Bug in :meth:`DataFrame.stack` where stacking an empty :class:`DataFrame` would raise an error. Now returns an empty :class:`Series` with an empty :class:`MultiIndex` (:issue:`36113`) +- Bug in :meth:`Series.unstack`. Unstacking a :class:`Series` whose index is not a :class:`MultiIndex` now raises a ``ValueError`` with a clearer message (:issue:`36113`) - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`) - Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns are both multiindexed (:issue:`36360`) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 44e165b2d06ee..c197e142fecbc 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -399,6 +399,7 @@ def _unstack_multiple(data, clocs, fill_value=None): def unstack(obj, level, fill_value=None): + if isinstance(level, (tuple, list)): if len(level) != 1: # _unstack_multiple only handles MultiIndexes, @@ -416,6 +417,13 @@ def unstack(obj, level, fill_value=None): return _unstack_frame(obj, level, fill_value=fill_value) else: return obj.T.stack(dropna=False) + elif not 
isinstance(obj.index, MultiIndex): + # GH 36113 + # Give nicer error messages when unstack a Series whose + # Index is not a MultiIndex. + raise ValueError( + f"index must be a MultiIndex to unstack, {type(obj.index)} was passed" + ) else: if is_extension_array_dtype(obj.dtype): return _unstack_extension_series(obj, level, fill_value) @@ -513,7 +521,7 @@ def factorize(index): verify_integrity=False, ) - if frame._is_homogeneous_type: + if not frame.empty and frame._is_homogeneous_type: # For homogeneous EAs, frame._values will coerce to object. So # we concatenate instead. dtypes = list(frame.dtypes._values) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2a0da8b0fb35c..729f517c789a7 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -610,7 +610,7 @@ def compress_group_index(group_index, sort: bool = True): if sort and len(obs_group_ids) > 0: obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids) - return comp_ids, obs_group_ids + return ensure_int64(comp_ids), ensure_int64(obs_group_ids) def _reorder_by_uniques(uniques, labels): diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 3fa17c1764de3..c70bfc4a3602b 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1175,6 +1175,32 @@ def test_stack_timezone_aware_values(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("dropna", [True, False]) +def test_stack_empty_frame(dropna): + # GH 36113 + expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64) + result = DataFrame(dtype=np.float64).stack(dropna=dropna) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("fill_value", [None, 0]) +def test_stack_unstack_empty_frame(dropna, fill_value): + # GH 36113 + result = ( + DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value) + ) + expected 
= DataFrame(dtype=np.int64) + tm.assert_frame_equal(result, expected) + + +def test_unstack_single_index_series(): + # GH 36113 + msg = r"index must be a MultiIndex to unstack.*" + with pytest.raises(ValueError, match=msg): + Series(dtype=np.int64).unstack() + + def test_unstacking_multi_index_df(): # see gh-30740 df = DataFrame( diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 5a28cd5c418f0..f9b2a02920841 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -4,8 +4,6 @@ import numpy as np import pytest -from pandas.compat import IS64 - import pandas as pd from pandas import ( Categorical, @@ -2102,7 +2100,6 @@ def test_pivot_duplicates(self): with pytest.raises(ValueError, match="duplicate entries"): data.pivot("a", "b", "c") - @pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system") def test_pivot_empty(self): df = DataFrame(columns=["a", "b", "c"]) result = df.pivot("a", "b", "c")