Skip to content

BUG: pandas.DataFrame().stack() raises an error, while an empty result is expected #36185

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Nov 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
165fd72
BUG: GH36113
steveya Sep 7, 2020
e0c1a8d
modify tests to avoid deprecated errors
steveya Sep 7, 2020
f765acf
PEP 8 compliant
steveya Sep 7, 2020
109d312
remove trailing white space
steveya Sep 7, 2020
519a140
black format checked
steveya Sep 7, 2020
9d20ff5
DataFrame().stack should return an empty Series with dtype np.float64…
steveya Sep 9, 2020
d460db6
PEP8 again.
steveya Sep 9, 2020
bae2bd8
remove trailing space...\
steveya Sep 9, 2020
047ae40
add a comma to pass black lint
steveya Sep 9, 2020
c0fffe8
simplify fixes and parametrize tests
steveya Sep 9, 2020
6b2b9bd
add error messages when unstack frame and series with single level index
steveya Sep 12, 2020
efc0603
apply ValueError location
steveya Sep 12, 2020
dac5f32
change the place where error is raised
steveya Sep 13, 2020
6524a6c
add a test for unstack series with one level of index. elaborate chan…
steveya Sep 16, 2020
6c71101
adding type information to exception message.
steveya Sep 22, 2020
e57aa51
Merge branch 'master' into GH36113
jreback Oct 10, 2020
f2f29bc
fix black format problem
steveya Oct 24, 2020
07d9ad5
resolve doc/source/whatsnew/v1.2.0.rst conflicts
steveya Nov 5, 2020
148b77d
fix unittest assert error message
steveya Nov 5, 2020
99f8280
change dtype of empty series and dataframe in test
steveya Nov 5, 2020
c4e244a
formatting
steveya Nov 5, 2020
668189f
change intp to int64 in testing of stack unstack empty frame
steveya Nov 20, 2020
20858db
Merge remote-tracking branch 'upstream/master' into GH36113
steveya Nov 20, 2020
4f95523
ensure indexer is of type int64
steveya Nov 22, 2020
7ab1155
Merge remote-tracking branch 'upstream/master' into GH36113
steveya Nov 25, 2020
475f158
remove xfail
steveya Nov 26, 2020
bdf49d3
remove unused import
steveya Nov 26, 2020
f96453e
Merge remote-tracking branch 'upstream/master' into GH36113
steveya Nov 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,8 @@ Reshaping
- Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`)
- Bug in :func:`concat` and :class:`DataFrame` constructor where input index names are not preserved in some cases (:issue:`13475`)
- Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
- Bug in :meth:`DataFrame.stack` where stacking an empty :class:`DataFrame` would raise an error (:issue:`36113`). Now returning an empty :class:`Series` with an empty :class:`MultiIndex`.
- Bug in :meth:`Series.unstack`. Now unstacking a :class:`Series` whose index is not a :class:`MultiIndex` raises a ``ValueError`` (:issue:`36113`)
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
- Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns are both multiindexed (:issue:`36360`)
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,7 @@ def _unstack_multiple(data, clocs, fill_value=None):


def unstack(obj, level, fill_value=None):

if isinstance(level, (tuple, list)):
if len(level) != 1:
# _unstack_multiple only handles MultiIndexes,
Expand All @@ -416,6 +417,13 @@ def unstack(obj, level, fill_value=None):
return _unstack_frame(obj, level, fill_value=fill_value)
else:
return obj.T.stack(dropna=False)
elif not isinstance(obj.index, MultiIndex):
# GH 36113
# Give nicer error messages when unstack a Series whose
# Index is not a MultiIndex.
raise ValueError(
f"index must be a MultiIndex to unstack, {type(obj.index)} was passed"
)
else:
if is_extension_array_dtype(obj.dtype):
return _unstack_extension_series(obj, level, fill_value)
Expand Down Expand Up @@ -513,7 +521,7 @@ def factorize(index):
verify_integrity=False,
)

if frame._is_homogeneous_type:
if not frame.empty and frame._is_homogeneous_type:
# For homogeneous EAs, frame._values will coerce to object. So
# we concatenate instead.
dtypes = list(frame.dtypes._values)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ def compress_group_index(group_index, sort: bool = True):
if sort and len(obs_group_ids) > 0:
obs_group_ids, comp_ids = _reorder_by_uniques(obs_group_ids, comp_ids)

return comp_ids, obs_group_ids
return ensure_int64(comp_ids), ensure_int64(obs_group_ids)


def _reorder_by_uniques(uniques, labels):
Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,32 @@ def test_stack_timezone_aware_values():
tm.assert_series_equal(result, expected)


# Checks the result of stacking an empty DataFrame, once for each
# parametrized ``dropna`` value (True and False).
@pytest.mark.parametrize("dropna", [True, False])
def test_stack_empty_frame(dropna):
# GH 36113
# Expected: an empty float64 Series whose index is a MultiIndex built from
# two empty lists followed by two more empty lists (per the MultiIndex
# constructor these are the levels and the codes).
expected = Series(index=MultiIndex([[], []], [[], []]), dtype=np.float64)
# Stack an empty float64 DataFrame using the parametrized ``dropna``.
result = DataFrame(dtype=np.float64).stack(dropna=dropna)
# The stacked result must compare equal to the empty Series above.
tm.assert_series_equal(result, expected)


# Checks the stack -> unstack round trip on an empty DataFrame for every
# combination of ``dropna`` (True/False) and ``fill_value`` (None/0).
@pytest.mark.parametrize("dropna", [True, False])
@pytest.mark.parametrize("fill_value", [None, 0])
def test_stack_unstack_empty_frame(dropna, fill_value):
# GH 36113
# Stack an empty int64 DataFrame, then unstack the result again.
result = (
DataFrame(dtype=np.int64).stack(dropna=dropna).unstack(fill_value=fill_value)
)
# The round trip is expected to give back an empty int64 DataFrame.
expected = DataFrame(dtype=np.int64)
tm.assert_frame_equal(result, expected)


# Checks that unstacking a Series created without a MultiIndex raises a
# ValueError whose message matches the pattern below.
def test_unstack_single_index_series():
# GH 36113
# Regex matched against the raised error message; the trailing ``.*``
# allows any text after the fixed prefix.
msg = r"index must be a MultiIndex to unstack.*"
with pytest.raises(ValueError, match=msg):
# Empty int64 Series constructed with no explicit index.
Series(dtype=np.int64).unstack()


def test_unstacking_multi_index_df():
# see gh-30740
df = DataFrame(
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import numpy as np
import pytest

from pandas.compat import IS64

import pandas as pd
from pandas import (
Categorical,
Expand Down Expand Up @@ -2102,7 +2100,6 @@ def test_pivot_duplicates(self):
with pytest.raises(ValueError, match="duplicate entries"):
data.pivot("a", "b", "c")

@pytest.mark.xfail(not IS64, reason="GH 36579: fail on 32-bit system")
def test_pivot_empty(self):
df = DataFrame(columns=["a", "b", "c"])
result = df.pivot("a", "b", "c")
Expand Down