From aa981ddde0763746b5fecce9389062b82b1ebbf7 Mon Sep 17 00:00:00 2001 From: patrick Date: Tue, 24 Mar 2020 23:51:49 +0100 Subject: [PATCH 1/7] BUG: Fix bug for unstack with a lot of indices (#32624) --- pandas/core/reshape/reshape.py | 2 +- pandas/tests/frame/test_reshape.py | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 14c2a05e5db2c..d421a18f5869c 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -363,7 +363,7 @@ def _unstack_multiple(data, clocs, fill_value=None): for i in range(len(clocs)): val = clocs[i] result = result.unstack(val, fill_value=fill_value) - clocs = [v if i > v else v - 1 for v in clocs] + clocs = [v if v < val else v - 1 for v in clocs] return result diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 4f039baa5c7bd..43250c0b306ee 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -765,6 +765,19 @@ def test_unstack_unused_level(self, cols): expected.index = expected.index.droplevel("C") tm.assert_frame_equal(result, expected) + def test_unstack_long_index(self): + # PH 32624: Error when using a lot of indices to unstack. The error occurred only, if a lot of indices are used. 
+ df = pd.DataFrame([[1]], + columns=pd.MultiIndex.from_tuples([[0]], names=['c1']), + index=pd.MultiIndex.from_tuples([[0, 0, 1, 0, 0, 0, 1]], + names=['i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7'])) + result = df.unstack(["i2", "i3", "i4", "i5", "i6", "i7"]) + expected = pd.DataFrame([[1]], + columns=pd.MultiIndex.from_tuples([[0, 0, 1, 0, 0, 0, 1]], + names=['c1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7']), + index=pd.Index([0], name='i1')) + tm.assert_frame_equal(result, expected) + def test_unstack_nan_index(self): # GH7466 def cast(val): val_str = "" if val != val else val From fbaa9cdf4b1ac928108ed2e98858b8b49735e85e Mon Sep 17 00:00:00 2001 From: patrick Date: Tue, 24 Mar 2020 23:52:35 +0100 Subject: [PATCH 2/7] CLN: Fix equals empty list with bool expression --- pandas/core/reshape/reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index d421a18f5869c..88e61d2392773 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -338,7 +338,7 @@ def _unstack_multiple(data, clocs, fill_value=None): comp_ids, obs_ids = compress_group_index(group_index, sort=False) recons_codes = decons_obs_group_ids(comp_ids, obs_ids, shape, ccodes, xnull=False) - if rlocs == []: + if not rlocs: # Everything is in clocs, so the dummy df has a regular index dummy_index = Index(obs_ids, name="__placeholder__") else: From 700d9058b89fcfb6e81a780519b4976d2254ea2b Mon Sep 17 00:00:00 2001 From: patrick Date: Tue, 24 Mar 2020 23:53:35 +0100 Subject: [PATCH 3/7] DOC: Fix Typo parced --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 7414165ab5711..3dd17f5747df9 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -260,7 +260,7 @@ def _convert_listlike_datetimes( Parameters ---------- arg : list, tuple, ndarray, Series, Index - date to 
be parced + date to be parsed name : object None or string for the Index name tz : object From 482f3797647ad2eb2adb430727d83bac0811a1d9 Mon Sep 17 00:00:00 2001 From: patrick Date: Wed, 25 Mar 2020 00:09:27 +0100 Subject: [PATCH 4/7] CLN: Add black pandas and flake8 issues --- pandas/tests/frame/test_reshape.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 43250c0b306ee..0989e2fee50cc 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -766,16 +766,25 @@ def test_unstack_unused_level(self, cols): tm.assert_frame_equal(result, expected) def test_unstack_long_index(self): - # PH 32624: Error when using a lot of indices to unstack. The error occurred only, if a lot of indices are used. - df = pd.DataFrame([[1]], - columns=pd.MultiIndex.from_tuples([[0]], names=['c1']), - index=pd.MultiIndex.from_tuples([[0, 0, 1, 0, 0, 0, 1]], - names=['i1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7'])) + # GH 32624: Error when using a lot of indices to unstack. + # The error occurred only if a lot of indices were used. 
+ df = pd.DataFrame( + [[1]], + columns=pd.MultiIndex.from_tuples([[0]], names=["c1"]), + index=pd.MultiIndex.from_tuples( + [[0, 0, 1, 0, 0, 0, 1]], + names=["i1", "i2", "i3", "i4", "i5", "i6", "i7"], + ), + ) result = df.unstack(["i2", "i3", "i4", "i5", "i6", "i7"]) - expected = pd.DataFrame([[1]], - columns=pd.MultiIndex.from_tuples([[0, 0, 1, 0, 0, 0, 1]], - names=['c1', 'i2', 'i3', 'i4', 'i5', 'i6', 'i7']), - index=pd.Index([0], name='i1')) + expected = pd.DataFrame( + [[1]], + columns=pd.MultiIndex.from_tuples( + [[0, 0, 1, 0, 0, 0, 1]], + names=["c1", "i2", "i3", "i4", "i5", "i6", "i7"], + ), + index=pd.Index([0], name="i1"), + ) tm.assert_frame_equal(result, expected) def test_unstack_nan_index(self): # GH7466 From d617caa75b336596290c0489eb268c8d0ea7fa30 Mon Sep 17 00:00:00 2001 From: patrick Date: Wed, 25 Mar 2020 20:47:07 +0100 Subject: [PATCH 5/7] Add unittest for #28306 and #24729 and revert typo. Add whats new --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/tests/frame/test_reshape.py | 33 ++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index dcbfe6aeb9a12..8d868550a7d12 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -387,6 +387,7 @@ Reshaping - Bug in :meth:`DataFrame.apply` where callback was called with :class:`Series` parameter even though ``raw=True`` requested. 
(:issue:`32423`) - Bug in :meth:`DataFrame.pivot_table` losing timezone information when creating a :class:`MultiIndex` level from a column with timezone-aware dtype (:issue:`32558`) - :meth:`DataFrame.agg` now provides more descriptive ``SpecificationError`` message when attempting to aggregating non-existant column (:issue:`32755`) +- Bug in :meth:`DataFrame.unstack` when unstacking multiple row levels of a :class:`DataFrame` that has both :class:`MultiIndex` columns and a :class:`MultiIndex` index (:issue:`32624`, :issue:`24729` and :issue:`28306`) Sparse ^^^^^^ diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 0989e2fee50cc..7d62b8f717f0d 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -785,6 +785,39 @@ def test_unstack_long_index(self): ), index=pd.Index([0], name="i1"), ) + print(expected) + tm.assert_frame_equal(result, expected) + + def test_unstack_multi_level_cols(self): + # GH 24729: Unstack a df with multi level columns + df = pd.DataFrame( + [[0.0, 0.0], [0.0, 0.0]], + columns=pd.MultiIndex.from_tuples( + [["B", "C"], ["B", "D"]], names=["c1", "c2"] + ), + index=pd.MultiIndex.from_tuples( + [[10, 20, 30], [10, 20, 40]], names=["i1", "i2", "i3"], + ), + ) + assert df.unstack(["i2", "i1"]).columns.names[-2:] == ["i2", "i1"] + + def test_unstack_multi_level_rows_and_cols(self): + # GH 28306: Unstack df with multi level cols and rows + df = pd.DataFrame( + [[1, 2], [3, 4], [-1, -2], [-3, -4]], + columns=pd.MultiIndex.from_tuples([["a", "b", "c"], ["d", "e", "f"]]), + index=pd.MultiIndex.from_tuples( + [ + ["m1", "P3", 222], + ["m1", "A5", 111], + ["m2", "P3", 222], + ["m2", "A5", 111], + ], + names=["i1", "i2", "i3"], + ), + ) + result = df.unstack(["i3", "i2"]) + expected = df.unstack(["i3"]).unstack(["i2"]) tm.assert_frame_equal(result, expected) def test_unstack_nan_index(self): # GH7466 From 4028cca3fa302ccbd71b7108afb8ddf5526e3d1b Mon Sep 17 00:00:00 2001 From: patrick Date: Wed, 25 Mar 2020 20:47:54 +0100 Subject: [PATCH 6/7] Revert "DOC: 
Fix Typo parced" This reverts commit 700d9058 --- pandas/core/tools/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3dd17f5747df9..7414165ab5711 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -260,7 +260,7 @@ def _convert_listlike_datetimes( Parameters ---------- arg : list, tuple, ndarray, Series, Index - date to be parsed + date to be parced name : object None or string for the Index name tz : object From 008aebb82e48ceb391ffdd73974000a90df4ba7b Mon Sep 17 00:00:00 2001 From: patrick Date: Wed, 25 Mar 2020 20:48:19 +0100 Subject: [PATCH 7/7] Delete print statement --- pandas/tests/frame/test_reshape.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 7d62b8f717f0d..9d3c40ce926d7 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -785,7 +785,6 @@ def test_unstack_long_index(self): ), index=pd.Index([0], name="i1"), ) - print(expected) tm.assert_frame_equal(result, expected) def test_unstack_multi_level_cols(self):