From 74a210476a1533e4c6b201c9d5cdcda0fe89ea01 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Sun, 15 Oct 2017 23:56:35 +0200 Subject: [PATCH] BUG: support "fill_value" for ".unstack()" called with list of levels closes #13971 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/reshape/reshape.py | 8 +++---- pandas/tests/frame/test_reshape.py | 38 ++++++++++++++++++++++++------ 3 files changed, 36 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 642ee3c8e54c7..598e452640781 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1010,6 +1010,7 @@ Reshaping - Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) - Bug in :func:`pivot_table` where the result's columns did not preserve the categorical dtype of ``columns`` when ``dropna`` was ``False`` (:issue:`17842`) - Bug in ``DataFrame.drop_duplicates`` where dropping with non-unique column names raised a ``ValueError`` (:issue:`17836`) +- Bug in :func:`unstack` which, when called on a list of levels, would discard the ``fill_value`` argument (:issue:`13971`) Numeric ^^^^^^^ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 4eb35daba2282..b8885820f4a49 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -292,7 +292,7 @@ def get_new_index(self): names=self.new_index_names, verify_integrity=False) -def _unstack_multiple(data, clocs): +def _unstack_multiple(data, clocs, fill_value=None): if len(clocs) == 0: return data @@ -330,7 +330,7 @@ def _unstack_multiple(data, clocs): if isinstance(data, Series): dummy = data.copy() dummy.index = dummy_index - unstacked = dummy.unstack('__placeholder__') + unstacked = dummy.unstack('__placeholder__', fill_value=fill_value) new_levels = clevels new_names = cnames new_labels = recons_labels @@ -347,7 +347,7 @@ def _unstack_multiple(data, clocs): 
dummy = data.copy() dummy.index = dummy_index - unstacked = dummy.unstack('__placeholder__') + unstacked = dummy.unstack('__placeholder__', fill_value=fill_value) if isinstance(unstacked, Series): unstcols = unstacked.index else: @@ -460,7 +460,7 @@ def unstack(obj, level, fill_value=None): if len(level) != 1: # _unstack_multiple only handles MultiIndexes, # and isn't needed for a single level - return _unstack_multiple(obj, level) + return _unstack_multiple(obj, level, fill_value=fill_value) else: level = level[0] diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index e2f362ebdc895..d795aa835b00a 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -116,22 +116,22 @@ def test_pivot_index_none(self): tm.assert_frame_equal(result, expected) def test_stack_unstack(self): - f = self.frame.copy() - f[:] = np.arange(np.prod(f.shape)).reshape(f.shape) + df = self.frame.copy() + df[:] = np.arange(np.prod(df.shape)).reshape(df.shape) - stacked = f.stack() + stacked = df.stack() stacked_df = DataFrame({'foo': stacked, 'bar': stacked}) unstacked = stacked.unstack() unstacked_df = stacked_df.unstack() - assert_frame_equal(unstacked, f) - assert_frame_equal(unstacked_df['bar'], f) + assert_frame_equal(unstacked, df) + assert_frame_equal(unstacked_df['bar'], df) unstacked_cols = stacked.unstack(0) unstacked_cols_df = stacked_df.unstack(0) - assert_frame_equal(unstacked_cols.T, f) - assert_frame_equal(unstacked_cols_df['bar'].T, f) + assert_frame_equal(unstacked_cols.T, df) + assert_frame_equal(unstacked_cols_df['bar'].T, df) def test_unstack_fill(self): @@ -154,6 +154,30 @@ def test_unstack_fill(self): index=['x', 'y', 'z'], dtype=np.float) assert_frame_equal(result, expected) + # GH #13971: fill_value when unstacking multiple levels: + df = DataFrame({'x': ['a', 'a', 'b'], + 'y': ['j', 'k', 'j'], + 'z': [0, 1, 2], + 'w': [0, 1, 2]}).set_index(['x', 'y', 'z']) + unstacked = df.unstack(['x', 'y'], 
fill_value=0) + key = ('w', 'b', 'j') + expected = unstacked[key] + result = pd.Series([0, 0, 2], index=unstacked.index, name=key) + assert_series_equal(result, expected) + + stacked = unstacked.stack(['x', 'y']) + stacked.index = stacked.index.reorder_levels(df.index.names) + # Workaround for GH #17886 (unnecessarily casts to float): + stacked = stacked.astype(np.int64) + result = stacked.loc[df.index] + assert_frame_equal(result, df) + + # From a series + s = df['w'] + result = s.unstack(['x', 'y'], fill_value=0) + expected = unstacked['w'] + assert_frame_equal(result, expected) + def test_unstack_fill_frame(self): # From a dataframe