From 4fcb18b6d4a4763edf9e6723c557db915298f8c5 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 May 2018 16:56:33 -0700 Subject: [PATCH 1/5] Added test for issue --- pandas/tests/frame/test_reshape.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index d89731dc09044..ecc58aa59255b 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -861,6 +861,23 @@ def test_stack_preserve_categorical_dtype(self): tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("level", [0, 'baz']) + def test_unstack_swaplevel(self, level): + # GH 20994 + mi = pd.MultiIndex.from_product([[0], ['d', 'c']], + names=['bar', 'baz']) + df = pd.DataFrame([[0, 2], [1, 3]], index=mi, columns=['B', 'A']) + df.columns.name = 'foo' + + expected = pd.DataFrame([ + [3, 1, 2, 0]], columns=pd.MultiIndex.from_tuples([ + ('c', 'A'), ('c', 'B'),('d', 'A'), ('d', 'B')], names=[ + 'baz', 'foo'])) + expected.index.name = 'bar' + + result = df.unstack().swaplevel(axis=1).sort_index(axis=1, level=level) + tm.assert_frame_equal(result, expected) + def test_unstack_fill_frame_object(): # GH12815 Test unstacking with object. From 160557df650ab56107ebf27d594692ad9ae7554b Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 May 2018 19:01:48 -0700 Subject: [PATCH 2/5] Fixed conditional and index sorting --- pandas/core/frame.py | 9 +++++---- pandas/tests/frame/test_reshape.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0437c479c9d81..5e72cd7aa33b1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4454,7 +4454,10 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, axis = self._get_axis_number(axis) labels = self._get_axis(axis) - if level: + # make sure that the axis is lexsorted to start + # if not we need to reconstruct to get the correct indexer + labels = labels._sort_levels_monotonic() + if level is not None: new_axis, indexer = labels.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) @@ -4462,9 +4465,6 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(labels, MultiIndex): from pandas.core.sorting import lexsort_indexer - # make sure that the axis is lexsorted to start - # if not we need to reconstruct to get the correct indexer - labels = labels._sort_levels_monotonic() indexer = lexsort_indexer(labels._get_labels_for_sorting(), orders=ascending, na_position=na_position) @@ -4685,6 +4685,7 @@ def swaplevel(self, i=-2, j=-1, axis=0): result.index = result.index.swaplevel(i, j) else: result.columns = result.columns.swaplevel(i, j) + return result def reorder_levels(self, order, axis=0): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index ecc58aa59255b..6e13aaf5a3e7f 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -862,7 +862,7 @@ def test_stack_preserve_categorical_dtype(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize("level", [0, 'baz']) - def test_unstack_swaplevel(self, level): + def test_unstack_swaplevel_sortlevel(self, level): # GH 20994 mi = pd.MultiIndex.from_product([[0], ['d', 'c']], names=['bar', 'baz']) From f087d42a9735c4452d7f4245bf7fddadd676c877 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Mon, 14 May 2018 19:02:43 -0700 Subject: [PATCH 3/5] LINT fixup --- pandas/core/frame.py | 1 - pandas/tests/frame/test_reshape.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5e72cd7aa33b1..7a3817d528056 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4685,7 +4685,6 @@ def swaplevel(self, i=-2, j=-1, axis=0): result.index = result.index.swaplevel(i, j) else: result.columns = result.columns.swaplevel(i, j) - return result def reorder_levels(self, order, axis=0): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 6e13aaf5a3e7f..d05321abefca6 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -871,7 +871,7 @@ def test_unstack_swaplevel_sortlevel(self, level): expected = pd.DataFrame([ [3, 1, 2, 0]], columns=pd.MultiIndex.from_tuples([ - ('c', 'A'), ('c', 'B'),('d', 'A'), ('d', 'B')], names=[ + ('c', 'A'), ('c', 'B'), ('d', 'A'), ('d', 'B')], names=[ 'baz', 'foo'])) expected.index.name = 'bar' From 4d54e3b7157289d9774999c011f9b309eb333e50 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 16:35:27 -0700 Subject: [PATCH 4/5] Added better mi sorting tests --- pandas/tests/frame/test_sorting.py | 34 ++++++++++++++++++++++------- pandas/tests/series/test_sorting.py | 9 ++++---- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index b60eb89e87da5..599ae683f914b 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -550,18 +550,36 @@ def test_sort_index(self): expected = frame.iloc[:, ::-1] assert_frame_equal(result, expected) - def test_sort_index_multiindex(self): + @pytest.mark.parametrize("level", ['A', 0]) # GH 21052 + def test_sort_index_multiindex(self, level): # GH13496 # sort rows by specified level of multi-index - mi = MultiIndex.from_tuples([[2, 1, 3], [1, 1, 1]], names=list('ABC')) - df = DataFrame([[1, 2], [3, 4]], mi) + mi = MultiIndex.from_tuples([ + [2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list('ABC')) + df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi) + + expected_mi = MultiIndex.from_tuples([ + [1, 1, 1], + [2, 1, 2], + [2, 1, 3]], names=list('ABC')) + expected = pd.DataFrame([ + [5, 6], + [3, 4], + [1, 2]], index=expected_mi) + result = df.sort_index(level=level) + assert_frame_equal(result, expected) - # MI sort, but no level: sort_level has no effect - mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) - df = DataFrame([[1, 2], [3, 4]], mi) - result = df.sort_index(sort_remaining=False) - expected = df.sort_index() + # sort_remaining=False + expected_mi = MultiIndex.from_tuples([ + [1, 1, 1], + [2, 1, 3], + [2, 1, 2]], names=list('ABC')) + expected = pd.DataFrame([ + [5, 6], + [1, 2], + [3, 4]], index=expected_mi) + result = df.sort_index(level=level, sort_remaining=False) assert_frame_equal(result, expected) def test_sort_index_intervalindex(self): diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 01b4ea6eaa238..13e0d1b12c372 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -141,19 +141,20 @@ def test_sort_index_inplace(self): assert result is None tm.assert_series_equal(random_order, self.ts) - def test_sort_index_multiindex(self): + @pytest.mark.parametrize("level", ['A', 0]) # GH 21052 + def test_sort_index_multiindex(self, level): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list('ABC')) s = Series([1, 2], mi) backwards = s.iloc[[1, 0]] # implicit sort_remaining=True - res = s.sort_index(level='A') + res = s.sort_index(level=level) assert_series_equal(backwards, res) # GH13496 - # rows share same level='A': sort has no effect without remaining lvls - res = s.sort_index(level='A', sort_remaining=False) + # sort has no effect without remaining lvls + res = s.sort_index(level=level, sort_remaining=False) assert_series_equal(s, res) def test_sort_index_kind(self): From 8f256489f874cd0a360d74826c5621175c5d84b3 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 15 May 2018 16:36:29 -0700 Subject: [PATCH 5/5] Fixed MI sorting for Series objs --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0e2ae22f35af7..622fa2c226134 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2616,7 +2616,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, axis = self._get_axis_number(axis) index = self.index - if level: + if level is not None: new_index, indexer = index.sortlevel(level, ascending=ascending, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex):