From d1d651357c78316bb762b98e33147b69b1ff4410 Mon Sep 17 00:00:00 2001 From: "Brandon M. Burroughs" Date: Thu, 20 Oct 2016 19:26:44 -0400 Subject: [PATCH 1/6] BUG Series.sort_index does not accept parameters kind and na_position --- doc/source/whatsnew/v0.19.1.txt | 1 + pandas/core/series.py | 9 ++++++--- pandas/tests/series/test_sorting.py | 25 +++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 5180b9a092f6c..3d2e8cbf0ad40 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -33,6 +33,7 @@ Bug Fixes +- Bug in ``Series.sort_index`` where parameters ``kind`` and ``na_position`` did not exist (:issue:`13589`, :issue:`14444`) - Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1c6b13885dd01..c1ce0ccbf3e24 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1773,7 +1773,7 @@ def _try_kind_sort(arr): @Appender(generic._shared_docs['sort_index'] % _shared_doc_kwargs) def sort_index(self, axis=0, level=None, ascending=True, inplace=False, - sort_remaining=True): + kind='quicksort', na_position='last', sort_remaining=True): axis = self._get_axis_number(axis) index = self.index @@ -1786,8 +1786,11 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, indexer = _ensure_platform_int(indexer) new_index = index.take(indexer) else: - new_index, indexer = index.sort_values(return_indexer=True, - ascending=ascending) + from pandas.core.groupby import _nargsort + + indexer = _nargsort(index, kind=kind, ascending=ascending, + na_position=na_position) + new_index = index.take(indexer) new_values = self._values.take(indexer) result = self._constructor(new_values, index=new_index) diff --git a/pandas/tests/series/test_sorting.py b/pandas/tests/series/test_sorting.py index 826201adbdb50..69e70c15cae50 100644 --- a/pandas/tests/series/test_sorting.py +++ b/pandas/tests/series/test_sorting.py @@ -144,3 +144,28 @@ def test_sort_index_multiindex(self): # rows share same level='A': sort has no effect without remaining lvls res = s.sort_index(level='A', sort_remaining=False) assert_series_equal(s, res) + + def test_sort_index_kind(self): + # GH #14444 & #13589: Add support for sort algo choosing + series = Series(index=[3, 2, 1, 4, 3]) + expected_series = Series(index=[1, 2, 3, 3, 4]) + + index_sorted_series = series.sort_index(kind='mergesort') + assert_series_equal(expected_series, index_sorted_series) + + index_sorted_series = series.sort_index(kind='quicksort') + assert_series_equal(expected_series, index_sorted_series) + + index_sorted_series = series.sort_index(kind='heapsort') + assert_series_equal(expected_series, index_sorted_series) + + def test_sort_index_na_position(self): + series = Series(index=[3, 2, 1, 4, 3, np.nan]) + + expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4]) + index_sorted_series = series.sort_index(na_position='first') + assert_series_equal(expected_series_first, index_sorted_series) + + expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan]) + index_sorted_series = series.sort_index(na_position='last') + assert_series_equal(expected_series_last, index_sorted_series) From aae1c1c0c494c8702d06e403f84da8ee4ae3253a Mon Sep 17 00:00:00 2001 From: "Brandon M. Burroughs" Date: Thu, 27 Oct 2016 16:48:47 -0400 Subject: [PATCH 2/6] Moving ensure and take --- doc/source/whatsnew/v0.19.1.txt | 2 +- pandas/core/series.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 3d2e8cbf0ad40..42ddf85bcb949 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -30,10 +30,10 @@ Performance Improvements Bug Fixes ~~~~~~~~~ +- Bug in ``Series.sort_index`` where parameters ``kind`` and ``na_position`` did not exist (:issue:`13589`, :issue:`14444`) -- Bug in ``Series.sort_index`` where parameters ``kind`` and ``na_position`` did not exist (:issue:`13589`, :issue:`14444`) - Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`) diff --git a/pandas/core/series.py b/pandas/core/series.py index c1ce0ccbf3e24..943b65dfa3b20 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1783,14 +1783,14 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, elif isinstance(index, MultiIndex): from pandas.core.groupby import _lexsort_indexer indexer = _lexsort_indexer(index.labels, orders=ascending) - indexer = _ensure_platform_int(indexer) - new_index = index.take(indexer) + else: from pandas.core.groupby import _nargsort - indexer = _nargsort(index, kind=kind, ascending=ascending, na_position=na_position) - new_index = index.take(indexer) + + indexer = _ensure_platform_int(indexer) + new_index = index.take(indexer) new_values = self._values.take(indexer) result = self._constructor(new_values, index=new_index) From 43dd994d9b4bd74eff7ace984324446de4e1fb84 Mon Sep 17 00:00:00 2001 From: "Brandon M. Burroughs" Date: Sun, 20 Nov 2016 16:56:10 -0500 Subject: [PATCH 3/6] Adding na_position for multiindex --- pandas/core/series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 943b65dfa3b20..f766cb5973f7c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1782,7 +1782,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex): from pandas.core.groupby import _lexsort_indexer - indexer = _lexsort_indexer(index.labels, orders=ascending) + indexer = _lexsort_indexer(index.labels, orders=ascending, + na_position=na_position) else: from pandas.core.groupby import _nargsort From b013103ee05398d164dc25c09455660ba711f375 Mon Sep 17 00:00:00 2001 From: "Brandon M. Burroughs" Date: Sun, 20 Nov 2016 18:02:28 -0500 Subject: [PATCH 4/6] Moving to 0.20.0 --- doc/source/whatsnew/v0.19.1.txt | 1 - doc/source/whatsnew/v0.20.0.txt | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.19.1.txt b/doc/source/whatsnew/v0.19.1.txt index 42ddf85bcb949..5180b9a092f6c 100644 --- a/doc/source/whatsnew/v0.19.1.txt +++ b/doc/source/whatsnew/v0.19.1.txt @@ -30,7 +30,6 @@ Performance Improvements Bug Fixes ~~~~~~~~~ -- Bug in ``Series.sort_index`` where parameters ``kind`` and ``na_position`` did not exist (:issue:`13589`, :issue:`14444`) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 7fa9991138fba..99d0e063a28df 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -29,6 +29,7 @@ New features Other enhancements ^^^^^^^^^^^^^^^^^^ +- ``Series.sort_index`` accepts parameters ``kind`` and ``na_position`` (:issue:`13589`, :issue:`14444`) From 423c7717ba182a539e495bb08659b1705f61c8da Mon Sep 17 00:00:00 2001 From: "Brandon M. Burroughs" Date: Fri, 2 Dec 2016 17:40:30 -0500 Subject: [PATCH 5/6] Adding note that na_position doesn't work for MultiIndex --- pandas/core/generic.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 697438df87d4f..98bc00a8e7e0d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2029,7 +2029,8 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, DataFrames, this option is only applied when sorting on a single column or label. na_position : {'first', 'last'}, default 'last' - `first` puts NaNs at the beginning, `last` puts NaNs at the end + `first` puts NaNs at the beginning, `last` puts NaNs at the end. + Not implemented for MultiIndex. sort_remaining : bool, default True if true and sorting by level and index is multilevel, sort by other levels too (in order) after sorting by specified level From 05e9e526bfaa3e7ac9599c3f15321a2b7fc7859f Mon Sep 17 00:00:00 2001 From: "Brandon M. Burroughs" Date: Fri, 2 Dec 2016 18:37:20 -0500 Subject: [PATCH 6/6] Removing na_position argument --- pandas/core/series.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f766cb5973f7c..4e0df4fdacc4f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1782,9 +1782,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, sort_remaining=sort_remaining) elif isinstance(index, MultiIndex): from pandas.core.groupby import _lexsort_indexer - indexer = _lexsort_indexer(index.labels, orders=ascending, - na_position=na_position) - + indexer = _lexsort_indexer(index.labels, orders=ascending) else: from pandas.core.groupby import _nargsort indexer = _nargsort(index, kind=kind, ascending=ascending,