Skip to content

Commit dd5cef5

Browse files
linebp authored and jreback committed
BUG: na_position doesn't work for sort_index() with MultiIndex
closes #14784 Author: Line Pedersen <[email protected]> Closes #15845 from linebp/json_normalize_seperator and squashes the following commits: 66f809e [Line Pedersen] BUG GH14784 na_position doesn't work for sort_index() with MultiIndex
1 parent f114af0 commit dd5cef5

File tree

5 files changed

+79
-2
lines changed

5 files changed

+79
-2
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1564,6 +1564,7 @@ Indexing
15641564
- Bug in the HTML display with a ``MultiIndex`` and truncation (:issue:`14882`)
15651565
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
15661566
- Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`)
1567+
- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`)
15671568

15681569
I/O
15691570
^^^

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -3352,7 +3352,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
33523352
# make sure that the axis is lexsorted to start
33533353
# if not we need to reconstruct to get the correct indexer
33543354
labels = labels._sort_levels_monotonic()
3355-
indexer = lexsort_indexer(labels.labels, orders=ascending,
3355+
indexer = lexsort_indexer(labels._get_labels_for_sorting(),
3356+
orders=ascending,
33563357
na_position=na_position)
33573358
else:
33583359
from pandas.core.sorting import nargsort

pandas/core/indexes/multi.py

+16
Original file line numberDiff line numberDiff line change
@@ -1635,6 +1635,22 @@ def reorder_levels(self, order):
16351635
def __getslice__(self, i, j):
    """
    Legacy two-argument slicing hook.

    Builds ``slice(i, j)`` and returns whatever ``__getitem__`` returns
    for it, so slicing and item access share a single code path.
    """
    return self.__getitem__(slice(i, j))
16371637

1638+
def _get_labels_for_sorting(self):
1639+
"""
1640+
we categorizing our labels by using the
1641+
available catgories (all, not just observed)
1642+
excluding any missing ones (-1); this is in preparation
1643+
for sorting, where we need to disambiguate that -1 is not
1644+
a valid valid
1645+
"""
1646+
from pandas.core.categorical import Categorical
1647+
1648+
return [Categorical.from_codes(label,
1649+
np.arange(np.array(label).max() + 1,
1650+
dtype=label.dtype),
1651+
ordered=True)
1652+
for label in self.labels]
1653+
16381654
def sortlevel(self, level=0, ascending=True, sort_remaining=True):
16391655
"""
16401656
Sort MultiIndex at the requested level. The result will respect the

pandas/core/series.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1753,7 +1753,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
17531753
elif isinstance(index, MultiIndex):
17541754
from pandas.core.sorting import lexsort_indexer
17551755
labels = index._sort_levels_monotonic()
1756-
indexer = lexsort_indexer(labels.labels, orders=ascending)
1756+
indexer = lexsort_indexer(labels._get_labels_for_sorting(),
1757+
orders=ascending,
1758+
na_position=na_position)
17571759
else:
17581760
from pandas.core.sorting import nargsort
17591761

pandas/tests/test_multilevel.py

+57
Original file line numberDiff line numberDiff line change
@@ -2634,3 +2634,60 @@ def test_sort_non_lexsorted(self):
26342634

26352635
with pytest.raises(UnsortedIndexError):
26362636
result.loc[pd.IndexSlice['B':'C', 'a':'c'], :]
2637+
2638+
def test_sort_index_nan(self):
    # GH 14784
    # sort_index() on a MultiIndex containing NaN must honour na_position
    index = MultiIndex.from_tuples(
        [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]])
    frame = DataFrame(np.arange(16).reshape(4, 4),
                      index=index, columns=list('ABCD'))
    series = Series(np.arange(4), index=index)

    # expected positional order of the original rows after sorting
    nan_last = [3, 0, 2, 1]
    nan_first = [1, 2, 3, 0]

    # (keyword arguments passed to sort_index, expected row order)
    cases = [
        ({}, nan_last),  # default nan position is last
        ({'na_position': 'last'}, nan_last),
        ({'na_position': 'first'}, nan_first),
    ]

    # sorting a frame
    for kwargs, order in cases:
        tm.assert_frame_equal(frame.sort_index(**kwargs),
                              frame.iloc[order, :])

    # sorting a frame with removed rows: dropping NaN rows before or
    # after the sort must give the same result
    dates = pd.to_datetime([
        '20121002', '20121007', '20130130', '20130202', '20130305',
        '20121002', '20121207', '20130130', '20130202', '20130305',
        '20130202', '20130305'
    ])
    costs = DataFrame({
        'date': dates,
        'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
        'whole_cost': [1790, np.nan, 280, 259, np.nan, 623, 90, 312,
                       np.nan, 301, 359, 801],
        'cost': [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12]
    }).set_index(['date', 'user_id'])
    tm.assert_frame_equal(costs.dropna().sort_index(),
                          costs.sort_index().dropna())

    # sorting a series
    for kwargs, order in cases:
        tm.assert_series_equal(series.sort_index(**kwargs),
                               series.iloc[order])

0 commit comments

Comments
 (0)