pandas-dev · linebp · Apr 4, 2017
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -1564,6 +1564,7 @@ Indexing
 - Bug in the HTML display with with a ``MultiIndex`` and truncation (:issue:`14882`)
 - Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
 - Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`)
+- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` ``na_position`` doesn't work with ``MultiIndex``
 
 I/O
 ^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3352,7 +3352,8 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
             # make sure that the axis is lexsorted to start
             # if not we need to reconstruct to get the correct indexer
             labels = labels._sort_levels_monotonic()
-            indexer = lexsort_indexer(labels.labels, orders=ascending,
+            indexer = lexsort_indexer(labels._get_labels_for_sorting(),
+                                      orders=ascending,
                                       na_position=na_position)
         else:
             from pandas.core.sorting import nargsort

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -1635,6 +1635,22 @@ def reorder_levels(self, order):
     def __getslice__(self, i, j):
         return self.__getitem__(slice(i, j))
 
+    def _get_labels_for_sorting(self):
+        """
+        we categorizing our labels by using the
+        available catgories (all, not just observed)
+        excluding any missing ones (-1); this is in preparation
+        for sorting, where we need to disambiguate that -1 is not
+        a valid valid
+        """
+        from pandas.core.categorical import Categorical
+
+        return [Categorical.from_codes(label,
+                                       np.arange(np.array(label).max() + 1,
+                                                 dtype=label.dtype),
+                                       ordered=True)
+                for label in self.labels]
+
     def sortlevel(self, level=0, ascending=True, sort_remaining=True):
         """
         Sort MultiIndex at the requested level. The result will respect the

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1753,7 +1753,9 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
         elif isinstance(index, MultiIndex):
             from pandas.core.sorting import lexsort_indexer
             labels = index._sort_levels_monotonic()
-            indexer = lexsort_indexer(labels.labels, orders=ascending)
+            indexer = lexsort_indexer(labels._get_labels_for_sorting(),
+                                      orders=ascending,
+                                      na_position=na_position)
         else:
             from pandas.core.sorting import nargsort
 

diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
@@ -2634,3 +2634,60 @@ def test_sort_non_lexsorted(self):
 
         with pytest.raises(UnsortedIndexError):
             result.loc[pd.IndexSlice['B':'C', 'a':'c'], :]
+
+    def test_sort_index_nan(self):
+        tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]]
+        mi = MultiIndex.from_tuples(tuples)
+
+        df = DataFrame(np.arange(16).reshape(4, 4),
+                       index=mi, columns=list('ABCD'))
+        s = Series(np.arange(4), index=mi)
+
+        df2 = DataFrame({
+            'date': pd.to_datetime([
+                '20121002', '20121007', '20130130', '20130202', '20130305',
+                '20121002', '20121207', '20130130', '20130202', '20130305',
+                '20130202', '20130305'
+            ]),
+            'user_id': [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
+            'whole_cost': [1790, np.nan, 280, 259, np.nan, 623, 90, 312,
+                           np.nan, 301, 359, 801],
+            'cost': [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12]
+        }).set_index(['date', 'user_id'])
+
+        # sorting frame, default nan position is last
+        result = df.sort_index()
+        expected = df.iloc[[3, 0, 2, 1], :]
+        tm.assert_frame_equal(result, expected)
+
+        # sorting frame, nan position last
+        result = df.sort_index(na_position='last')
+        expected = df.iloc[[3, 0, 2, 1], :]
+        tm.assert_frame_equal(result, expected)
+
+        # sorting frame, nan position first
+        result = df.sort_index(na_position='first')
+        expected = df.iloc[[1, 2, 3, 0], :]
+        tm.assert_frame_equal(result, expected)
+
+        # sorting frame with removed rows
+        result = df2.dropna().sort_index()
+        expected = df2.sort_index().dropna()
+        tm.assert_frame_equal(result, expected)
+
+        # sorting series, default nan position is last
+        result = s.sort_index()
+        expected = s.iloc[[3, 0, 2, 1]]
+        tm.assert_series_equal(result, expected)
+
+        # sorting series, nan position last
+        result = s.sort_index(na_position='last')
+        expected = s.iloc[[3, 0, 2, 1]]
+        tm.assert_series_equal(result, expected)
+
+        # sorting series, nan position first
+        result = s.sort_index(na_position='first')
+        expected = s.iloc[[1, 2, 3, 0]]
+        tm.assert_series_equal(result, expected)
+
+