|
| 1 | +import numpy as np |
| 2 | +import pytest |
| 3 | + |
| 4 | +import pandas as pd |
| 5 | +from pandas import CategoricalDtype, DataFrame, IntervalIndex, MultiIndex, Series |
| 6 | +import pandas.util.testing as tm |
| 7 | + |
| 8 | + |
class TestDataFrameSortIndex:
    """Tests for ``DataFrame.sort_index`` across several index types."""

    def test_sort_index_nan(self):
        """A NaN index label is placed according to ``na_position``."""
        # GH#3917

        # Test DataFrame with nan label
        df = DataFrame(
            {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]},
            index=[1, 2, 3, 4, 5, 6, np.nan],
        )

        # NaN label, ascending=True, na_position='last'
        sorted_df = df.sort_index(kind="quicksort", ascending=True, na_position="last")
        expected = DataFrame(
            {"A": [1, 2, np.nan, 1, 6, 8, 4], "B": [9, np.nan, 5, 2, 5, 4, 5]},
            index=[1, 2, 3, 4, 5, 6, np.nan],
        )
        tm.assert_frame_equal(sorted_df, expected)

        # NaN label, ascending=True, na_position='first'
        sorted_df = df.sort_index(na_position="first")
        expected = DataFrame(
            {"A": [4, 1, 2, np.nan, 1, 6, 8], "B": [5, 9, np.nan, 5, 2, 5, 4]},
            index=[np.nan, 1, 2, 3, 4, 5, 6],
        )
        tm.assert_frame_equal(sorted_df, expected)

        # NaN label, ascending=False, na_position='last' (left at its default)
        sorted_df = df.sort_index(kind="quicksort", ascending=False)
        expected = DataFrame(
            {"A": [8, 6, 1, np.nan, 2, 1, 4], "B": [4, 5, 2, 5, np.nan, 9, 5]},
            index=[6, 5, 4, 3, 2, 1, np.nan],
        )
        tm.assert_frame_equal(sorted_df, expected)

        # NaN label, ascending=False, na_position='first'
        sorted_df = df.sort_index(
            kind="quicksort", ascending=False, na_position="first"
        )
        expected = DataFrame(
            {"A": [4, 8, 6, 1, np.nan, 2, 1], "B": [5, 4, 5, 2, 5, np.nan, 9]},
            index=[np.nan, 6, 5, 4, 3, 2, 1],
        )
        tm.assert_frame_equal(sorted_df, expected)

    def test_sort_index_multi_index(self):
        """Sorting by a list of level names works on a three-level index."""
        # GH#25775, testing that sorting by index works with a multi-index.
        df = DataFrame(
            {"a": [3, 1, 2], "b": [0, 0, 0], "c": [0, 1, 2], "d": list("abc")}
        )
        result = df.set_index(list("abc")).sort_index(level=list("ba"))

        expected = DataFrame(
            {"a": [1, 2, 3], "b": [0, 0, 0], "c": [1, 2, 0], "d": list("bca")}
        )
        expected = expected.set_index(list("abc"))

        tm.assert_frame_equal(result, expected)

    def test_sort_index_inplace(self):
        """``inplace=True`` sorts on either axis, ascending or descending."""
        frame = DataFrame(
            np.random.randn(4, 4), index=[1, 2, 3, 4], columns=["A", "B", "C", "D"]
        )

        # axis=0
        unordered = frame.loc[[3, 2, 4, 1]]
        # Keep a live reference to the column: comparing ``id()`` values of
        # temporaries is unreliable because an id may be reused once the
        # first object has been freed.
        a_values = unordered["A"]
        df = unordered.copy()
        df.sort_index(inplace=True)
        expected = frame
        tm.assert_frame_equal(df, expected)
        assert a_values is not df["A"]

        df = unordered.copy()
        df.sort_index(ascending=False, inplace=True)
        expected = frame[::-1]
        tm.assert_frame_equal(df, expected)

        # axis=1
        unordered = frame.loc[:, ["D", "B", "C", "A"]]
        df = unordered.copy()
        df.sort_index(axis=1, inplace=True)
        expected = frame
        tm.assert_frame_equal(df, expected)

        df = unordered.copy()
        df.sort_index(axis=1, ascending=False, inplace=True)
        expected = frame.iloc[:, ::-1]
        tm.assert_frame_equal(df, expected)

    def test_sort_index_different_sortorder(self):
        """A per-level ``ascending`` list sorts each level in its own direction."""
        A = np.arange(20).repeat(5)
        B = np.tile(np.arange(5), 20)

        indexer = np.random.permutation(100)
        A = A.take(indexer)
        B = B.take(indexer)

        df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)})

        # Reference ordering: A ascending, then B descending (B is negated
        # relative to its maximum so lexsort's ascending order reverses it).
        ex_indexer = np.lexsort((df.B.max() - df.B, df.A))

        # test with multiindex, too
        idf = df.set_index(["A", "B"])

        result = idf.sort_index(ascending=[1, 0])
        expected = idf.take(ex_indexer)
        tm.assert_frame_equal(result, expected)

        # also, Series!
        result = idf["C"].sort_index(ascending=[1, 0])
        tm.assert_series_equal(result, expected["C"])

    def test_sort_index_level(self):
        """Sorting by named levels, with and without ``sort_remaining``."""
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        df = DataFrame([[1, 2], [3, 4]], mi)

        result = df.sort_index(level="A", sort_remaining=False)
        expected = df
        tm.assert_frame_equal(result, expected)

        result = df.sort_index(level=["A", "B"], sort_remaining=False)
        expected = df
        tm.assert_frame_equal(result, expected)

        # Error thrown by sort_index when
        # first index is sorted last (GH#26053)
        result = df.sort_index(level=["C", "B", "A"])
        expected = df.iloc[[1, 0]]
        tm.assert_frame_equal(result, expected)

        result = df.sort_index(level=["B", "C", "A"])
        expected = df.iloc[[1, 0]]
        tm.assert_frame_equal(result, expected)

        result = df.sort_index(level=["C", "A"])
        expected = df.iloc[[1, 0]]
        tm.assert_frame_equal(result, expected)

    def test_sort_index_categorical_index(self):
        """A categorical index sorts by category order (c, a, b), not lexically."""
        df = DataFrame(
            {
                "A": np.arange(6, dtype="int64"),
                "B": Series(list("aabbca")).astype(CategoricalDtype(list("cab"))),
            }
        ).set_index("B")

        result = df.sort_index()
        expected = df.iloc[[4, 0, 1, 5, 2, 3]]
        tm.assert_frame_equal(result, expected)

        result = df.sort_index(ascending=False)
        expected = df.iloc[[2, 3, 0, 1, 5, 4]]
        tm.assert_frame_equal(result, expected)

    def test_sort_index(self):
        """Basic row-label and column-label sorting in both directions."""
        # GH#13496

        frame = DataFrame(
            np.arange(16).reshape(4, 4),
            index=[1, 2, 3, 4],
            columns=["A", "B", "C", "D"],
        )

        # axis=0 : sort rows by index labels
        unordered = frame.loc[[3, 2, 4, 1]]
        result = unordered.sort_index(axis=0)
        expected = frame
        tm.assert_frame_equal(result, expected)

        result = unordered.sort_index(ascending=False)
        expected = frame[::-1]
        tm.assert_frame_equal(result, expected)

        # axis=1 : sort columns by column names
        unordered = frame.iloc[:, [2, 1, 3, 0]]
        result = unordered.sort_index(axis=1)
        tm.assert_frame_equal(result, frame)

        result = unordered.sort_index(axis=1, ascending=False)
        expected = frame.iloc[:, ::-1]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("level", ["A", 0])  # GH#21052
    def test_sort_index_multiindex(self, level):
        """The first level may be selected by name or by position."""
        # GH#13496

        # sort rows by specified level of multi-index
        mi = MultiIndex.from_tuples(
            [[2, 1, 3], [2, 1, 2], [1, 1, 1]], names=list("ABC")
        )
        df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mi)

        expected_mi = MultiIndex.from_tuples(
            [[1, 1, 1], [2, 1, 2], [2, 1, 3]], names=list("ABC")
        )
        expected = DataFrame([[5, 6], [3, 4], [1, 2]], index=expected_mi)
        result = df.sort_index(level=level)
        tm.assert_frame_equal(result, expected)

        # sort_remaining=False
        expected_mi = MultiIndex.from_tuples(
            [[1, 1, 1], [2, 1, 3], [2, 1, 2]], names=list("ABC")
        )
        expected = DataFrame([[5, 6], [1, 2], [3, 4]], index=expected_mi)
        result = df.sort_index(level=level, sort_remaining=False)
        tm.assert_frame_equal(result, expected)

    def test_sort_index_intervalindex(self):
        """Interval categories produced by ``pd.cut`` come out in bin order."""
        # this is a de-facto sort via unstack
        # confirming that we sort in the order of the bins
        y = Series(np.random.randn(100))
        x1 = Series(np.sign(np.random.randn(100)))
        x2 = pd.cut(Series(np.random.randn(100)), bins=[-3, -0.5, 0, 0.5, 3])
        model = pd.concat([y, x1, x2], axis=1, keys=["Y", "X1", "X2"])

        result = model.groupby(["X1", "X2"], observed=True).mean().unstack()
        expected = IntervalIndex.from_tuples(
            [(-3.0, -0.5), (-0.5, 0.0), (0.0, 0.5), (0.5, 3.0)], closed="right"
        )
        result = result.columns.levels[1].categories
        tm.assert_index_equal(result, expected)
0 commit comments