Skip to content

Commit 70175be

Browse files
authored
REF: misplaced sort_index tests (#33774)
1 parent 3df51fc commit 70175be

File tree

3 files changed

+362
-328
lines changed

3 files changed

+362
-328
lines changed

pandas/tests/frame/methods/test_sort_index.py

+339-1
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,156 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import CategoricalDtype, DataFrame, IntervalIndex, MultiIndex, Series
5+
from pandas import CategoricalDtype, DataFrame, Index, IntervalIndex, MultiIndex, Series
66
import pandas._testing as tm
77

88

99
class TestDataFrameSortIndex:
10+
def test_sort_index_and_reconstruction_doc_example(self):
11+
# doc example
12+
df = DataFrame(
13+
{"value": [1, 2, 3, 4]},
14+
index=MultiIndex(
15+
levels=[["a", "b"], ["bb", "aa"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
16+
),
17+
)
18+
assert df.index.is_lexsorted()
19+
assert not df.index.is_monotonic
20+
21+
# sort it
22+
expected = DataFrame(
23+
{"value": [2, 1, 4, 3]},
24+
index=MultiIndex(
25+
levels=[["a", "b"], ["aa", "bb"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
26+
),
27+
)
28+
result = df.sort_index()
29+
assert result.index.is_lexsorted()
30+
assert result.index.is_monotonic
31+
32+
tm.assert_frame_equal(result, expected)
33+
34+
# reconstruct
35+
result = df.sort_index().copy()
36+
result.index = result.index._sort_levels_monotonic()
37+
assert result.index.is_lexsorted()
38+
assert result.index.is_monotonic
39+
40+
tm.assert_frame_equal(result, expected)
41+
42+
def test_sort_index_non_existent_label_multiindex(self):
43+
# GH#12261
44+
df = DataFrame(0, columns=[], index=MultiIndex.from_product([[], []]))
45+
df.loc["b", "2"] = 1
46+
df.loc["a", "3"] = 1
47+
result = df.sort_index().index.is_monotonic
48+
assert result is True
49+
50+
def test_sort_index_reorder_on_ops(self):
51+
# GH#15687
52+
df = DataFrame(
53+
np.random.randn(8, 2),
54+
index=MultiIndex.from_product(
55+
[["a", "b"], ["big", "small"], ["red", "blu"]],
56+
names=["letter", "size", "color"],
57+
),
58+
columns=["near", "far"],
59+
)
60+
df = df.sort_index()
61+
62+
def my_func(group):
63+
group.index = ["newz", "newa"]
64+
return group
65+
66+
result = df.groupby(level=["letter", "size"]).apply(my_func).sort_index()
67+
expected = MultiIndex.from_product(
68+
[["a", "b"], ["big", "small"], ["newa", "newz"]],
69+
names=["letter", "size", None],
70+
)
71+
72+
tm.assert_index_equal(result.index, expected)
73+
74+
def test_sort_index_nan_multiindex(self):
75+
# GH#14784
76+
# incorrect sorting w.r.t. nans
77+
tuples = [[12, 13], [np.nan, np.nan], [np.nan, 3], [1, 2]]
78+
mi = MultiIndex.from_tuples(tuples)
79+
80+
df = DataFrame(np.arange(16).reshape(4, 4), index=mi, columns=list("ABCD"))
81+
s = Series(np.arange(4), index=mi)
82+
83+
df2 = DataFrame(
84+
{
85+
"date": pd.DatetimeIndex(
86+
[
87+
"20121002",
88+
"20121007",
89+
"20130130",
90+
"20130202",
91+
"20130305",
92+
"20121002",
93+
"20121207",
94+
"20130130",
95+
"20130202",
96+
"20130305",
97+
"20130202",
98+
"20130305",
99+
]
100+
),
101+
"user_id": [1, 1, 1, 1, 1, 3, 3, 3, 5, 5, 5, 5],
102+
"whole_cost": [
103+
1790,
104+
np.nan,
105+
280,
106+
259,
107+
np.nan,
108+
623,
109+
90,
110+
312,
111+
np.nan,
112+
301,
113+
359,
114+
801,
115+
],
116+
"cost": [12, 15, 10, 24, 39, 1, 0, np.nan, 45, 34, 1, 12],
117+
}
118+
).set_index(["date", "user_id"])
119+
120+
# sorting frame, default nan position is last
121+
result = df.sort_index()
122+
expected = df.iloc[[3, 0, 2, 1], :]
123+
tm.assert_frame_equal(result, expected)
124+
125+
# sorting frame, nan position last
126+
result = df.sort_index(na_position="last")
127+
expected = df.iloc[[3, 0, 2, 1], :]
128+
tm.assert_frame_equal(result, expected)
129+
130+
# sorting frame, nan position first
131+
result = df.sort_index(na_position="first")
132+
expected = df.iloc[[1, 2, 3, 0], :]
133+
tm.assert_frame_equal(result, expected)
134+
135+
# sorting frame with removed rows
136+
result = df2.dropna().sort_index()
137+
expected = df2.sort_index().dropna()
138+
tm.assert_frame_equal(result, expected)
139+
140+
# sorting series, default nan position is last
141+
result = s.sort_index()
142+
expected = s.iloc[[3, 0, 2, 1]]
143+
tm.assert_series_equal(result, expected)
144+
145+
# sorting series, nan position last
146+
result = s.sort_index(na_position="last")
147+
expected = s.iloc[[3, 0, 2, 1]]
148+
tm.assert_series_equal(result, expected)
149+
150+
# sorting series, nan position first
151+
result = s.sort_index(na_position="first")
152+
expected = s.iloc[[1, 2, 3, 0]]
153+
tm.assert_series_equal(result, expected)
154+
10155
def test_sort_index_nan(self):
11156
# GH#3917
12157

@@ -318,3 +463,196 @@ def test_sort_index_ignore_index_multi_index(
318463

319464
tm.assert_frame_equal(result_df, expected_df)
320465
tm.assert_frame_equal(df, DataFrame(original_dict, index=mi))
466+
467+
def test_sort_index_categorical_multiindex(self):
468+
# GH#15058
469+
df = DataFrame(
470+
{
471+
"a": range(6),
472+
"l1": pd.Categorical(
473+
["a", "a", "b", "b", "c", "c"],
474+
categories=["c", "a", "b"],
475+
ordered=True,
476+
),
477+
"l2": [0, 1, 0, 1, 0, 1],
478+
}
479+
)
480+
result = df.set_index(["l1", "l2"]).sort_index()
481+
expected = DataFrame(
482+
[4, 5, 0, 1, 2, 3],
483+
columns=["a"],
484+
index=MultiIndex(
485+
levels=[
486+
pd.CategoricalIndex(
487+
["c", "a", "b"],
488+
categories=["c", "a", "b"],
489+
ordered=True,
490+
name="l1",
491+
dtype="category",
492+
),
493+
[0, 1],
494+
],
495+
codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
496+
names=["l1", "l2"],
497+
),
498+
)
499+
tm.assert_frame_equal(result, expected)
500+
501+
def test_sort_index_and_reconstruction(self):
502+
503+
# GH#15622
504+
# lexsortedness should be identical
505+
# across MultiIndex construction methods
506+
507+
df = DataFrame([[1, 1], [2, 2]], index=list("ab"))
508+
expected = DataFrame(
509+
[[1, 1], [2, 2], [1, 1], [2, 2]],
510+
index=MultiIndex.from_tuples(
511+
[(0.5, "a"), (0.5, "b"), (0.8, "a"), (0.8, "b")]
512+
),
513+
)
514+
assert expected.index.is_lexsorted()
515+
516+
result = DataFrame(
517+
[[1, 1], [2, 2], [1, 1], [2, 2]],
518+
index=MultiIndex.from_product([[0.5, 0.8], list("ab")]),
519+
)
520+
result = result.sort_index()
521+
assert result.index.is_lexsorted()
522+
assert result.index.is_monotonic
523+
524+
tm.assert_frame_equal(result, expected)
525+
526+
result = DataFrame(
527+
[[1, 1], [2, 2], [1, 1], [2, 2]],
528+
index=MultiIndex(
529+
levels=[[0.5, 0.8], ["a", "b"]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]]
530+
),
531+
)
532+
result = result.sort_index()
533+
assert result.index.is_lexsorted()
534+
535+
tm.assert_frame_equal(result, expected)
536+
537+
concatted = pd.concat([df, df], keys=[0.8, 0.5])
538+
result = concatted.sort_index()
539+
540+
assert result.index.is_lexsorted()
541+
assert result.index.is_monotonic
542+
543+
tm.assert_frame_equal(result, expected)
544+
545+
# GH#14015
546+
df = DataFrame(
547+
[[1, 2], [6, 7]],
548+
columns=MultiIndex.from_tuples(
549+
[(0, "20160811 12:00:00"), (0, "20160809 12:00:00")],
550+
names=["l1", "Date"],
551+
),
552+
)
553+
554+
df.columns.set_levels(
555+
pd.to_datetime(df.columns.levels[1]), level=1, inplace=True
556+
)
557+
assert not df.columns.is_lexsorted()
558+
assert not df.columns.is_monotonic
559+
result = df.sort_index(axis=1)
560+
assert result.columns.is_lexsorted()
561+
assert result.columns.is_monotonic
562+
result = df.sort_index(axis=1, level=1)
563+
assert result.columns.is_lexsorted()
564+
assert result.columns.is_monotonic
565+
566+
# TODO: better name, de-duplicate with test_sort_index_level above
567+
def test_sort_index_level2(self):
568+
mi = MultiIndex(
569+
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
570+
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
571+
names=["first", "second"],
572+
)
573+
frame = DataFrame(
574+
np.random.randn(10, 3),
575+
index=mi,
576+
columns=Index(["A", "B", "C"], name="exp"),
577+
)
578+
579+
df = frame.copy()
580+
df.index = np.arange(len(df))
581+
582+
# axis=1
583+
584+
# series
585+
a_sorted = frame["A"].sort_index(level=0)
586+
587+
# preserve names
588+
assert a_sorted.index.names == frame.index.names
589+
590+
# inplace
591+
rs = frame.copy()
592+
rs.sort_index(level=0, inplace=True)
593+
tm.assert_frame_equal(rs, frame.sort_index(level=0))
594+
595+
def test_sort_index_level_large_cardinality(self):
596+
597+
# GH#2684 (int64)
598+
index = MultiIndex.from_arrays([np.arange(4000)] * 3)
599+
df = DataFrame(np.random.randn(4000), index=index, dtype=np.int64)
600+
601+
# it works!
602+
result = df.sort_index(level=0)
603+
assert result.index.lexsort_depth == 3
604+
605+
# GH#2684 (int32)
606+
index = MultiIndex.from_arrays([np.arange(4000)] * 3)
607+
df = DataFrame(np.random.randn(4000), index=index, dtype=np.int32)
608+
609+
# it works!
610+
result = df.sort_index(level=0)
611+
assert (result.dtypes.values == df.dtypes.values).all()
612+
assert result.index.lexsort_depth == 3
613+
614+
def test_sort_index_level_by_name(self):
615+
mi = MultiIndex(
616+
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
617+
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
618+
names=["first", "second"],
619+
)
620+
frame = DataFrame(
621+
np.random.randn(10, 3),
622+
index=mi,
623+
columns=Index(["A", "B", "C"], name="exp"),
624+
)
625+
626+
frame.index.names = ["first", "second"]
627+
result = frame.sort_index(level="second")
628+
expected = frame.sort_index(level=1)
629+
tm.assert_frame_equal(result, expected)
630+
631+
def test_sort_index_level_mixed(self):
632+
mi = MultiIndex(
633+
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
634+
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
635+
names=["first", "second"],
636+
)
637+
frame = DataFrame(
638+
np.random.randn(10, 3),
639+
index=mi,
640+
columns=Index(["A", "B", "C"], name="exp"),
641+
)
642+
643+
sorted_before = frame.sort_index(level=1)
644+
645+
df = frame.copy()
646+
df["foo"] = "bar"
647+
sorted_after = df.sort_index(level=1)
648+
tm.assert_frame_equal(sorted_before, sorted_after.drop(["foo"], axis=1))
649+
650+
dft = frame.T
651+
sorted_before = dft.sort_index(level=1, axis=1)
652+
dft["foo", "three"] = "bar"
653+
654+
sorted_after = dft.sort_index(level=1, axis=1)
655+
tm.assert_frame_equal(
656+
sorted_before.drop([("foo", "three")], axis=1),
657+
sorted_after.drop([("foo", "three")], axis=1),
658+
)

pandas/tests/series/methods/test_sort_index.py

+23
Original file line numberDiff line numberDiff line change
@@ -170,3 +170,26 @@ def test_sort_index_ignore_index(
170170

171171
tm.assert_series_equal(result_ser, expected)
172172
tm.assert_series_equal(ser, Series(original_list))
173+
174+
def test_sort_index_ascending_list(self):
175+
# GH#16934
176+
177+
# Set up a Series with a three level MultiIndex
178+
arrays = [
179+
["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
180+
["one", "two", "one", "two", "one", "two", "one", "two"],
181+
[4, 3, 2, 1, 4, 3, 2, 1],
182+
]
183+
tuples = zip(*arrays)
184+
mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
185+
ser = Series(range(8), index=mi)
186+
187+
# Sort with boolean ascending
188+
result = ser.sort_index(level=["third", "first"], ascending=False)
189+
expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]]
190+
tm.assert_series_equal(result, expected)
191+
192+
# Sort with list of boolean ascending
193+
result = ser.sort_index(level=["third", "first"], ascending=[False, True])
194+
expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
195+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)