|
5 | 5 | import pytest
|
6 | 6 |
|
7 | 7 | import pandas as pd
|
8 |
| -from pandas import Categorical, DataFrame, Index, MultiIndex, Series, date_range, isna |
| 8 | +from pandas import ( |
| 9 | + Categorical, |
| 10 | + CategoricalIndex, |
| 11 | + DataFrame, |
| 12 | + Index, |
| 13 | + MultiIndex, |
| 14 | + Series, |
| 15 | + date_range, |
| 16 | + isna, |
| 17 | +) |
9 | 18 | import pandas._testing as tm
|
| 19 | +from pandas.api.types import CategoricalDtype as CDT |
10 | 20 | import pandas.core.common as com
|
11 | 21 |
|
12 | 22 |
|
@@ -745,3 +755,94 @@ def test_reindex_multi_categorical_time(self):
|
745 | 755 | result = df2.reindex(midx)
|
746 | 756 | expected = DataFrame({"a": [0, 1, 2, 3, 4, 5, 6, np.nan, 8]}, index=midx)
|
747 | 757 | tm.assert_frame_equal(result, expected)
|
| 758 | + |
def test_reindex_with_categoricalindex(self):
    """Check DataFrame.reindex on a frame indexed by a CategoricalIndex.

    Covers three groups of behavior:

    * reindexing with a plain list of labels gives back a regular
      (non-categorical) index, including labels outside the categories;
    * reindexing with a Categorical gives back an index carrying the dtype
      of the Categorical that was passed (categories and orderedness);
    * a source index with duplicates, and the ``method``/``level``/``limit``
      arguments, raise.
    """
    cats = list("cabe")
    df = DataFrame(
        {"A": np.arange(3, dtype="int64")},
        index=CategoricalIndex(list("abc"), dtype=CDT(cats), name="B"),
    )

    # reindexing with list-likes converts to a regular index; "e" is an
    # unused category and "d" is not a category at all -- both give NaN rows
    list_cases = [
        (["a", "b", "e"], [0, 1, np.nan]),
        (["a", "b"], [0, 1]),
        (["e"], [np.nan]),
        (["d"], [np.nan]),
    ]
    for labels, values in list_cases:
        result = df.reindex(labels)
        expected = DataFrame({"A": values, "B": Series(labels)}).set_index("B")
        tm.assert_frame_equal(result, expected, check_index_type=True)

    # since we are actually reindexing with a Categorical, we get back an
    # index with the dtype of the categorical that we received
    categorical_cases = [
        (["a", "e"], [0, np.nan], CDT(cats)),
        (["a"], [0], CDT(cats)),
        (["a", "e"], [0, np.nan], CDT(cats, ordered=True)),
        (["a", "d"], [0, np.nan], CDT(["a", "d"])),
    ]
    for labels, values, dtype in categorical_cases:
        result = df.reindex(Categorical(labels, dtype=dtype))
        expected = DataFrame(
            {"A": values, "B": Series(labels).astype(dtype)}
        ).set_index("B")
        tm.assert_frame_equal(result, expected, check_index_type=True)

    # reindexing a frame whose own index has duplicates is not allowed
    df2 = DataFrame(
        {"A": np.arange(6, dtype="int64")},
        index=CategoricalIndex(list("aabbca"), dtype=CDT(cats), name="B"),
    )
    msg = "cannot reindex from a duplicate axis"
    with pytest.raises(ValueError, match=msg):
        df2.reindex(["a", "b"])

    # args NotImplemented ATM
    msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
    for kwargs in ({"method": "ffill"}, {"level": 1}, {"limit": 2}):
        (arg_name,) = kwargs
        with pytest.raises(NotImplementedError, match=msg.format(arg_name)):
            df.reindex(["a"], **kwargs)
0 commit comments