diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index d870259c2539b..c18cd1f252c83 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -146,76 +146,6 @@ def test_contains_list(self): with pytest.raises(TypeError, match="unhashable type"): ["a", "b"] in idx - def test_map(self): - ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) - result = ci.map(lambda x: x.lower()) - exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) - tm.assert_index_equal(result, exp) - - ci = pd.CategoricalIndex( - list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" - ) - result = ci.map(lambda x: x.lower()) - exp = pd.CategoricalIndex( - list("ababc"), categories=list("bac"), ordered=False, name="XXX" - ) - tm.assert_index_equal(result, exp) - - # GH 12766: Return an index not an array - tm.assert_index_equal( - ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") - ) - - # change categories dtype - ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) - - def f(x): - return {"A": 10, "B": 20, "C": 30}.get(x) - - result = ci.map(f) - exp = pd.CategoricalIndex( - [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False - ) - tm.assert_index_equal(result, exp) - - result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"])) - tm.assert_index_equal(result, exp) - - result = ci.map({"A": 10, "B": 20, "C": 30}) - tm.assert_index_equal(result, exp) - - def test_map_with_categorical_series(self): - # GH 12756 - a = pd.Index([1, 2, 3, 4]) - b = pd.Series(["even", "odd", "even", "odd"], dtype="category") - c = pd.Series(["even", "odd", "even", "odd"]) - - exp = CategoricalIndex(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(b), exp) - exp = pd.Index(["odd", "even", "odd", np.nan]) - tm.assert_index_equal(a.map(c), exp) - - @pytest.mark.parametrize( - ("data", "f"), - ( - ([1, 1, np.nan], pd.isna), - ([1, 2, np.nan], pd.isna), - ([1, 1, np.nan], {1: False}), - ([1, 2, np.nan], {1: False, 2: False}), - ([1, 1, np.nan], pd.Series([False, False])), - ([1, 2, np.nan], pd.Series([False, False, False])), - ), - ) - def test_map_with_nan(self, data, f): # GH 24241 - values = pd.Categorical(data) - result = values.map(f) - if data[1] == 1: - expected = pd.Categorical([False, False, np.nan]) - tm.assert_categorical_equal(result, expected) - else: - expected = pd.Index([False, False, np.nan]) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series]) def test_where(self, klass): i = self.create_index() @@ -384,89 +314,6 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): expected = index tm.assert_index_equal(result, expected) - def test_reindex_base(self): - # Determined by cat ordering. - idx = CategoricalIndex(list("cab"), categories=list("cab")) - expected = np.arange(len(idx), dtype=np.intp) - - actual = idx.get_indexer(idx) - tm.assert_numpy_array_equal(expected, actual) - - with pytest.raises(ValueError, match="Invalid fill method"): - idx.get_indexer(idx, method="invalid") - - def test_reindexing(self): - np.random.seed(123456789) - - ci = self.create_index() - oidx = Index(np.array(ci)) - - for n in [1, 2, 5, len(ci)]: - finder = oidx[np.random.randint(0, len(ci), size=n)] - expected = oidx.get_indexer_non_unique(finder)[0] - - actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected, actual) - - # see gh-17323 - # - # Even when indexer is equal to the - # members in the index, we should - # respect duplicates instead of taking - # the fast-track path. - for finder in [list("aabbca"), list("aababca")]: - expected = oidx.get_indexer_non_unique(finder)[0] - - actual = ci.get_indexer(finder) - tm.assert_numpy_array_equal(expected, actual) - - def test_reindex_dtype(self): - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(["a", "c"]) - tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - c = CategoricalIndex(["a", "b", "c", "a"]) - res, indexer = c.reindex(Categorical(["a", "c"])) - - exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(["a", "c"]) - exp = Index(["a", "a", "c"], dtype="object") - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) - res, indexer = c.reindex(Categorical(["a", "c"])) - exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) - - def test_reindex_duplicate_target(self): - # See GH25459 - cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) - res, indexer = cat.reindex(["a", "c", "c"]) - exp = Index(["a", "c", "c"], dtype="object") - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) - - res, indexer = cat.reindex( - CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) - ) - exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) - tm.assert_index_equal(res, exp, exact=True) - tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) - - def test_reindex_empty_index(self): - # See GH16770 - c = CategoricalIndex([]) - res, indexer = c.reindex(["a", "b"]) - tm.assert_index_equal(res, Index(["a", "b"]), exact=True) - tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp)) - @pytest.mark.parametrize( "data, non_lexsorted_data", [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]], @@ -518,75 +365,6 @@ def test_drop_duplicates(self): tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected) - def test_get_indexer(self): - - idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) - idx2 = CategoricalIndex(list("abf")) - - for indexer in [idx2, list("abf"), Index(list("abf"))]: - r1 = idx1.get_indexer(idx2) - tm.assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) - - msg = ( - "method='pad' and method='backfill' not implemented yet for " - "CategoricalIndex" - ) - with pytest.raises(NotImplementedError, match=msg): - idx2.get_indexer(idx1, method="pad") - with pytest.raises(NotImplementedError, match=msg): - idx2.get_indexer(idx1, method="backfill") - - msg = "method='nearest' not implemented yet for CategoricalIndex" - with pytest.raises(NotImplementedError, match=msg): - idx2.get_indexer(idx1, method="nearest") - - def test_get_loc(self): - # GH 12531 - cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) - idx1 = Index(list("abcde")) - assert cidx1.get_loc("a") == idx1.get_loc("a") - assert cidx1.get_loc("e") == idx1.get_loc("e") - - for i in [cidx1, idx1]: - with pytest.raises(KeyError, match="'NOT-EXIST'"): - i.get_loc("NOT-EXIST") - - # non-unique - cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) - idx2 = Index(list("aacded")) - - # results in bool array - res = cidx2.get_loc("d") - tm.assert_numpy_array_equal(res, idx2.get_loc("d")) - tm.assert_numpy_array_equal( - res, np.array([False, False, False, True, False, True]) - ) - # unique element results in scalar - res = cidx2.get_loc("e") - assert res == idx2.get_loc("e") - assert res == 4 - - for i in [cidx2, idx2]: - with pytest.raises(KeyError, match="'NOT-EXIST'"): - i.get_loc("NOT-EXIST") - - # non-unique, sliceable - cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) - idx3 = Index(list("aabbb")) - - # results in slice - res = cidx3.get_loc("a") - assert res == idx3.get_loc("a") - assert res == slice(0, 2, None) - - res = cidx3.get_loc("b") - assert res == idx3.get_loc("b") - assert res == slice(2, 5, None) - - for i in [cidx3, idx3]: - with pytest.raises(KeyError, match="'c'"): - i.get_loc("c") - def test_repr_roundtrip(self): ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True) @@ -837,122 +615,6 @@ def test_fillna_categorical(self): with pytest.raises(ValueError, match=msg): idx.fillna(2.0) - def test_take_fill_value(self): - # GH 12631 - - # numeric category - idx = pd.CategoricalIndex([1, 2, 3], name="xxx") - result = idx.take(np.array([1, 0, -1])) - expected = pd.CategoricalIndex([2, 1, 3], name="xxx") - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) - expected = pd.CategoricalIndex([2, 1, 3], name="xxx") - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # object category - idx = pd.CategoricalIndex( - list("CBA"), categories=list("ABC"), ordered=True, name="xxx" - ) - result = idx.take(np.array([1, 0, -1])) - expected = pd.CategoricalIndex( - list("BCA"), categories=list("ABC"), ordered=True, name="xxx" - ) - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.CategoricalIndex( - ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" - ) - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) - expected = pd.CategoricalIndex( - list("BCA"), categories=list("ABC"), ordered=True, name="xxx" - ) - tm.assert_index_equal(result, expected) - tm.assert_categorical_equal(result.values, expected.values) - - msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" - ) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_fill_value_datetime(self): - - # datetime category - idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") - idx = pd.CategoricalIndex(idx) - result = idx.take(np.array([1, 0, -1])) - expected = pd.DatetimeIndex( - ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" - ) - expected = pd.CategoricalIndex(expected) - tm.assert_index_equal(result, expected) - - # fill_value - result = idx.take(np.array([1, 0, -1]), fill_value=True) - expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") - exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) - expected = pd.CategoricalIndex(expected, categories=exp_cats) - tm.assert_index_equal(result, expected) - - # allow_fill=False - result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) - expected = pd.DatetimeIndex( - ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" - ) - expected = pd.CategoricalIndex(expected) - tm.assert_index_equal(result, expected) - - msg = ( - "When allow_fill=True and fill_value is not None, " - "all indices must be >= -1" - ) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -2]), fill_value=True) - with pytest.raises(ValueError, match=msg): - idx.take(np.array([1, 0, -5]), fill_value=True) - - with pytest.raises(IndexError): - idx.take(np.array([1, -5])) - - def test_take_invalid_kwargs(self): - idx = pd.CategoricalIndex([1, 2, 3], name="foo") - indices = [1, 0, -1] - - msg = r"take\(\) got an unexpected keyword argument 'foo'" - with pytest.raises(TypeError, match=msg): - idx.take(indices, foo=2) - - msg = "the 'out' parameter is not supported" - with pytest.raises(ValueError, match=msg): - idx.take(indices, out=indices) - - msg = "the 'mode' parameter is not supported" - with pytest.raises(ValueError, match=msg): - idx.take(indices, mode="clip") - @pytest.mark.parametrize( "dtype, engine_type", [ @@ -976,19 +638,10 @@ def test_engine_type(self, dtype, engine_type): assert np.issubdtype(ci.codes.dtype, dtype) assert isinstance(ci._engine, engine_type) - @pytest.mark.parametrize( - "data, categories", - [ - (list("abcbca"), list("cab")), - (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), - ], - ids=["string", "interval"], - ) - def test_map_str(self, data, categories, ordered_fixture): - # GH 31202 - override base class since we want to maintain categorical/ordered - index = CategoricalIndex(data, categories=categories, ordered=ordered_fixture) - result = index.map(str) - expected = CategoricalIndex( - map(str, data), categories=map(str, categories), ordered=ordered_fixture - ) - tm.assert_index_equal(result, expected) + def test_reindex_base(self): + # See test_reindex.py + pass + + def test_map_str(self): + # See test_map.py + pass diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py new file mode 100644 index 0000000000000..6fce6542d228e --- /dev/null +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -0,0 +1,233 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import CategoricalIndex, Index +import pandas._testing as tm + + +class TestTake: + def test_take_fill_value(self): + # GH 12631 + + # numeric category + idx = pd.CategoricalIndex([1, 2, 3], name="xxx") + result = idx.take(np.array([1, 0, -1])) + expected = pd.CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.CategoricalIndex([2, 1, 3], name="xxx") + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # object category + idx = pd.CategoricalIndex( + list("CBA"), categories=list("ABC"), ordered=True, name="xxx" + ) + result = idx.take(np.array([1, 0, -1])) + expected = pd.CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.CategoricalIndex( + ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.CategoricalIndex( + list("BCA"), categories=list("ABC"), ordered=True, name="xxx" + ) + tm.assert_index_equal(result, expected) + tm.assert_categorical_equal(result.values, expected.values) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_fill_value_datetime(self): + + # datetime category + idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx") + idx = pd.CategoricalIndex(idx) + result = idx.take(np.array([1, 0, -1])) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = pd.CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + # fill_value + result = idx.take(np.array([1, 0, -1]), fill_value=True) + expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx") + exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"]) + expected = pd.CategoricalIndex(expected, categories=exp_cats) + tm.assert_index_equal(result, expected) + + # allow_fill=False + result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) + expected = pd.DatetimeIndex( + ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx" + ) + expected = pd.CategoricalIndex(expected) + tm.assert_index_equal(result, expected) + + msg = ( + "When allow_fill=True and fill_value is not None, " + "all indices must be >= -1" + ) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -2]), fill_value=True) + with pytest.raises(ValueError, match=msg): + idx.take(np.array([1, 0, -5]), fill_value=True) + + with pytest.raises(IndexError): + idx.take(np.array([1, -5])) + + def test_take_invalid_kwargs(self): + idx = pd.CategoricalIndex([1, 2, 3], name="foo") + indices = [1, 0, -1] + + msg = r"take\(\) got an unexpected keyword argument 'foo'" + with pytest.raises(TypeError, match=msg): + idx.take(indices, foo=2) + + msg = "the 'out' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, out=indices) + + msg = "the 'mode' parameter is not supported" + with pytest.raises(ValueError, match=msg): + idx.take(indices, mode="clip") + + +class TestGetLoc: + def test_get_loc(self): + # GH 12531 + cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc")) + idx1 = Index(list("abcde")) + assert cidx1.get_loc("a") == idx1.get_loc("a") + assert cidx1.get_loc("e") == idx1.get_loc("e") + + for i in [cidx1, idx1]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique + cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc")) + idx2 = Index(list("aacded")) + + # results in bool array + res = cidx2.get_loc("d") + tm.assert_numpy_array_equal(res, idx2.get_loc("d")) + tm.assert_numpy_array_equal( + res, np.array([False, False, False, True, False, True]) + ) + # unique element results in scalar + res = cidx2.get_loc("e") + assert res == idx2.get_loc("e") + assert res == 4 + + for i in [cidx2, idx2]: + with pytest.raises(KeyError, match="'NOT-EXIST'"): + i.get_loc("NOT-EXIST") + + # non-unique, sliceable + cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc")) + idx3 = Index(list("aabbb")) + + # results in slice + res = cidx3.get_loc("a") + assert res == idx3.get_loc("a") + assert res == slice(0, 2, None) + + res = cidx3.get_loc("b") + assert res == idx3.get_loc("b") + assert res == slice(2, 5, None) + + for i in [cidx3, idx3]: + with pytest.raises(KeyError, match="'c'"): + i.get_loc("c") + + +class TestGetIndexer: + def test_get_indexer_base(self): + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) + expected = np.arange(len(idx), dtype=np.intp) + + actual = idx.get_indexer(idx) + tm.assert_numpy_array_equal(expected, actual) + + with pytest.raises(ValueError, match="Invalid fill method"): + idx.get_indexer(idx, method="invalid") + + def test_get_indexer_non_unique(self): + np.random.seed(123456789) + + ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False) + oidx = Index(np.array(ci)) + + for n in [1, 2, 5, len(ci)]: + finder = oidx[np.random.randint(0, len(ci), size=n)] + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. + for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + + def test_get_indexer(self): + + idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc")) + idx2 = CategoricalIndex(list("abf")) + + for indexer in [idx2, list("abf"), Index(list("abf"))]: + r1 = idx1.get_indexer(idx2) + tm.assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp)) + + msg = ( + "method='pad' and method='backfill' not implemented yet for " + "CategoricalIndex" + ) + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="pad") + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="backfill") + + msg = "method='nearest' not implemented yet for CategoricalIndex" + with pytest.raises(NotImplementedError, match=msg): + idx2.get_indexer(idx1, method="nearest") diff --git a/pandas/tests/indexes/categorical/test_map.py b/pandas/tests/indexes/categorical/test_map.py new file mode 100644 index 0000000000000..943359a72e971 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_map.py @@ -0,0 +1,95 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import CategoricalIndex, Index +import pandas._testing as tm + + +class TestMap: + @pytest.mark.parametrize( + "data, categories", + [ + (list("abcbca"), list("cab")), + (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)), + ], + ids=["string", "interval"], + ) + def test_map_str(self, data, categories, ordered_fixture): + # GH 31202 - override base class since we want to maintain categorical/ordered + index = CategoricalIndex(data, categories=categories, ordered=ordered_fixture) + result = index.map(str) + expected = CategoricalIndex( + map(str, data), categories=map(str, categories), ordered=ordered_fixture + ) + tm.assert_index_equal(result, expected) + + def test_map(self): + ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True) + result = ci.map(lambda x: x.lower()) + exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True) + tm.assert_index_equal(result, exp) + + ci = pd.CategoricalIndex( + list("ABABC"), categories=list("BAC"), ordered=False, name="XXX" + ) + result = ci.map(lambda x: x.lower()) + exp = pd.CategoricalIndex( + list("ababc"), categories=list("bac"), ordered=False, name="XXX" + ) + tm.assert_index_equal(result, exp) + + # GH 12766: Return an index not an array + tm.assert_index_equal( + ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX") + ) + + # change categories dtype + ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False) + + def f(x): + return {"A": 10, "B": 20, "C": 30}.get(x) + + result = ci.map(f) + exp = pd.CategoricalIndex( + [10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False + ) + tm.assert_index_equal(result, exp) + + result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"])) + tm.assert_index_equal(result, exp) + + result = ci.map({"A": 10, "B": 20, "C": 30}) + tm.assert_index_equal(result, exp) + + def test_map_with_categorical_series(self): + # GH 12756 + a = pd.Index([1, 2, 3, 4]) + b = pd.Series(["even", "odd", "even", "odd"], dtype="category") + c = pd.Series(["even", "odd", "even", "odd"]) + + exp = CategoricalIndex(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(b), exp) + exp = pd.Index(["odd", "even", "odd", np.nan]) + tm.assert_index_equal(a.map(c), exp) + + @pytest.mark.parametrize( + ("data", "f"), + ( + ([1, 1, np.nan], pd.isna), + ([1, 2, np.nan], pd.isna), + ([1, 1, np.nan], {1: False}), + ([1, 2, np.nan], {1: False, 2: False}), + ([1, 1, np.nan], pd.Series([False, False])), + ([1, 2, np.nan], pd.Series([False, False, False])), + ), + ) + def test_map_with_nan(self, data, f): # GH 24241 + values = pd.Categorical(data) + result = values.map(f) + if data[1] == 1: + expected = pd.Categorical([False, False, np.nan]) + tm.assert_categorical_equal(result, expected) + else: + expected = pd.Index([False, False, np.nan]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/categorical/test_reindex.py b/pandas/tests/indexes/categorical/test_reindex.py new file mode 100644 index 0000000000000..f59ddc42ce4e4 --- /dev/null +++ b/pandas/tests/indexes/categorical/test_reindex.py @@ -0,0 +1,53 @@ +import numpy as np + +from pandas import Categorical, CategoricalIndex, Index +import pandas._testing as tm + + +class TestReindex: + def test_reindex_dtype(self): + c = CategoricalIndex(["a", "b", "c", "a"]) + res, indexer = c.reindex(["a", "c"]) + tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"]) + res, indexer = c.reindex(Categorical(["a", "c"])) + + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + res, indexer = c.reindex(["a", "c"]) + exp = Index(["a", "a", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) + res, indexer = c.reindex(Categorical(["a", "c"])) + exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp)) + + def test_reindex_duplicate_target(self): + # See GH25459 + cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"]) + res, indexer = cat.reindex(["a", "c", "c"]) + exp = Index(["a", "c", "c"], dtype="object") + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + res, indexer = cat.reindex( + CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + ) + exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"]) + tm.assert_index_equal(res, exp, exact=True) + tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp)) + + def test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(["a", "b"]) + tm.assert_index_equal(res, Index(["a", "b"]), exact=True) + tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))