Skip to content

Commit d3c59e4

Browse files
authored
REF: CategoricalIndex indexing tests (#31559)
1 parent f0f058f commit d3c59e4

File tree

4 files changed

+388
-354
lines changed

4 files changed

+388
-354
lines changed

pandas/tests/indexes/categorical/test_category.py

+7-354
Original file line numberDiff line numberDiff line change
@@ -146,76 +146,6 @@ def test_contains_list(self):
146146
with pytest.raises(TypeError, match="unhashable type"):
147147
["a", "b"] in idx
148148

149-
def test_map(self):
150-
ci = pd.CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
151-
result = ci.map(lambda x: x.lower())
152-
exp = pd.CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
153-
tm.assert_index_equal(result, exp)
154-
155-
ci = pd.CategoricalIndex(
156-
list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
157-
)
158-
result = ci.map(lambda x: x.lower())
159-
exp = pd.CategoricalIndex(
160-
list("ababc"), categories=list("bac"), ordered=False, name="XXX"
161-
)
162-
tm.assert_index_equal(result, exp)
163-
164-
# GH 12766: Return an index not an array
165-
tm.assert_index_equal(
166-
ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
167-
)
168-
169-
# change categories dtype
170-
ci = pd.CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)
171-
172-
def f(x):
173-
return {"A": 10, "B": 20, "C": 30}.get(x)
174-
175-
result = ci.map(f)
176-
exp = pd.CategoricalIndex(
177-
[10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False
178-
)
179-
tm.assert_index_equal(result, exp)
180-
181-
result = ci.map(pd.Series([10, 20, 30], index=["A", "B", "C"]))
182-
tm.assert_index_equal(result, exp)
183-
184-
result = ci.map({"A": 10, "B": 20, "C": 30})
185-
tm.assert_index_equal(result, exp)
186-
187-
def test_map_with_categorical_series(self):
188-
# GH 12756
189-
a = pd.Index([1, 2, 3, 4])
190-
b = pd.Series(["even", "odd", "even", "odd"], dtype="category")
191-
c = pd.Series(["even", "odd", "even", "odd"])
192-
193-
exp = CategoricalIndex(["odd", "even", "odd", np.nan])
194-
tm.assert_index_equal(a.map(b), exp)
195-
exp = pd.Index(["odd", "even", "odd", np.nan])
196-
tm.assert_index_equal(a.map(c), exp)
197-
198-
@pytest.mark.parametrize(
199-
("data", "f"),
200-
(
201-
([1, 1, np.nan], pd.isna),
202-
([1, 2, np.nan], pd.isna),
203-
([1, 1, np.nan], {1: False}),
204-
([1, 2, np.nan], {1: False, 2: False}),
205-
([1, 1, np.nan], pd.Series([False, False])),
206-
([1, 2, np.nan], pd.Series([False, False, False])),
207-
),
208-
)
209-
def test_map_with_nan(self, data, f): # GH 24241
210-
values = pd.Categorical(data)
211-
result = values.map(f)
212-
if data[1] == 1:
213-
expected = pd.Categorical([False, False, np.nan])
214-
tm.assert_categorical_equal(result, expected)
215-
else:
216-
expected = pd.Index([False, False, np.nan])
217-
tm.assert_index_equal(result, expected)
218-
219149
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
220150
def test_where(self, klass):
221151
i = self.create_index()
@@ -384,89 +314,6 @@ def test_astype_category(self, name, dtype_ordered, index_ordered):
384314
expected = index
385315
tm.assert_index_equal(result, expected)
386316

387-
def test_reindex_base(self):
388-
# Determined by cat ordering.
389-
idx = CategoricalIndex(list("cab"), categories=list("cab"))
390-
expected = np.arange(len(idx), dtype=np.intp)
391-
392-
actual = idx.get_indexer(idx)
393-
tm.assert_numpy_array_equal(expected, actual)
394-
395-
with pytest.raises(ValueError, match="Invalid fill method"):
396-
idx.get_indexer(idx, method="invalid")
397-
398-
def test_reindexing(self):
399-
np.random.seed(123456789)
400-
401-
ci = self.create_index()
402-
oidx = Index(np.array(ci))
403-
404-
for n in [1, 2, 5, len(ci)]:
405-
finder = oidx[np.random.randint(0, len(ci), size=n)]
406-
expected = oidx.get_indexer_non_unique(finder)[0]
407-
408-
actual = ci.get_indexer(finder)
409-
tm.assert_numpy_array_equal(expected, actual)
410-
411-
# see gh-17323
412-
#
413-
# Even when indexer is equal to the
414-
# members in the index, we should
415-
# respect duplicates instead of taking
416-
# the fast-track path.
417-
for finder in [list("aabbca"), list("aababca")]:
418-
expected = oidx.get_indexer_non_unique(finder)[0]
419-
420-
actual = ci.get_indexer(finder)
421-
tm.assert_numpy_array_equal(expected, actual)
422-
423-
def test_reindex_dtype(self):
424-
c = CategoricalIndex(["a", "b", "c", "a"])
425-
res, indexer = c.reindex(["a", "c"])
426-
tm.assert_index_equal(res, Index(["a", "a", "c"]), exact=True)
427-
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
428-
429-
c = CategoricalIndex(["a", "b", "c", "a"])
430-
res, indexer = c.reindex(Categorical(["a", "c"]))
431-
432-
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
433-
tm.assert_index_equal(res, exp, exact=True)
434-
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
435-
436-
c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
437-
res, indexer = c.reindex(["a", "c"])
438-
exp = Index(["a", "a", "c"], dtype="object")
439-
tm.assert_index_equal(res, exp, exact=True)
440-
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
441-
442-
c = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
443-
res, indexer = c.reindex(Categorical(["a", "c"]))
444-
exp = CategoricalIndex(["a", "a", "c"], categories=["a", "c"])
445-
tm.assert_index_equal(res, exp, exact=True)
446-
tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.intp))
447-
448-
def test_reindex_duplicate_target(self):
449-
# See GH25459
450-
cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
451-
res, indexer = cat.reindex(["a", "c", "c"])
452-
exp = Index(["a", "c", "c"], dtype="object")
453-
tm.assert_index_equal(res, exp, exact=True)
454-
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
455-
456-
res, indexer = cat.reindex(
457-
CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
458-
)
459-
exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
460-
tm.assert_index_equal(res, exp, exact=True)
461-
tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))
462-
463-
def test_reindex_empty_index(self):
464-
# See GH16770
465-
c = CategoricalIndex([])
466-
res, indexer = c.reindex(["a", "b"])
467-
tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
468-
tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))
469-
470317
@pytest.mark.parametrize(
471318
"data, non_lexsorted_data",
472319
[[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
@@ -518,75 +365,6 @@ def test_drop_duplicates(self):
518365
tm.assert_index_equal(idx.drop_duplicates(), expected)
519366
tm.assert_index_equal(idx.unique(), expected)
520367

521-
def test_get_indexer(self):
    """Check ``CategoricalIndex.get_indexer`` against several target types.

    The same labels are supplied as a ``CategoricalIndex``, a plain list
    and an ``Index``; each form must yield the same positions.  The
    ``pad``, ``backfill`` and ``nearest`` fill methods must raise
    ``NotImplementedError`` with the messages matched below.
    """
    idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
    idx2 = CategoricalIndex(list("abf"))

    # NOTE(review): four positions for three labels -- presumably because
    # "a" occurs twice in idx1; confirm against get_indexer's contract.
    expected = np.array([0, 1, 2, -1], dtype=np.intp)
    for indexer in [idx2, list("abf"), Index(list("abf"))]:
        # Pass the loop variable so that every target representation is
        # actually exercised, not only the CategoricalIndex one.
        r1 = idx1.get_indexer(indexer)
        tm.assert_almost_equal(r1, expected)

    msg = (
        "method='pad' and method='backfill' not implemented yet for "
        "CategoricalIndex"
    )
    for method in ("pad", "backfill"):
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method=method)

    msg = "method='nearest' not implemented yet for CategoricalIndex"
    with pytest.raises(NotImplementedError, match=msg):
        idx2.get_indexer(idx1, method="nearest")
542-
543-
def test_get_loc(self):
544-
# GH 12531
545-
cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
546-
idx1 = Index(list("abcde"))
547-
assert cidx1.get_loc("a") == idx1.get_loc("a")
548-
assert cidx1.get_loc("e") == idx1.get_loc("e")
549-
550-
for i in [cidx1, idx1]:
551-
with pytest.raises(KeyError, match="'NOT-EXIST'"):
552-
i.get_loc("NOT-EXIST")
553-
554-
# non-unique
555-
cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
556-
idx2 = Index(list("aacded"))
557-
558-
# results in bool array
559-
res = cidx2.get_loc("d")
560-
tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
561-
tm.assert_numpy_array_equal(
562-
res, np.array([False, False, False, True, False, True])
563-
)
564-
# unique element results in scalar
565-
res = cidx2.get_loc("e")
566-
assert res == idx2.get_loc("e")
567-
assert res == 4
568-
569-
for i in [cidx2, idx2]:
570-
with pytest.raises(KeyError, match="'NOT-EXIST'"):
571-
i.get_loc("NOT-EXIST")
572-
573-
# non-unique, sliceable
574-
cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
575-
idx3 = Index(list("aabbb"))
576-
577-
# results in slice
578-
res = cidx3.get_loc("a")
579-
assert res == idx3.get_loc("a")
580-
assert res == slice(0, 2, None)
581-
582-
res = cidx3.get_loc("b")
583-
assert res == idx3.get_loc("b")
584-
assert res == slice(2, 5, None)
585-
586-
for i in [cidx3, idx3]:
587-
with pytest.raises(KeyError, match="'c'"):
588-
i.get_loc("c")
589-
590368
def test_repr_roundtrip(self):
591369

592370
ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
@@ -837,122 +615,6 @@ def test_fillna_categorical(self):
837615
with pytest.raises(ValueError, match=msg):
838616
idx.fillna(2.0)
839617

840-
def test_take_fill_value(self):
841-
# GH 12631
842-
843-
# numeric category
844-
idx = pd.CategoricalIndex([1, 2, 3], name="xxx")
845-
result = idx.take(np.array([1, 0, -1]))
846-
expected = pd.CategoricalIndex([2, 1, 3], name="xxx")
847-
tm.assert_index_equal(result, expected)
848-
tm.assert_categorical_equal(result.values, expected.values)
849-
850-
# fill_value
851-
result = idx.take(np.array([1, 0, -1]), fill_value=True)
852-
expected = pd.CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
853-
tm.assert_index_equal(result, expected)
854-
tm.assert_categorical_equal(result.values, expected.values)
855-
856-
# allow_fill=False
857-
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
858-
expected = pd.CategoricalIndex([2, 1, 3], name="xxx")
859-
tm.assert_index_equal(result, expected)
860-
tm.assert_categorical_equal(result.values, expected.values)
861-
862-
# object category
863-
idx = pd.CategoricalIndex(
864-
list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
865-
)
866-
result = idx.take(np.array([1, 0, -1]))
867-
expected = pd.CategoricalIndex(
868-
list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
869-
)
870-
tm.assert_index_equal(result, expected)
871-
tm.assert_categorical_equal(result.values, expected.values)
872-
873-
# fill_value
874-
result = idx.take(np.array([1, 0, -1]), fill_value=True)
875-
expected = pd.CategoricalIndex(
876-
["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
877-
)
878-
tm.assert_index_equal(result, expected)
879-
tm.assert_categorical_equal(result.values, expected.values)
880-
881-
# allow_fill=False
882-
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
883-
expected = pd.CategoricalIndex(
884-
list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
885-
)
886-
tm.assert_index_equal(result, expected)
887-
tm.assert_categorical_equal(result.values, expected.values)
888-
889-
msg = (
890-
"When allow_fill=True and fill_value is not None, "
891-
"all indices must be >= -1"
892-
)
893-
with pytest.raises(ValueError, match=msg):
894-
idx.take(np.array([1, 0, -2]), fill_value=True)
895-
with pytest.raises(ValueError, match=msg):
896-
idx.take(np.array([1, 0, -5]), fill_value=True)
897-
898-
with pytest.raises(IndexError):
899-
idx.take(np.array([1, -5]))
900-
901-
def test_take_fill_value_datetime(self):
902-
903-
# datetime category
904-
idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
905-
idx = pd.CategoricalIndex(idx)
906-
result = idx.take(np.array([1, 0, -1]))
907-
expected = pd.DatetimeIndex(
908-
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
909-
)
910-
expected = pd.CategoricalIndex(expected)
911-
tm.assert_index_equal(result, expected)
912-
913-
# fill_value
914-
result = idx.take(np.array([1, 0, -1]), fill_value=True)
915-
expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
916-
exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
917-
expected = pd.CategoricalIndex(expected, categories=exp_cats)
918-
tm.assert_index_equal(result, expected)
919-
920-
# allow_fill=False
921-
result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
922-
expected = pd.DatetimeIndex(
923-
["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
924-
)
925-
expected = pd.CategoricalIndex(expected)
926-
tm.assert_index_equal(result, expected)
927-
928-
msg = (
929-
"When allow_fill=True and fill_value is not None, "
930-
"all indices must be >= -1"
931-
)
932-
with pytest.raises(ValueError, match=msg):
933-
idx.take(np.array([1, 0, -2]), fill_value=True)
934-
with pytest.raises(ValueError, match=msg):
935-
idx.take(np.array([1, 0, -5]), fill_value=True)
936-
937-
with pytest.raises(IndexError):
938-
idx.take(np.array([1, -5]))
939-
940-
def test_take_invalid_kwargs(self):
941-
idx = pd.CategoricalIndex([1, 2, 3], name="foo")
942-
indices = [1, 0, -1]
943-
944-
msg = r"take\(\) got an unexpected keyword argument 'foo'"
945-
with pytest.raises(TypeError, match=msg):
946-
idx.take(indices, foo=2)
947-
948-
msg = "the 'out' parameter is not supported"
949-
with pytest.raises(ValueError, match=msg):
950-
idx.take(indices, out=indices)
951-
952-
msg = "the 'mode' parameter is not supported"
953-
with pytest.raises(ValueError, match=msg):
954-
idx.take(indices, mode="clip")
955-
956618
@pytest.mark.parametrize(
957619
"dtype, engine_type",
958620
[
@@ -976,19 +638,10 @@ def test_engine_type(self, dtype, engine_type):
976638
assert np.issubdtype(ci.codes.dtype, dtype)
977639
assert isinstance(ci._engine, engine_type)
978640

979-
@pytest.mark.parametrize(
980-
"data, categories",
981-
[
982-
(list("abcbca"), list("cab")),
983-
(pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
984-
],
985-
ids=["string", "interval"],
986-
)
987-
def test_map_str(self, data, categories, ordered_fixture):
988-
# GH 31202 - override base class since we want to maintain categorical/ordered
989-
index = CategoricalIndex(data, categories=categories, ordered=ordered_fixture)
990-
result = index.map(str)
991-
expected = CategoricalIndex(
992-
map(str, data), categories=map(str, categories), ordered=ordered_fixture
993-
)
994-
tm.assert_index_equal(result, expected)
641+
def test_reindex_base(self):
642+
# See test_reindex.py
643+
pass
644+
645+
def test_map_str(self):
646+
# See test_map.py
647+
pass

0 commit comments

Comments
 (0)