Skip to content

Commit c863d2b

Browse files
authored
REF: CategoricalIndex indexing tests (#33018)
1 parent 9616d98 commit c863d2b

File tree

4 files changed

+173
-162
lines changed

4 files changed

+173
-162
lines changed

pandas/tests/indexes/categorical/test_category.py

-161
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
import pandas._config.config as cf
5-
64
from pandas._libs import index as libindex
75

86
from pandas.core.dtypes.dtypes import CategoricalDtype
@@ -100,65 +98,6 @@ def test_method_delegation(self):
10098
with pytest.raises(ValueError, match=msg):
10199
ci.set_categories(list("cab"), inplace=True)
102100

103-
def test_contains(self):
104-
105-
ci = self.create_index(categories=list("cabdef"))
106-
107-
assert "a" in ci
108-
assert "z" not in ci
109-
assert "e" not in ci
110-
assert np.nan not in ci
111-
112-
# assert codes NOT in index
113-
assert 0 not in ci
114-
assert 1 not in ci
115-
116-
ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
117-
assert np.nan in ci
118-
119-
@pytest.mark.parametrize(
120-
"item, expected",
121-
[
122-
(pd.Interval(0, 1), True),
123-
(1.5, True),
124-
(pd.Interval(0.5, 1.5), False),
125-
("a", False),
126-
(pd.Timestamp(1), False),
127-
(pd.Timedelta(1), False),
128-
],
129-
ids=str,
130-
)
131-
def test_contains_interval(self, item, expected):
132-
# GH 23705
133-
ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
134-
result = item in ci
135-
assert result is expected
136-
137-
def test_contains_list(self):
138-
# GH#21729
139-
idx = pd.CategoricalIndex([1, 2, 3])
140-
141-
assert "a" not in idx
142-
143-
with pytest.raises(TypeError, match="unhashable type"):
144-
["a"] in idx
145-
146-
with pytest.raises(TypeError, match="unhashable type"):
147-
["a", "b"] in idx
148-
149-
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
150-
def test_where(self, klass):
151-
i = self.create_index()
152-
cond = [True] * len(i)
153-
expected = i
154-
result = i.where(klass(cond))
155-
tm.assert_index_equal(result, expected)
156-
157-
cond = [False] + [True] * (len(i) - 1)
158-
expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
159-
result = i.where(klass(cond))
160-
tm.assert_index_equal(result, expected)
161-
162101
def test_append(self):
163102

164103
ci = self.create_index()
@@ -503,106 +442,6 @@ def test_frame_repr(self):
503442
expected = " A\na 1\nb 2\nc 3"
504443
assert result == expected
505444

506-
def test_string_categorical_index_repr(self):
507-
# short
508-
idx = pd.CategoricalIndex(["a", "bb", "ccc"])
509-
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa
510-
assert repr(idx) == expected
511-
512-
# multiple lines
513-
idx = pd.CategoricalIndex(["a", "bb", "ccc"] * 10)
514-
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
515-
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
516-
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
517-
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa
518-
519-
assert repr(idx) == expected
520-
521-
# truncated
522-
idx = pd.CategoricalIndex(["a", "bb", "ccc"] * 100)
523-
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
524-
...
525-
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
526-
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa
527-
528-
assert repr(idx) == expected
529-
530-
# larger categories
531-
idx = pd.CategoricalIndex(list("abcdefghijklmmo"))
532-
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
533-
'm', 'm', 'o'],
534-
categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa
535-
536-
assert repr(idx) == expected
537-
538-
# short
539-
idx = pd.CategoricalIndex(["あ", "いい", "ううう"])
540-
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
541-
assert repr(idx) == expected
542-
543-
# multiple lines
544-
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 10)
545-
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
546-
'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
547-
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
548-
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
549-
550-
assert repr(idx) == expected
551-
552-
# truncated
553-
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 100)
554-
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
555-
...
556-
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
557-
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa
558-
559-
assert repr(idx) == expected
560-
561-
# larger categories
562-
idx = pd.CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
563-
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
564-
'す', 'せ', 'そ'],
565-
categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa
566-
567-
assert repr(idx) == expected
568-
569-
# Enable Unicode option -----------------------------------------
570-
with cf.option_context("display.unicode.east_asian_width", True):
571-
572-
# short
573-
idx = pd.CategoricalIndex(["あ", "いい", "ううう"])
574-
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
575-
assert repr(idx) == expected
576-
577-
# multiple lines
578-
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 10)
579-
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
580-
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
581-
'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
582-
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
583-
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
584-
585-
assert repr(idx) == expected
586-
587-
# truncated
588-
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 100)
589-
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
590-
'ううう', 'あ',
591-
...
592-
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
593-
'あ', 'いい', 'ううう'],
594-
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa
595-
596-
assert repr(idx) == expected
597-
598-
# larger categories
599-
idx = pd.CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
600-
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
601-
'さ', 'し', 'す', 'せ', 'そ'],
602-
categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa
603-
604-
assert repr(idx) == expected
605-
606445
def test_fillna_categorical(self):
607446
# GH 11343
608447
idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
2+
Tests for CategoricalIndex.__repr__ and related methods.
3+
"""
4+
import pandas._config.config as cf
5+
6+
import pandas as pd
7+
8+
9+
class TestCategoricalIndexRepr:
10+
def test_string_categorical_index_repr(self):
11+
# short
12+
idx = pd.CategoricalIndex(["a", "bb", "ccc"])
13+
expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa
14+
assert repr(idx) == expected
15+
16+
# multiple lines
17+
idx = pd.CategoricalIndex(["a", "bb", "ccc"] * 10)
18+
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
19+
'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
20+
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
21+
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')""" # noqa
22+
23+
assert repr(idx) == expected
24+
25+
# truncated
26+
idx = pd.CategoricalIndex(["a", "bb", "ccc"] * 100)
27+
expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
28+
...
29+
'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
30+
categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)""" # noqa
31+
32+
assert repr(idx) == expected
33+
34+
# larger categories
35+
idx = pd.CategoricalIndex(list("abcdefghijklmmo"))
36+
expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
37+
'm', 'm', 'o'],
38+
categories=['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', ...], ordered=False, dtype='category')""" # noqa
39+
40+
assert repr(idx) == expected
41+
42+
# short
43+
idx = pd.CategoricalIndex(["あ", "いい", "ううう"])
44+
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
45+
assert repr(idx) == expected
46+
47+
# multiple lines
48+
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 10)
49+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
50+
'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
51+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
52+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
53+
54+
assert repr(idx) == expected
55+
56+
# truncated
57+
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 100)
58+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
59+
...
60+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
61+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa
62+
63+
assert repr(idx) == expected
64+
65+
# larger categories
66+
idx = pd.CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
67+
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
68+
'す', 'せ', 'そ'],
69+
categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa
70+
71+
assert repr(idx) == expected
72+
73+
# Enable Unicode option -----------------------------------------
74+
with cf.option_context("display.unicode.east_asian_width", True):
75+
76+
# short
77+
idx = pd.CategoricalIndex(["あ", "いい", "ううう"])
78+
expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
79+
assert repr(idx) == expected
80+
81+
# multiple lines
82+
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 10)
83+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
84+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
85+
'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
86+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
87+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')""" # noqa
88+
89+
assert repr(idx) == expected
90+
91+
# truncated
92+
idx = pd.CategoricalIndex(["あ", "いい", "ううう"] * 100)
93+
expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
94+
'ううう', 'あ',
95+
...
96+
'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
97+
'あ', 'いい', 'ううう'],
98+
categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)""" # noqa
99+
100+
assert repr(idx) == expected
101+
102+
# larger categories
103+
idx = pd.CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
104+
expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
105+
'さ', 'し', 'す', 'せ', 'そ'],
106+
categories=['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', ...], ordered=False, dtype='category')""" # noqa
107+
108+
assert repr(idx) == expected

pandas/tests/indexes/categorical/test_indexing.py

+65-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import CategoricalIndex, Index
5+
from pandas import CategoricalIndex, Index, IntervalIndex
66
import pandas._testing as tm
77

88

@@ -250,3 +250,67 @@ def test_get_indexer(self):
250250
msg = "method='nearest' not implemented yet for CategoricalIndex"
251251
with pytest.raises(NotImplementedError, match=msg):
252252
idx2.get_indexer(idx1, method="nearest")
253+
254+
255+
class TestWhere:
256+
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
257+
def test_where(self, klass):
258+
i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
259+
cond = [True] * len(i)
260+
expected = i
261+
result = i.where(klass(cond))
262+
tm.assert_index_equal(result, expected)
263+
264+
cond = [False] + [True] * (len(i) - 1)
265+
expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
266+
result = i.where(klass(cond))
267+
tm.assert_index_equal(result, expected)
268+
269+
270+
class TestContains:
271+
def test_contains(self):
272+
273+
ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)
274+
275+
assert "a" in ci
276+
assert "z" not in ci
277+
assert "e" not in ci
278+
assert np.nan not in ci
279+
280+
# assert codes NOT in index
281+
assert 0 not in ci
282+
assert 1 not in ci
283+
284+
def test_contains_nan(self):
285+
ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
286+
assert np.nan in ci
287+
288+
@pytest.mark.parametrize(
289+
"item, expected",
290+
[
291+
(pd.Interval(0, 1), True),
292+
(1.5, True),
293+
(pd.Interval(0.5, 1.5), False),
294+
("a", False),
295+
(pd.Timestamp(1), False),
296+
(pd.Timedelta(1), False),
297+
],
298+
ids=str,
299+
)
300+
def test_contains_interval(self, item, expected):
301+
# GH 23705
302+
ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
303+
result = item in ci
304+
assert result is expected
305+
306+
def test_contains_list(self):
307+
# GH#21729
308+
idx = pd.CategoricalIndex([1, 2, 3])
309+
310+
assert "a" not in idx
311+
312+
with pytest.raises(TypeError, match="unhashable type"):
313+
["a"] in idx
314+
315+
with pytest.raises(TypeError, match="unhashable type"):
316+
["a", "b"] in idx

0 commit comments

Comments
 (0)