Skip to content

Commit 3395742

Browse files
WillAyd authored and jreback committed
Split test_categorical into subpackage (#18497) (#18508)
1 parent 9629fef commit 3395742

37 files changed

+4985
-4843
lines changed

pandas/tests/categorical/__init__.py

Whitespace-only changes.

pandas/tests/categorical/common.py

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from pandas import Categorical
4+
5+
6+
class TestCategorical(object):
    """Base test class that prepares an ordered ``factor`` categorical."""

    def setup_method(self, method):
        # Build the fixture before each test; the categories are inferred
        # from the values because none are passed explicitly.
        labels = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        self.factor = Categorical(labels, ordered=True)
+320
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,320 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import pytest
4+
import sys
5+
6+
import numpy as np
7+
8+
import pandas.util.testing as tm
9+
from pandas import Categorical, Index, Series
10+
11+
from pandas.compat import PYPY
12+
13+
14+
class TestCategoricalAnalytics(object):
15+
16+
def test_min_max(self):
17+
18+
# unordered cats have no min/max
19+
cat = Categorical(["a", "b", "c", "d"], ordered=False)
20+
pytest.raises(TypeError, lambda: cat.min())
21+
pytest.raises(TypeError, lambda: cat.max())
22+
23+
cat = Categorical(["a", "b", "c", "d"], ordered=True)
24+
_min = cat.min()
25+
_max = cat.max()
26+
assert _min == "a"
27+
assert _max == "d"
28+
29+
cat = Categorical(["a", "b", "c", "d"],
30+
categories=['d', 'c', 'b', 'a'], ordered=True)
31+
_min = cat.min()
32+
_max = cat.max()
33+
assert _min == "d"
34+
assert _max == "a"
35+
36+
cat = Categorical([np.nan, "b", "c", np.nan],
37+
categories=['d', 'c', 'b', 'a'], ordered=True)
38+
_min = cat.min()
39+
_max = cat.max()
40+
assert np.isnan(_min)
41+
assert _max == "b"
42+
43+
_min = cat.min(numeric_only=True)
44+
assert _min == "c"
45+
_max = cat.max(numeric_only=True)
46+
assert _max == "b"
47+
48+
cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1],
49+
ordered=True)
50+
_min = cat.min()
51+
_max = cat.max()
52+
assert np.isnan(_min)
53+
assert _max == 1
54+
55+
_min = cat.min(numeric_only=True)
56+
assert _min == 2
57+
_max = cat.max(numeric_only=True)
58+
assert _max == 1
59+
60+
@pytest.mark.parametrize("values,categories,exp_mode", [
61+
([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
62+
([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
63+
([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
64+
([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
65+
([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
66+
([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4])])
67+
def test_mode(self, values, categories, exp_mode):
68+
s = Categorical(values, categories=categories, ordered=True)
69+
res = s.mode()
70+
exp = Categorical(exp_mode, categories=categories, ordered=True)
71+
tm.assert_categorical_equal(res, exp)
72+
73+
def test_searchsorted(self):
74+
# https://github.com/pandas-dev/pandas/issues/8420
75+
# https://github.com/pandas-dev/pandas/issues/14522
76+
77+
c1 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
78+
categories=['cheese', 'milk', 'apple', 'bread'],
79+
ordered=True)
80+
s1 = Series(c1)
81+
c2 = Categorical(['cheese', 'milk', 'apple', 'bread', 'bread'],
82+
categories=['cheese', 'milk', 'apple', 'bread'],
83+
ordered=False)
84+
s2 = Series(c2)
85+
86+
# Searching for single item argument, side='left' (default)
87+
res_cat = c1.searchsorted('apple')
88+
res_ser = s1.searchsorted('apple')
89+
exp = np.array([2], dtype=np.intp)
90+
tm.assert_numpy_array_equal(res_cat, exp)
91+
tm.assert_numpy_array_equal(res_ser, exp)
92+
93+
# Searching for single item array, side='left' (default)
94+
res_cat = c1.searchsorted(['bread'])
95+
res_ser = s1.searchsorted(['bread'])
96+
exp = np.array([3], dtype=np.intp)
97+
tm.assert_numpy_array_equal(res_cat, exp)
98+
tm.assert_numpy_array_equal(res_ser, exp)
99+
100+
# Searching for several items array, side='right'
101+
res_cat = c1.searchsorted(['apple', 'bread'], side='right')
102+
res_ser = s1.searchsorted(['apple', 'bread'], side='right')
103+
exp = np.array([3, 5], dtype=np.intp)
104+
tm.assert_numpy_array_equal(res_cat, exp)
105+
tm.assert_numpy_array_equal(res_ser, exp)
106+
107+
# Searching for a single value that is not from the Categorical
108+
pytest.raises(ValueError, lambda: c1.searchsorted('cucumber'))
109+
pytest.raises(ValueError, lambda: s1.searchsorted('cucumber'))
110+
111+
# Searching for multiple values one of each is not from the Categorical
112+
pytest.raises(ValueError,
113+
lambda: c1.searchsorted(['bread', 'cucumber']))
114+
pytest.raises(ValueError,
115+
lambda: s1.searchsorted(['bread', 'cucumber']))
116+
117+
# searchsorted call for unordered Categorical
118+
pytest.raises(ValueError, lambda: c2.searchsorted('apple'))
119+
pytest.raises(ValueError, lambda: s2.searchsorted('apple'))
120+
121+
with tm.assert_produces_warning(FutureWarning):
122+
res = c1.searchsorted(v=['bread'])
123+
exp = np.array([3], dtype=np.intp)
124+
tm.assert_numpy_array_equal(res, exp)
125+
126+
def test_unique(self):
127+
# categories are reordered based on value when ordered=False
128+
cat = Categorical(["a", "b"])
129+
exp = Index(["a", "b"])
130+
res = cat.unique()
131+
tm.assert_index_equal(res.categories, exp)
132+
tm.assert_categorical_equal(res, cat)
133+
134+
cat = Categorical(["a", "b", "a", "a"], categories=["a", "b", "c"])
135+
res = cat.unique()
136+
tm.assert_index_equal(res.categories, exp)
137+
tm.assert_categorical_equal(res, Categorical(exp))
138+
139+
cat = Categorical(["c", "a", "b", "a", "a"],
140+
categories=["a", "b", "c"])
141+
exp = Index(["c", "a", "b"])
142+
res = cat.unique()
143+
tm.assert_index_equal(res.categories, exp)
144+
exp_cat = Categorical(exp, categories=['c', 'a', 'b'])
145+
tm.assert_categorical_equal(res, exp_cat)
146+
147+
# nan must be removed
148+
cat = Categorical(["b", np.nan, "b", np.nan, "a"],
149+
categories=["a", "b", "c"])
150+
res = cat.unique()
151+
exp = Index(["b", "a"])
152+
tm.assert_index_equal(res.categories, exp)
153+
exp_cat = Categorical(["b", np.nan, "a"], categories=["b", "a"])
154+
tm.assert_categorical_equal(res, exp_cat)
155+
156+
def test_unique_ordered(self):
157+
# keep categories order when ordered=True
158+
cat = Categorical(['b', 'a', 'b'], categories=['a', 'b'], ordered=True)
159+
res = cat.unique()
160+
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
161+
tm.assert_categorical_equal(res, exp_cat)
162+
163+
cat = Categorical(['c', 'b', 'a', 'a'], categories=['a', 'b', 'c'],
164+
ordered=True)
165+
res = cat.unique()
166+
exp_cat = Categorical(['c', 'b', 'a'], categories=['a', 'b', 'c'],
167+
ordered=True)
168+
tm.assert_categorical_equal(res, exp_cat)
169+
170+
cat = Categorical(['b', 'a', 'a'], categories=['a', 'b', 'c'],
171+
ordered=True)
172+
res = cat.unique()
173+
exp_cat = Categorical(['b', 'a'], categories=['a', 'b'], ordered=True)
174+
tm.assert_categorical_equal(res, exp_cat)
175+
176+
cat = Categorical(['b', 'b', np.nan, 'a'], categories=['a', 'b', 'c'],
177+
ordered=True)
178+
res = cat.unique()
179+
exp_cat = Categorical(['b', np.nan, 'a'], categories=['a', 'b'],
180+
ordered=True)
181+
tm.assert_categorical_equal(res, exp_cat)
182+
183+
def test_unique_index_series(self):
184+
c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1])
185+
# Categorical.unique sorts categories by appearance order
186+
# if ordered=False
187+
exp = Categorical([3, 1, 2], categories=[3, 1, 2])
188+
tm.assert_categorical_equal(c.unique(), exp)
189+
190+
tm.assert_index_equal(Index(c).unique(), Index(exp))
191+
tm.assert_categorical_equal(Series(c).unique(), exp)
192+
193+
c = Categorical([1, 1, 2, 2], categories=[3, 2, 1])
194+
exp = Categorical([1, 2], categories=[1, 2])
195+
tm.assert_categorical_equal(c.unique(), exp)
196+
tm.assert_index_equal(Index(c).unique(), Index(exp))
197+
tm.assert_categorical_equal(Series(c).unique(), exp)
198+
199+
c = Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True)
200+
# Categorical.unique keeps categories order if ordered=True
201+
exp = Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)
202+
tm.assert_categorical_equal(c.unique(), exp)
203+
204+
tm.assert_index_equal(Index(c).unique(), Index(exp))
205+
tm.assert_categorical_equal(Series(c).unique(), exp)
206+
207+
def test_shift(self):
208+
# GH 9416
209+
cat = Categorical(['a', 'b', 'c', 'd', 'a'])
210+
211+
# shift forward
212+
sp1 = cat.shift(1)
213+
xp1 = Categorical([np.nan, 'a', 'b', 'c', 'd'])
214+
tm.assert_categorical_equal(sp1, xp1)
215+
tm.assert_categorical_equal(cat[:-1], sp1[1:])
216+
217+
# shift back
218+
sn2 = cat.shift(-2)
219+
xp2 = Categorical(['c', 'd', 'a', np.nan, np.nan],
220+
categories=['a', 'b', 'c', 'd'])
221+
tm.assert_categorical_equal(sn2, xp2)
222+
tm.assert_categorical_equal(cat[2:], sn2[:-2])
223+
224+
# shift by zero
225+
tm.assert_categorical_equal(cat, cat.shift(0))
226+
227+
def test_nbytes(self):
228+
cat = Categorical([1, 2, 3])
229+
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
230+
assert cat.nbytes == exp
231+
232+
def test_memory_usage(self):
233+
cat = Categorical([1, 2, 3])
234+
235+
# .categories is an index, so we include the hashtable
236+
assert 0 < cat.nbytes <= cat.memory_usage()
237+
assert 0 < cat.nbytes <= cat.memory_usage(deep=True)
238+
239+
cat = Categorical(['foo', 'foo', 'bar'])
240+
assert cat.memory_usage(deep=True) > cat.nbytes
241+
242+
if not PYPY:
243+
# sys.getsizeof will call the .memory_usage with
244+
# deep=True, and add on some GC overhead
245+
diff = cat.memory_usage(deep=True) - sys.getsizeof(cat)
246+
assert abs(diff) < 100
247+
248+
def test_map(self):
249+
c = Categorical(list('ABABC'), categories=list('CBA'), ordered=True)
250+
result = c.map(lambda x: x.lower())
251+
exp = Categorical(list('ababc'), categories=list('cba'), ordered=True)
252+
tm.assert_categorical_equal(result, exp)
253+
254+
c = Categorical(list('ABABC'), categories=list('ABC'), ordered=False)
255+
result = c.map(lambda x: x.lower())
256+
exp = Categorical(list('ababc'), categories=list('abc'), ordered=False)
257+
tm.assert_categorical_equal(result, exp)
258+
259+
result = c.map(lambda x: 1)
260+
# GH 12766: Return an index not an array
261+
tm.assert_index_equal(result, Index(np.array([1] * 5, dtype=np.int64)))
262+
263+
def test_validate_inplace(self):
264+
cat = Categorical(['A', 'B', 'B', 'C', 'A'])
265+
invalid_values = [1, "True", [1, 2, 3], 5.0]
266+
267+
for value in invalid_values:
268+
with pytest.raises(ValueError):
269+
cat.set_ordered(value=True, inplace=value)
270+
271+
with pytest.raises(ValueError):
272+
cat.as_ordered(inplace=value)
273+
274+
with pytest.raises(ValueError):
275+
cat.as_unordered(inplace=value)
276+
277+
with pytest.raises(ValueError):
278+
cat.set_categories(['X', 'Y', 'Z'], rename=True, inplace=value)
279+
280+
with pytest.raises(ValueError):
281+
cat.rename_categories(['X', 'Y', 'Z'], inplace=value)
282+
283+
with pytest.raises(ValueError):
284+
cat.reorder_categories(
285+
['X', 'Y', 'Z'], ordered=True, inplace=value)
286+
287+
with pytest.raises(ValueError):
288+
cat.add_categories(
289+
new_categories=['D', 'E', 'F'], inplace=value)
290+
291+
with pytest.raises(ValueError):
292+
cat.remove_categories(removals=['D', 'E', 'F'], inplace=value)
293+
294+
with pytest.raises(ValueError):
295+
cat.remove_unused_categories(inplace=value)
296+
297+
with pytest.raises(ValueError):
298+
cat.sort_values(inplace=value)
299+
300+
def test_repeat(self):
301+
# GH10183
302+
cat = Categorical(["a", "b"], categories=["a", "b"])
303+
exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
304+
res = cat.repeat(2)
305+
tm.assert_categorical_equal(res, exp)
306+
307+
def test_numpy_repeat(self):
308+
cat = Categorical(["a", "b"], categories=["a", "b"])
309+
exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
310+
tm.assert_categorical_equal(np.repeat(cat, 2), exp)
311+
312+
msg = "the 'axis' parameter is not supported"
313+
tm.assert_raises_regex(ValueError, msg, np.repeat, cat, 2, axis=1)
314+
315+
def test_isna(self):
316+
exp = np.array([False, False, True])
317+
c = Categorical(["a", "b", np.nan])
318+
res = c.isna()
319+
320+
tm.assert_numpy_array_equal(res, exp)

0 commit comments

Comments
 (0)