Skip to content

Commit d262a8b

Browse files
committed
Split test_categorical into subpackage (pandas-dev#18497)
1 parent 674fb96 commit d262a8b

14 files changed

+4963
-4831
lines changed

pandas/tests/categorical/__init__.py

Whitespace-only changes.

pandas/tests/categorical/test_api.py

+1,679
Large diffs are not rendered by default.

pandas/tests/categorical/test_constructors.py

+625
Large diffs are not rendered by default.
+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import pytest
4+
5+
import pandas as pd
6+
import pandas.util.testing as tm
7+
from pandas.core.dtypes.dtypes import CategoricalDtype
8+
from pandas import (Categorical, Index, Series, DataFrame, CategoricalIndex)
9+
10+
11+
class TestCategoricalDtypes(object):
    """Tests for dtype comparison and ``_set_dtype`` on ``Categorical``."""

    def test_is_equal_dtype(self):
        """Check ``is_dtype_equal`` against Categoricals and Indexes.

        The assertions below expect the comparison to ignore the order in
        which categories are listed, but to distinguish ordered from
        unordered categoricals and categorical from non-categorical data.
        """
        # test dtype comparisons between cats

        c1 = Categorical(list('aabca'), categories=list('abc'), ordered=False)
        c2 = Categorical(list('aabca'), categories=list('cab'), ordered=False)
        c3 = Categorical(list('aabca'), categories=list('cab'), ordered=True)
        assert c1.is_dtype_equal(c1)
        assert c2.is_dtype_equal(c2)
        assert c3.is_dtype_equal(c3)
        assert c1.is_dtype_equal(c2)
        assert not c1.is_dtype_equal(c3)
        assert not c1.is_dtype_equal(Index(list('aabca')))
        assert not c1.is_dtype_equal(c1.astype(object))
        assert c1.is_dtype_equal(CategoricalIndex(c1))
        assert (c1.is_dtype_equal(
            CategoricalIndex(c1, categories=list('cab'))))
        assert not c1.is_dtype_equal(CategoricalIndex(c1, ordered=True))

    def test_set_dtype_same(self):
        """Setting an identical dtype yields an equal Categorical."""
        c = Categorical(['a', 'b', 'c'])
        result = c._set_dtype(CategoricalDtype(['a', 'b', 'c']))
        tm.assert_categorical_equal(result, c)

    def test_set_dtype_new_categories(self):
        """Appending a category keeps the codes and extends the categories."""
        c = Categorical(['a', 'b', 'c'])
        result = c._set_dtype(CategoricalDtype(list('abcd')))
        tm.assert_numpy_array_equal(result.codes, c.codes)
        tm.assert_index_equal(result.dtype.categories, Index(list('abcd')))

    @pytest.mark.parametrize('values, categories, new_categories', [
        # No NaNs, same cats, same order
        (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],),
        # No NaNs, same cats, different order
        (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],),
        # Same, unsorted
        (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],),
        # Same, unsorted, different order
        (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],),
        # NaNs ('c' is not among the categories)
        (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']),
        (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']),
        (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']),
        (['b', 'a', 'c'], ['a', 'b'], ['b', 'a']),
        # Introduce NaNs (the new categories drop one of the old ones)
        (['a', 'b', 'c'], ['a', 'b'], ['a']),
        (['a', 'b', 'c'], ['a', 'b'], ['b']),
        (['b', 'a', 'c'], ['a', 'b'], ['a']),
        (['b', 'a', 'c'], ['a', 'b'], ['b']),
        # No overlap
        (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']),
    ])
    @pytest.mark.parametrize('ordered', [True, False])
    def test_set_dtype_many(self, values, categories, new_categories,
                            ordered):
        """``_set_dtype`` must match building the Categorical directly.

        The expected value is constructed from ``values`` with the target
        categories and orderedness; converting a Categorical built with the
        original ``categories`` to that dtype has to give the same result.
        """
        c = Categorical(values, categories)
        expected = Categorical(values, categories=new_categories,
                               ordered=ordered)
        result = c._set_dtype(expected.dtype)
        tm.assert_categorical_equal(result, expected)

    def test_set_dtype_no_overlap(self):
        """Values outside the original categories stay missing.

        None of 'a', 'b', 'c' is in the original categories ['d', 'e'], so
        the expected result is all-missing even though the new dtype lists
        'a' and 'b'.
        """
        c = Categorical(['a', 'b', 'c'], ['d', 'e'])
        result = c._set_dtype(CategoricalDtype(['a', 'b']))
        expected = Categorical([None, None, None], categories=['a', 'b'])
        tm.assert_categorical_equal(result, expected)
78+
79+
80+
class TestCategoricalBlockDtypes(object):
    """Dtype checks for categorical columns in frames and for code widths."""

    def test_dtypes(self):
        """Compare ``DataFrame.dtypes`` with dtype names, one name at a time."""
        # GH8143
        labels = ['cat', 'obj', 'num']
        categorical = Categorical(['a', 'b', 'c'])
        strings = Series(['a', 'b', 'c'])
        integers = Series([1, 2, 3])
        frame = pd.concat([Series(categorical), strings, integers],
                          axis=1, keys=labels)

        # each dtype name is expected to match exactly one of the columns
        checks = [
            ('object', [False, True, False]),
            ('int64', [False, False, True]),
            ('category', [True, False, False]),
        ]
        for dtype_name, mask in checks:
            outcome = frame.dtypes == dtype_name
            wanted = Series(mask, index=labels)
            tm.assert_series_equal(outcome, wanted)

    def test_codes_dtypes(self):
        """Check the integer dtype of ``codes`` for various category counts."""
        # GH 8453
        def numbered(count):
            # distinct labels 'foo00000', 'foo00001', ...
            return ['foo%05d' % i for i in range(count)]

        three = Categorical(['foo', 'bar', 'baz'])
        assert three.codes.dtype == 'int8'

        hundreds = Categorical(numbered(400))
        assert hundreds.codes.dtype == 'int16'

        thousands = Categorical(numbered(40000))
        assert thousands.codes.dtype == 'int32'

        # adding cats
        current = Categorical(['foo', 'bar', 'baz'])
        assert current.codes.dtype == 'int8'
        current = current.add_categories(numbered(400))
        assert current.codes.dtype == 'int16'

        # removing cats
        current = current.remove_categories(numbered(300))
        assert current.codes.dtype == 'int8'

    @pytest.mark.parametrize('columns', [['x'], ['x', 'y'], ['x', 'y', 'z']])
    def test_empty_astype(self, columns):
        """Casting an empty frame to 'category' must raise."""
        # GH 18004
        expected_message = (
            '> 1 ndim Categorical are not supported at this time')
        with tm.assert_raises_regex(NotImplementedError, expected_message):
            DataFrame(columns=columns).astype('category')

0 commit comments

Comments
 (0)