Skip to content

Commit 574e6c3

Browse files
committed
First round of comments (will squash later)
1 parent d262a8b commit 574e6c3

File tree

7 files changed

+221
-190
lines changed

7 files changed

+221
-190
lines changed

pandas/tests/categorical/common.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import numpy as np
4+
5+
import pandas as pd
6+
from pandas import Categorical, DataFrame
7+
8+
9+
class TestCategorical(object):
    """Fixture base class exposing an ordered ``Categorical``.

    Test classes inherit from this to get ``self.factor`` without
    repeating the construction in every module.
    """

    def setup_method(self, method):
        """Create the ``self.factor`` fixture.

        Parameters
        ----------
        method : object
            Supplied by the test runner; not used here.
        """
        # Categories are not given explicitly, so they are taken from the
        # values; ``ordered=True`` makes the categorical ordered.
        self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                                  ordered=True)
14+
15+
16+
class TestCategoricalBlock(object):
    """Fixture base class for tests of categoricals held in a DataFrame.

    ``setup_method`` provides two attributes:

    * ``self.factor`` -- a ``Categorical`` built without ``ordered``.
    * ``self.cat`` -- a 100-row ``DataFrame`` with a random integer
      ``'value'`` column (sorted ascending) and a ``'value_group'`` column
      produced by ``pd.cut`` over 500-wide bins.
    """

    def setup_method(self, method):
        """Create the ``self.factor`` and ``self.cat`` fixtures.

        Parameters
        ----------
        method : object
            Supplied by the test runner; not used here.
        """
        self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])

        # 100 random integers drawn from [0, 10000); unseeded, so the
        # concrete values differ between runs.
        df = DataFrame({'value': np.random.randint(0, 10000, 100)})

        # One label per bin: "0 - 499", "500 - 999", ..., "9500 - 9999".
        labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
        # Passing the labels as the categories as well keeps them in
        # numeric bin order rather than lexical string order.
        cat_labels = Categorical(labels, categories=labels)

        df = df.sort_values(by=['value'], ascending=True)
        # 21 edges (0, 500, ..., 10000) -> 20 bins, matching the 20 labels;
        # right=False makes each bin closed on the left: [lo, hi).
        df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
                                   right=False, labels=cat_labels)
        self.cat = df

pandas/tests/categorical/test_constructors.py

+59-3
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
period_range, timedelta_range, NaT,
1212
Interval, IntervalIndex)
1313
from pandas.core.dtypes.dtypes import CategoricalDtype
14-
from pandas.core.dtypes.common import (
15-
is_float_dtype,
16-
is_integer_dtype)
14+
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype
15+
from pandas.tests.categorical.common import TestCategoricalBlock
1716

1817

1918
class TestCategoricalConstructors(object):
@@ -511,6 +510,63 @@ def test_construction_with_ordered(self):
511510
assert cat.ordered
512511

513512

513+
class TestCategoricalBlockConstructorsWithFactor(TestCategoricalBlock):
    """Construction of Series/DataFrame objects from ``self.factor``.

    ``self.factor`` is read from the instance; it is expected to be set
    up by the ``TestCategoricalBlock`` base class.
    """

    def test_basic(self):
        """Check a categorical survives being wrapped in Series/DataFrame."""

        # test basic creation / coercion of categoricals
        s = Series(self.factor, name='A')
        assert s.dtype == 'category'
        assert len(s) == len(self.factor)
        # The str() calls are smoke checks: they only need to not raise.
        str(s.values)
        str(s)

        # in a frame
        df = DataFrame({'A': self.factor})
        result = df['A']
        tm.assert_series_equal(result, s)
        result = df.iloc[:, 0]
        tm.assert_series_equal(result, s)
        assert len(df) == len(self.factor)
        str(df.values)
        str(df)

        df = DataFrame({'A': s})
        result = df['A']
        tm.assert_series_equal(result, s)
        assert len(df) == len(self.factor)
        str(df.values)
        str(df)

        # multiples
        df = DataFrame({'A': s, 'B': s, 'C': 1})
        result1 = df['A']
        result2 = df['B']
        tm.assert_series_equal(result1, s)
        # Column 'B' carries the name 'B' while ``s`` is named 'A', so the
        # name is excluded here and asserted separately below.
        tm.assert_series_equal(result2, s, check_names=False)
        assert result2.name == 'B'
        assert len(df) == len(self.factor)
        str(df.values)
        str(df)

        # GH8623
        x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'],
                       [1, 'John P. Doe']],
                      columns=['person_id', 'person_name'])
        x['person_name'] = Categorical(
            x.person_name)  # doing this breaks transform

        # The same scalar must come back through every access path.
        expected = x.iloc[0].person_name
        result = x.person_name.iloc[0]
        assert result == expected

        result = x.person_name[0]
        assert result == expected

        result = x.person_name.loc[0]
        assert result == expected
568+
569+
514570
class TestCategoricalBlockConstructors(object):
515571

516572
def test_construction_series(self):

pandas/tests/categorical/test_generic.py

+5-125
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,14 @@
55

66
import numpy as np
77

8-
import pandas as pd
98
import pandas.util.testing as tm
109
from pandas import (Categorical, Index, Series, DataFrame, CategoricalIndex)
1110
from pandas.core.dtypes.dtypes import CategoricalDtype
11+
from pandas.tests.categorical.common import (TestCategorical,
12+
TestCategoricalBlock)
1213

1314

14-
class TestCategoricalGeneric(object):
15-
16-
def setup_method(self, method):
17-
self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
18-
ordered=True)
15+
class TestCategoricalGeneric(TestCategorical):
1916

2017
def test_categories_none(self):
2118
factor = Categorical(['a', 'b', 'b', 'a',
@@ -66,38 +63,6 @@ def test_describe(self):
6663
name='categories'))
6764
tm.assert_frame_equal(desc, expected)
6865

69-
def test_getitem(self):
70-
assert self.factor[0] == 'a'
71-
assert self.factor[-1] == 'c'
72-
73-
subf = self.factor[[0, 1, 2]]
74-
tm.assert_numpy_array_equal(subf._codes,
75-
np.array([0, 1, 1], dtype=np.int8))
76-
77-
subf = self.factor[np.asarray(self.factor) == 'c']
78-
tm.assert_numpy_array_equal(subf._codes,
79-
np.array([2, 2, 2], dtype=np.int8))
80-
81-
def test_setitem(self):
82-
83-
# int/positional
84-
c = self.factor.copy()
85-
c[0] = 'b'
86-
assert c[0] == 'b'
87-
c[-1] = 'a'
88-
assert c[-1] == 'a'
89-
90-
# boolean
91-
c = self.factor.copy()
92-
indexer = np.zeros(len(c), dtype='bool')
93-
indexer[0] = True
94-
indexer[-1] = True
95-
c[indexer] = 'c'
96-
expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
97-
ordered=True)
98-
99-
tm.assert_categorical_equal(c, expected)
100-
10166
def test_set_categories_inplace(self):
10267
cat = self.factor.copy()
10368
cat.set_categories(['a', 'b', 'c', 'd'], inplace=True)
@@ -212,81 +177,8 @@ def f():
212177
res = cat_rev > "b"
213178
tm.assert_numpy_array_equal(res, exp)
214179

215-
def test_print(self):
216-
expected = ["[a, b, b, a, a, c, c, c]",
217-
"Categories (3, object): [a < b < c]"]
218-
expected = "\n".join(expected)
219-
actual = repr(self.factor)
220-
assert actual == expected
221-
222-
223-
class TestCategoricalGenericBlock(object):
224-
225-
def setup_method(self, method):
226-
self.factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c'])
227-
228-
df = DataFrame({'value': np.random.randint(0, 10000, 100)})
229-
labels = ["{0} - {1}".format(i, i + 499) for i in range(0, 10000, 500)]
230-
cat_labels = Categorical(labels, labels)
231-
232-
df = df.sort_values(by=['value'], ascending=True)
233-
df['value_group'] = pd.cut(df.value, range(0, 10500, 500),
234-
right=False, labels=cat_labels)
235-
self.cat = df
236-
237-
def test_basic(self):
238-
239-
# test basic creation / coercion of categoricals
240-
s = Series(self.factor, name='A')
241-
assert s.dtype == 'category'
242-
assert len(s) == len(self.factor)
243-
str(s.values)
244-
str(s)
245-
246-
# in a frame
247-
df = DataFrame({'A': self.factor})
248-
result = df['A']
249-
tm.assert_series_equal(result, s)
250-
result = df.iloc[:, 0]
251-
tm.assert_series_equal(result, s)
252-
assert len(df) == len(self.factor)
253-
str(df.values)
254-
str(df)
255-
256-
df = DataFrame({'A': s})
257-
result = df['A']
258-
tm.assert_series_equal(result, s)
259-
assert len(df) == len(self.factor)
260-
str(df.values)
261-
str(df)
262-
263-
# multiples
264-
df = DataFrame({'A': s, 'B': s, 'C': 1})
265-
result1 = df['A']
266-
result2 = df['B']
267-
tm.assert_series_equal(result1, s)
268-
tm.assert_series_equal(result2, s, check_names=False)
269-
assert result2.name == 'B'
270-
assert len(df) == len(self.factor)
271-
str(df.values)
272-
str(df)
273-
274-
# GH8623
275-
x = DataFrame([[1, 'John P. Doe'], [2, 'Jane Dove'],
276-
[1, 'John P. Doe']],
277-
columns=['person_id', 'person_name'])
278-
x['person_name'] = Categorical(x.person_name
279-
) # doing this breaks transform
280-
281-
expected = x.iloc[0].person_name
282-
result = x.person_name.iloc[0]
283-
assert result == expected
284-
285-
result = x.person_name[0]
286-
assert result == expected
287-
288-
result = x.person_name.loc[0]
289-
assert result == expected
180+
181+
class TestCategoricalGenericBlock(TestCategoricalBlock):
290182

291183
def test_describe(self):
292184

@@ -310,18 +202,6 @@ def test_describe(self):
310202
res = df3.describe()
311203
tm.assert_numpy_array_equal(res["cat"].values, res["s"].values)
312204

313-
def test_groupby_sort(self):
314-
315-
# http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby
316-
# This should result in a properly sorted Series so that the plot
317-
# has a sorted x axis
318-
# self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')
319-
320-
res = self.cat.groupby(['value_group'])['value_group'].count()
321-
exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))]
322-
exp.index = CategoricalIndex(exp.index, name=exp.index.name)
323-
tm.assert_series_equal(res, exp)
324-
325205
def test_astype_to_other(self):
326206

327207
s = self.cat['value_group']
+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# -*- coding: utf-8 -*-
2+
3+
4+
import pandas.util.testing as tm
5+
from pandas import CategoricalIndex
6+
from pandas.tests.categorical.common import TestCategoricalBlock
7+
8+
9+
# NOTE(review): the class name is spelled "Groubpy" (sic) in the commit; it
# is kept unchanged here so the collected test id stays the same.
class TestCategoricalBlockGroubpy(TestCategoricalBlock):
    """Groupby behaviour on the categorical ``'value_group'`` column.

    ``self.cat`` is read from the instance; it is expected to be set up
    by the ``TestCategoricalBlock`` base class.
    """

    def test_groupby_sort(self):
        """Group counts must come back ordered by the bins' numeric start."""

        # http://stackoverflow.com/questions/23814368/sorting-pandas-categorical-labels-after-groupby
        # This should result in a properly sorted Series so that the plot
        # has a sorted x axis
        # self.cat.groupby(['value_group'])['value_group'].count().plot(kind='bar')

        res = self.cat.groupby(['value_group'])['value_group'].count()
        # Build the expectation by reordering the result on the leading
        # number of each label (the text before the first whitespace).
        exp = res[sorted(res.index, key=lambda x: float(x.split()[0]))]
        exp.index = CategoricalIndex(exp.index, name=exp.index.name)
        tm.assert_series_equal(res, exp)

pandas/tests/categorical/test_indexing.py

+38-4
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,44 @@
88
import pandas.compat as compat
99
import pandas.util.testing as tm
1010
from pandas.core.dtypes.dtypes import CategoricalDtype
11-
from pandas import (Categorical, Index, Series, DataFrame, PeriodIndex,
12-
Interval)
13-
from pandas.core.dtypes.common import (
14-
is_categorical_dtype)
11+
from pandas import Categorical, Index, Series, DataFrame, PeriodIndex, Interval
12+
from pandas.core.dtypes.common import is_categorical_dtype
13+
from pandas.tests.categorical.common import TestCategorical
14+
15+
16+
class TestCategoricalIndexingWithFactor(TestCategorical):
    """Item access and assignment on the ``self.factor`` fixture.

    ``self.factor`` is read from the instance; it is expected to be set
    up by the ``TestCategorical`` base class.
    """

    def test_getitem(self):
        """Scalar, list and boolean-mask lookups on the categorical."""
        assert self.factor[0] == 'a'
        assert self.factor[-1] == 'c'

        # List indexing: the result's codes are compared as int8.
        subf = self.factor[[0, 1, 2]]
        tm.assert_numpy_array_equal(subf._codes,
                                    np.array([0, 1, 1], dtype=np.int8))

        # Boolean mask selecting every element equal to 'c'.
        subf = self.factor[np.asarray(self.factor) == 'c']
        tm.assert_numpy_array_equal(subf._codes,
                                    np.array([2, 2, 2], dtype=np.int8))

    def test_setitem(self):
        """Positional and boolean-mask assignment on a copy."""

        # int/positional
        c = self.factor.copy()
        c[0] = 'b'
        assert c[0] == 'b'
        c[-1] = 'a'
        assert c[-1] == 'a'

        # boolean
        c = self.factor.copy()
        indexer = np.zeros(len(c), dtype='bool')
        # Mask selects only the first and the last element.
        indexer[0] = True
        indexer[-1] = True
        c[indexer] = 'c'
        expected = Categorical(['c', 'b', 'b', 'a', 'a', 'c', 'c', 'c'],
                               ordered=True)

        tm.assert_categorical_equal(c, expected)
1549

1650

1751
class TestCategoricalIndexing(object):

0 commit comments

Comments
 (0)