Skip to content

Commit 3903d6f

Browse files
committed
TST: split up test_concat.py pandas-dev#37243
* created test_categorical.py
1 parent 18b4864 commit 3903d6f

File tree

3 files changed

+224
-213
lines changed

3 files changed

+224
-213
lines changed

pandas/tests/reshape/concat/test_append_common.py

+23
Original file line numberDiff line numberDiff line change
@@ -725,3 +725,26 @@ def test_concat_categorical_empty(self):
725725

726726
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
727727
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
728+
729+
def test_categorical_concat_append(self):
730+
cat = Categorical(["a", "b"], categories=["a", "b"])
731+
vals = [1, 2]
732+
df = DataFrame({"cats": cat, "vals": vals})
733+
cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
734+
vals2 = [1, 2, 1, 2]
735+
exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))
736+
737+
tm.assert_frame_equal(pd.concat([df, df]), exp)
738+
tm.assert_frame_equal(df.append(df), exp)
739+
740+
# GH 13524 can concat different categories
741+
cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
742+
vals3 = [1, 2]
743+
df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
744+
745+
res = pd.concat([df, df_different_categories], ignore_index=True)
746+
exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
747+
tm.assert_frame_equal(res, exp)
748+
749+
res = df.append(df_different_categories, ignore_index=True)
750+
tm.assert_frame_equal(res, exp)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas.core.dtypes.dtypes import CategoricalDtype
5+
6+
import pandas as pd
7+
from pandas import Categorical, DataFrame, Series
8+
import pandas._testing as tm
9+
10+
11+
@pytest.fixture(params=[True, False])
def sort(request):
    """
    Parametrized boolean ``sort`` flag.

    Tests requesting this fixture run once per entry in ``params`` and
    receive that entry (``True`` or ``False``), e.g. to forward it as the
    ``sort`` keyword of ``pd.concat``.
    """
    sort_flag = request.param
    return sort_flag
15+
16+
17+
class TestCategoricalConcat:
18+
def test_categorical_concat(self, sort):
19+
# See GH 10177
20+
df1 = DataFrame(
21+
np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"]
22+
)
23+
24+
df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"])
25+
26+
cat_values = ["one", "one", "two", "one", "two", "two", "one"]
27+
df2["h"] = Series(Categorical(cat_values))
28+
29+
res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
30+
exp = DataFrame(
31+
{
32+
"a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
33+
"b": [
34+
1,
35+
4,
36+
7,
37+
10,
38+
13,
39+
16,
40+
np.nan,
41+
np.nan,
42+
np.nan,
43+
np.nan,
44+
np.nan,
45+
np.nan,
46+
np.nan,
47+
],
48+
"c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
49+
"h": [None] * 6 + cat_values,
50+
}
51+
)
52+
tm.assert_frame_equal(res, exp)
53+
54+
def test_categorical_concat_dtypes(self):
55+
56+
# GH8143
57+
index = ["cat", "obj", "num"]
58+
cat = Categorical(["a", "b", "c"])
59+
obj = Series(["a", "b", "c"])
60+
num = Series([1, 2, 3])
61+
df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
62+
63+
result = df.dtypes == "object"
64+
expected = Series([False, True, False], index=index)
65+
tm.assert_series_equal(result, expected)
66+
67+
result = df.dtypes == "int64"
68+
expected = Series([False, False, True], index=index)
69+
tm.assert_series_equal(result, expected)
70+
71+
result = df.dtypes == "category"
72+
expected = Series([True, False, False], index=index)
73+
tm.assert_series_equal(result, expected)
74+
75+
def test_concat_categoricalindex(self):
76+
# GH 16111, categories that aren't lexsorted
77+
categories = [9, 0, 1, 2, 3]
78+
79+
a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
80+
b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
81+
c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))
82+
83+
result = pd.concat([a, b, c], axis=1)
84+
85+
exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
86+
exp = DataFrame(
87+
{
88+
0: [1, 1, np.nan, np.nan],
89+
1: [np.nan, 2, 2, np.nan],
90+
2: [np.nan, np.nan, 3, 3],
91+
},
92+
columns=[0, 1, 2],
93+
index=exp_idx,
94+
)
95+
tm.assert_frame_equal(result, exp)
96+
97+
def test_categorical_concat_preserve(self):
98+
99+
# GH 8641 series concat not preserving category dtype
100+
# GH 13524 can concat different categories
101+
s = Series(list("abc"), dtype="category")
102+
s2 = Series(list("abd"), dtype="category")
103+
104+
exp = Series(list("abcabd"))
105+
res = pd.concat([s, s2], ignore_index=True)
106+
tm.assert_series_equal(res, exp)
107+
108+
exp = Series(list("abcabc"), dtype="category")
109+
res = pd.concat([s, s], ignore_index=True)
110+
tm.assert_series_equal(res, exp)
111+
112+
exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category")
113+
res = pd.concat([s, s])
114+
tm.assert_series_equal(res, exp)
115+
116+
a = Series(np.arange(6, dtype="int64"))
117+
b = Series(list("aabbca"))
118+
119+
df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))})
120+
res = pd.concat([df2, df2])
121+
exp = DataFrame(
122+
{
123+
"A": pd.concat([a, a]),
124+
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
125+
}
126+
)
127+
tm.assert_frame_equal(res, exp)
128+
129+
def test_categorical_index_preserver(self):
130+
131+
a = Series(np.arange(6, dtype="int64"))
132+
b = Series(list("aabbca"))
133+
134+
df2 = DataFrame(
135+
{"A": a, "B": b.astype(CategoricalDtype(list("cab")))}
136+
).set_index("B")
137+
result = pd.concat([df2, df2])
138+
expected = DataFrame(
139+
{
140+
"A": pd.concat([a, a]),
141+
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
142+
}
143+
).set_index("B")
144+
tm.assert_frame_equal(result, expected)
145+
146+
# wrong categories
147+
df3 = DataFrame(
148+
{"A": a, "B": Categorical(b, categories=list("abe"))}
149+
).set_index("B")
150+
msg = "categories must match existing categories when appending"
151+
with pytest.raises(TypeError, match=msg):
152+
pd.concat([df2, df3])
153+
154+
def test_concat_categorical_tz(self):
155+
# GH-23816
156+
a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific"))
157+
b = Series(["a", "b"], dtype="category")
158+
result = pd.concat([a, b], ignore_index=True)
159+
expected = Series(
160+
[
161+
pd.Timestamp("2017-01-01", tz="US/Pacific"),
162+
pd.Timestamp("2017-01-02", tz="US/Pacific"),
163+
"a",
164+
"b",
165+
]
166+
)
167+
tm.assert_series_equal(result, expected)
168+
169+
def test_concat_categorical_unchanged(self):
170+
# GH-12007
171+
# test fix for when concat on categorical and float
172+
# coerces dtype categorical -> float
173+
df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A"))
174+
ser = Series([0, 1, 2], index=[0, 1, 3], name="B")
175+
result = pd.concat([df, ser], axis=1)
176+
expected = DataFrame(
177+
{
178+
"A": Series(["a", "b", "c", np.nan], dtype="category"),
179+
"B": Series([0, 1, np.nan, 2], dtype="float"),
180+
}
181+
)
182+
tm.assert_equal(result, expected)
183+
184+
def test_categorical_concat_gh7864(self):
185+
# GH 7864
186+
# make sure ordering is preserved
187+
df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")})
188+
df["grade"] = Categorical(df["raw_grade"])
189+
df["grade"].cat.set_categories(["e", "a", "b"])
190+
191+
df1 = df[0:3]
192+
df2 = df[3:]
193+
194+
tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories)
195+
tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories)
196+
197+
dfx = pd.concat([df1, df2])
198+
tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories)
199+
200+
dfa = df1.append(df2)
201+
tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories)

0 commit comments

Comments
 (0)