Skip to content

Commit e149880

Browse files
authored
TST: split up test_concat.py #37243 - more follows up (#37387)
1 parent d89331b commit e149880

13 files changed

+1023
-993
lines changed

pandas/tests/reshape/concat/__init__.py

Whitespace-only changes.
+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import pytest
2+
3+
4+
@pytest.fixture(params=[True, False])
def sort(request):
    """
    Boolean sort keyword for concat and DataFrame.append.

    Parametrized over ``True`` and ``False``; returns the value selected
    for the current test invocation.
    """
    sort_flag = request.param
    return sort_flag

pandas/tests/reshape/concat/test_append.py

-6
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,6 @@
1111
import pandas._testing as tm
1212

1313

14-
@pytest.fixture(params=[True, False])
15-
def sort(request):
16-
"""Boolean sort keyword for concat and DataFrame.append."""
17-
return request.param
18-
19-
2014
class TestAppend:
2115
def test_append(self, sort, float_frame):
2216
mixed_frame = float_frame.copy()

pandas/tests/reshape/concat/test_append_common.py

+23
Original file line numberDiff line numberDiff line change
@@ -725,3 +725,26 @@ def test_concat_categorical_empty(self):
725725

726726
tm.assert_series_equal(pd.concat([s2, s1], ignore_index=True), exp)
727727
tm.assert_series_equal(s2.append(s1, ignore_index=True), exp)
728+
729+
def test_categorical_concat_append(self):
730+
cat = Categorical(["a", "b"], categories=["a", "b"])
731+
vals = [1, 2]
732+
df = DataFrame({"cats": cat, "vals": vals})
733+
cat2 = Categorical(["a", "b", "a", "b"], categories=["a", "b"])
734+
vals2 = [1, 2, 1, 2]
735+
exp = DataFrame({"cats": cat2, "vals": vals2}, index=Index([0, 1, 0, 1]))
736+
737+
tm.assert_frame_equal(pd.concat([df, df]), exp)
738+
tm.assert_frame_equal(df.append(df), exp)
739+
740+
# GH 13524 can concat different categories
741+
cat3 = Categorical(["a", "b"], categories=["a", "b", "c"])
742+
vals3 = [1, 2]
743+
df_different_categories = DataFrame({"cats": cat3, "vals": vals3})
744+
745+
res = pd.concat([df, df_different_categories], ignore_index=True)
746+
exp = DataFrame({"cats": list("abab"), "vals": [1, 2, 1, 2]})
747+
tm.assert_frame_equal(res, exp)
748+
749+
res = df.append(df_different_categories, ignore_index=True)
750+
tm.assert_frame_equal(res, exp)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas.core.dtypes.dtypes import CategoricalDtype
5+
6+
import pandas as pd
7+
from pandas import Categorical, DataFrame, Series
8+
import pandas._testing as tm
9+
10+
11+
class TestCategoricalConcat:
12+
def test_categorical_concat(self, sort):
13+
# See GH 10177
14+
df1 = DataFrame(
15+
np.arange(18, dtype="int64").reshape(6, 3), columns=["a", "b", "c"]
16+
)
17+
18+
df2 = DataFrame(np.arange(14, dtype="int64").reshape(7, 2), columns=["a", "c"])
19+
20+
cat_values = ["one", "one", "two", "one", "two", "two", "one"]
21+
df2["h"] = Series(Categorical(cat_values))
22+
23+
res = pd.concat((df1, df2), axis=0, ignore_index=True, sort=sort)
24+
exp = DataFrame(
25+
{
26+
"a": [0, 3, 6, 9, 12, 15, 0, 2, 4, 6, 8, 10, 12],
27+
"b": [
28+
1,
29+
4,
30+
7,
31+
10,
32+
13,
33+
16,
34+
np.nan,
35+
np.nan,
36+
np.nan,
37+
np.nan,
38+
np.nan,
39+
np.nan,
40+
np.nan,
41+
],
42+
"c": [2, 5, 8, 11, 14, 17, 1, 3, 5, 7, 9, 11, 13],
43+
"h": [None] * 6 + cat_values,
44+
}
45+
)
46+
tm.assert_frame_equal(res, exp)
47+
48+
def test_categorical_concat_dtypes(self):
49+
50+
# GH8143
51+
index = ["cat", "obj", "num"]
52+
cat = Categorical(["a", "b", "c"])
53+
obj = Series(["a", "b", "c"])
54+
num = Series([1, 2, 3])
55+
df = pd.concat([Series(cat), obj, num], axis=1, keys=index)
56+
57+
result = df.dtypes == "object"
58+
expected = Series([False, True, False], index=index)
59+
tm.assert_series_equal(result, expected)
60+
61+
result = df.dtypes == "int64"
62+
expected = Series([False, False, True], index=index)
63+
tm.assert_series_equal(result, expected)
64+
65+
result = df.dtypes == "category"
66+
expected = Series([True, False, False], index=index)
67+
tm.assert_series_equal(result, expected)
68+
69+
def test_concat_categoricalindex(self):
70+
# GH 16111, categories that aren't lexsorted
71+
categories = [9, 0, 1, 2, 3]
72+
73+
a = Series(1, index=pd.CategoricalIndex([9, 0], categories=categories))
74+
b = Series(2, index=pd.CategoricalIndex([0, 1], categories=categories))
75+
c = Series(3, index=pd.CategoricalIndex([1, 2], categories=categories))
76+
77+
result = pd.concat([a, b, c], axis=1)
78+
79+
exp_idx = pd.CategoricalIndex([9, 0, 1, 2], categories=categories)
80+
exp = DataFrame(
81+
{
82+
0: [1, 1, np.nan, np.nan],
83+
1: [np.nan, 2, 2, np.nan],
84+
2: [np.nan, np.nan, 3, 3],
85+
},
86+
columns=[0, 1, 2],
87+
index=exp_idx,
88+
)
89+
tm.assert_frame_equal(result, exp)
90+
91+
def test_categorical_concat_preserve(self):
92+
93+
# GH 8641 series concat not preserving category dtype
94+
# GH 13524 can concat different categories
95+
s = Series(list("abc"), dtype="category")
96+
s2 = Series(list("abd"), dtype="category")
97+
98+
exp = Series(list("abcabd"))
99+
res = pd.concat([s, s2], ignore_index=True)
100+
tm.assert_series_equal(res, exp)
101+
102+
exp = Series(list("abcabc"), dtype="category")
103+
res = pd.concat([s, s], ignore_index=True)
104+
tm.assert_series_equal(res, exp)
105+
106+
exp = Series(list("abcabc"), index=[0, 1, 2, 0, 1, 2], dtype="category")
107+
res = pd.concat([s, s])
108+
tm.assert_series_equal(res, exp)
109+
110+
a = Series(np.arange(6, dtype="int64"))
111+
b = Series(list("aabbca"))
112+
113+
df2 = DataFrame({"A": a, "B": b.astype(CategoricalDtype(list("cab")))})
114+
res = pd.concat([df2, df2])
115+
exp = DataFrame(
116+
{
117+
"A": pd.concat([a, a]),
118+
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
119+
}
120+
)
121+
tm.assert_frame_equal(res, exp)
122+
123+
def test_categorical_index_preserver(self):
124+
125+
a = Series(np.arange(6, dtype="int64"))
126+
b = Series(list("aabbca"))
127+
128+
df2 = DataFrame(
129+
{"A": a, "B": b.astype(CategoricalDtype(list("cab")))}
130+
).set_index("B")
131+
result = pd.concat([df2, df2])
132+
expected = DataFrame(
133+
{
134+
"A": pd.concat([a, a]),
135+
"B": pd.concat([b, b]).astype(CategoricalDtype(list("cab"))),
136+
}
137+
).set_index("B")
138+
tm.assert_frame_equal(result, expected)
139+
140+
# wrong categories
141+
df3 = DataFrame(
142+
{"A": a, "B": Categorical(b, categories=list("abe"))}
143+
).set_index("B")
144+
msg = "categories must match existing categories when appending"
145+
with pytest.raises(TypeError, match=msg):
146+
pd.concat([df2, df3])
147+
148+
def test_concat_categorical_tz(self):
149+
# GH-23816
150+
a = Series(pd.date_range("2017-01-01", periods=2, tz="US/Pacific"))
151+
b = Series(["a", "b"], dtype="category")
152+
result = pd.concat([a, b], ignore_index=True)
153+
expected = Series(
154+
[
155+
pd.Timestamp("2017-01-01", tz="US/Pacific"),
156+
pd.Timestamp("2017-01-02", tz="US/Pacific"),
157+
"a",
158+
"b",
159+
]
160+
)
161+
tm.assert_series_equal(result, expected)
162+
163+
def test_concat_categorical_unchanged(self):
164+
# GH-12007
165+
# test fix for when concat on categorical and float
166+
# coerces dtype categorical -> float
167+
df = DataFrame(Series(["a", "b", "c"], dtype="category", name="A"))
168+
ser = Series([0, 1, 2], index=[0, 1, 3], name="B")
169+
result = pd.concat([df, ser], axis=1)
170+
expected = DataFrame(
171+
{
172+
"A": Series(["a", "b", "c", np.nan], dtype="category"),
173+
"B": Series([0, 1, np.nan, 2], dtype="float"),
174+
}
175+
)
176+
tm.assert_equal(result, expected)
177+
178+
def test_categorical_concat_gh7864(self):
179+
# GH 7864
180+
# make sure ordering is preserved
181+
df = DataFrame({"id": [1, 2, 3, 4, 5, 6], "raw_grade": list("abbaae")})
182+
df["grade"] = Categorical(df["raw_grade"])
183+
df["grade"].cat.set_categories(["e", "a", "b"])
184+
185+
df1 = df[0:3]
186+
df2 = df[3:]
187+
188+
tm.assert_index_equal(df["grade"].cat.categories, df1["grade"].cat.categories)
189+
tm.assert_index_equal(df["grade"].cat.categories, df2["grade"].cat.categories)
190+
191+
dfx = pd.concat([df1, df2])
192+
tm.assert_index_equal(df["grade"].cat.categories, dfx["grade"].cat.categories)
193+
194+
dfa = df1.append(df2)
195+
tm.assert_index_equal(df["grade"].cat.categories, dfa["grade"].cat.categories)

0 commit comments

Comments
 (0)