Skip to content

Commit 75d49cf

Browse files
committed
TST: split up test_concat.py pandas-dev#37243
1 parent de5349a commit 75d49cf

12 files changed

+3414
-3273
lines changed

pandas/tests/reshape/concat/__init__.py

Whitespace-only changes.
+392
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,392 @@
1+
import datetime as dt
2+
from datetime import datetime
3+
from itertools import combinations
4+
5+
import dateutil
6+
import numpy as np
7+
import pytest
8+
9+
import pandas as pd
10+
from pandas import (
11+
DataFrame,
12+
Index,
13+
Series,
14+
Timestamp,
15+
concat,
16+
isna,
17+
)
18+
import pandas._testing as tm
19+
20+
21+
@pytest.fixture(params=[True, False])
def sort(request):
    """Supply each boolean value of the ``sort`` keyword in turn.

    The value is passed by the tests to concat and DataFrame.append.
    """
    sort_flag = request.param
    return sort_flag
25+
26+
27+
class TestAppend:
    """Tests for ``DataFrame.append``.

    ``sort`` is the boolean fixture defined in this module.  ``float_frame``
    is a fixture supplied from outside this file; the tests below rely on it
    having columns "A" through "D" and more than 15 rows
    (NOTE(review): confirm against the shared conftest).
    """

    def test_append(self, sort, float_frame):
        """Appending complementary row slices rebuilds the original frame."""
        mixed_frame = float_frame.copy()
        mixed_frame["foo"] = "bar"

        begin_index = float_frame.index[:5]
        end_index = float_frame.index[5:]

        begin_frame = float_frame.reindex(begin_index)
        end_frame = float_frame.reindex(end_index)

        appended = begin_frame.append(end_frame)
        tm.assert_almost_equal(appended["A"], float_frame["A"])

        # a column missing from one operand must still be in the result
        del end_frame["A"]
        partial_appended = begin_frame.append(end_frame, sort=sort)
        assert "A" in partial_appended

        partial_appended = end_frame.append(begin_frame, sort=sort)
        assert "A" in partial_appended

        # mixed type handling
        appended = mixed_frame[:5].append(mixed_frame[5:])
        tm.assert_frame_equal(appended, mixed_frame)

        # what to test here
        mixed_appended = mixed_frame[:5].append(float_frame[5:], sort=sort)
        mixed_appended2 = float_frame[:5].append(mixed_frame[5:], sort=sort)

        # all equal except 'foo' column
        tm.assert_frame_equal(
            mixed_appended.reindex(columns=["A", "B", "C", "D"]),
            mixed_appended2.reindex(columns=["A", "B", "C", "D"]),
        )

    def test_append_empty(self, float_frame):
        """Appending with an empty frame gives an equal but distinct frame."""
        empty = DataFrame()

        appended = float_frame.append(empty)
        tm.assert_frame_equal(float_frame, appended)
        assert appended is not float_frame

        appended = empty.append(float_frame)
        tm.assert_frame_equal(float_frame, appended)
        assert appended is not float_frame

    def test_append_overlap_raises(self, float_frame):
        """``verify_integrity=True`` raises ValueError on overlapping indexes."""
        msg = "Indexes have overlapping values"
        with pytest.raises(ValueError, match=msg):
            float_frame.append(float_frame, verify_integrity=True)

    def test_append_new_columns(self):
        """Appending a Series with an unseen label adds a new column."""
        # see gh-6129: new columns
        df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}})
        row = Series([5, 6, 7], index=["a", "b", "c"], name="z")
        expected = DataFrame(
            {
                "a": {"x": 1, "y": 2, "z": 5},
                "b": {"x": 3, "y": 4, "z": 6},
                "c": {"z": 7},
            }
        )
        result = df.append(row)
        tm.assert_frame_equal(result, expected)

    def test_append_length0_frame(self, sort):
        """A frame with columns but no rows keeps all its columns on append."""
        df = DataFrame(columns=["A", "B", "C"])
        df3 = DataFrame(index=[0, 1], columns=["A", "B"])
        df5 = df.append(df3, sort=sort)

        expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
        tm.assert_frame_equal(df5, expected)

    def test_append_records(self):
        """Frames built from record arrays append like the joined arrays."""
        # "S10" is the canonical spelling of the fixed-width bytes field; the
        # "a" type code is a deprecated alias for the same dtype.
        arr1 = np.zeros((2,), dtype=("i4,f4,S10"))
        arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]

        arr2 = np.zeros((3,), dtype=("i4,f4,S10"))
        arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")]

        df1 = DataFrame(arr1)
        df2 = DataFrame(arr2)

        result = df1.append(df2, ignore_index=True)
        expected = DataFrame(np.concatenate((arr1, arr2)))
        tm.assert_frame_equal(result, expected)

    def test_append_sorts(self, sort):
        """Result column order follows ``sort`` and no warning is emitted."""
        df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
        df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])

        with tm.assert_produces_warning(None):
            result = df1.append(df2, sort=sort)

        # column order expected when sort is True
        expected = DataFrame(
            {"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]},
            columns=["a", "b", "c"],
        )
        if sort is False:
            # unsorted: columns keep their order of first appearance
            expected = expected[["b", "a", "c"]]
        tm.assert_frame_equal(result, expected)

    def test_append_different_columns(self, sort):
        """Columns absent from one operand are filled with missing values."""
        df = DataFrame(
            {
                "bools": np.random.randn(10) > 0,
                "ints": np.random.randint(0, 10, 10),
                "floats": np.random.randn(10),
                "strings": ["foo", "bar"] * 5,
            }
        )

        a = df[:5].loc[:, ["bools", "ints", "floats"]]
        b = df[5:].loc[:, ["strings", "ints", "floats"]]

        appended = a.append(b, sort=sort)
        # ``a`` supplies the first five rows and ``b`` the last five, so check
        # every row contributed by the operand lacking the column
        assert isna(appended["strings"][:5]).all()
        assert isna(appended["bools"][5:]).all()

    def test_append_many(self, sort, float_frame):
        """A list of frames can be appended in a single call."""
        chunks = [
            float_frame[:5],
            float_frame[5:10],
            float_frame[10:15],
            float_frame[15:],
        ]

        result = chunks[0].append(chunks[1:])
        tm.assert_frame_equal(result, float_frame)

        # copy before adding a column so the fixture itself is not modified
        chunks[-1] = chunks[-1].copy()
        chunks[-1]["foo"] = "bar"
        result = chunks[0].append(chunks[1:], sort=sort)
        tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
        assert (result["foo"][15:] == "bar").all()
        assert result["foo"][:15].isna().all()

    def test_append_preserve_index_name(self):
        """The index name survives appending to a frame with no rows."""
        # #980
        df1 = DataFrame(columns=["A", "B", "C"])
        df1 = df1.set_index(["A"])
        df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
        df2 = df2.set_index(["A"])

        result = df1.append(df2)
        assert result.index.name == "A"

    # Index objects used as columns / Series index by the parametrized tests
    # below; these may be combined with one another on append.
    indexes_can_append = [
        pd.RangeIndex(3),
        pd.Index([4, 5, 6]),
        pd.Index([4.5, 5.5, 6.5]),
        pd.Index(list("abc")),
        pd.CategoricalIndex("A B C".split()),
        pd.CategoricalIndex("D E F".split(), ordered=True),
        pd.IntervalIndex.from_breaks([7, 8, 9, 10]),
        pd.DatetimeIndex(
            [
                dt.datetime(2013, 1, 3, 0, 0),
                dt.datetime(2013, 1, 3, 6, 10),
                dt.datetime(2013, 1, 3, 7, 12),
            ]
        ),
    ]

    # Index objects expected to raise when appended with a different type.
    indexes_cannot_append_with_other = [
        pd.MultiIndex.from_arrays(["A B C".split(), "D E F".split()])
    ]

    all_indexes = indexes_can_append + indexes_cannot_append_with_other

    @pytest.mark.parametrize("index", all_indexes, ids=lambda x: type(x).__name__)
    def test_append_same_columns_type(self, index):
        """Append a Series whose index has the same type as the columns."""
        # GH18359

        # df wider than ser
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
        ser_index = index[:2]
        ser = Series([7, 8], index=ser_index, name=2)
        result = df.append(ser)
        expected = DataFrame(
            [[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]], index=[0, 1, 2], columns=index
        )
        tm.assert_frame_equal(result, expected)

        # ser wider than df
        ser_index = index
        df_columns = index[:2]
        df = DataFrame([[1, 2], [4, 5]], columns=df_columns)
        ser = Series([7, 8, 9], index=ser_index, name=2)
        result = df.append(ser)
        expected = DataFrame(
            [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
            index=[0, 1, 2],
            columns=ser_index,
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "df_columns, series_index",
        combinations(indexes_can_append, r=2),
        ids=lambda x: type(x).__name__,
    )
    def test_append_different_columns_types(self, df_columns, series_index):
        """Append a Series whose index type differs from the columns type."""
        # GH18359
        # See also test 'test_append_different_columns_types_raises' below
        # for errors raised when appending

        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
        ser = Series([7, 8, 9], index=series_index, name=2)

        result = df.append(ser)
        # expected columns: the frame's own labels, then the Series labels
        # that are not already among them
        idx_diff = ser.index.difference(df_columns)
        combined_columns = Index(df_columns.tolist()).append(idx_diff)
        expected = DataFrame(
            [
                [1.0, 2.0, 3.0, np.nan, np.nan, np.nan],
                [4, 5, 6, np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan, 7, 8, 9],
            ],
            index=[0, 1, 2],
            columns=combined_columns,
        )
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "index_can_append", indexes_can_append, ids=lambda x: type(x).__name__
    )
    @pytest.mark.parametrize(
        "index_cannot_append_with_other",
        indexes_cannot_append_with_other,
        ids=lambda x: type(x).__name__,
    )
    def test_append_different_columns_types_raises(
        self, index_can_append, index_cannot_append_with_other
    ):
        """Mixing a MultiIndex with another index type raises TypeError."""
        # GH18359
        # Dataframe.append will raise if MultiIndex appends
        # or is appended to a different index type
        #
        # See also test 'test_append_different_columns_types' above for
        # appending without raising.

        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_can_append)
        ser = Series([7, 8, 9], index=index_cannot_append_with_other, name=2)
        # NOTE(review): the pattern ends with "|", i.e. an empty alternative,
        # so it matches any message and only the exception type is really
        # checked.  Left unchanged here because the exact messages for every
        # index type are not visible in this file -- confirm before tightening.
        msg = (
            r"Expected tuple, got (int|long|float|str|"
            r"pandas._libs.interval.Interval)|"
            r"object of type '(int|float|Timestamp|"
            r"pandas._libs.interval.Interval)' has no len\(\)|"
        )
        with pytest.raises(TypeError, match=msg):
            df.append(ser)

        # same check with the roles of the two index types swapped
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index_cannot_append_with_other)
        ser = Series([7, 8, 9], index=index_can_append, name=2)

        with pytest.raises(TypeError, match=msg):
            df.append(ser)

    def test_append_dtype_coerce(self, sort):
        """Datetime columns are combined, with NaT where a column is absent."""
        # GH 4993
        # appending with datetime will incorrectly convert datetime64

        df1 = DataFrame(
            index=[1, 2],
            data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)],
            columns=["start_time"],
        )
        df2 = DataFrame(
            index=[4, 5],
            data=[
                [dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)],
                [dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)],
            ],
            columns=["start_time", "end_time"],
        )

        expected = concat(
            [
                Series(
                    [
                        pd.NaT,
                        pd.NaT,
                        dt.datetime(2013, 1, 3, 6, 10),
                        dt.datetime(2013, 1, 4, 7, 10),
                    ],
                    name="end_time",
                ),
                Series(
                    [
                        dt.datetime(2013, 1, 1, 0, 0),
                        dt.datetime(2013, 1, 2, 0, 0),
                        dt.datetime(2013, 1, 3, 0, 0),
                        dt.datetime(2013, 1, 4, 0, 0),
                    ],
                    name="start_time",
                ),
            ],
            axis=1,
            sort=sort,
        )
        result = df1.append(df2, ignore_index=True, sort=sort)
        # sorted output orders the columns alphabetically; otherwise they
        # keep their order of first appearance
        if sort:
            expected = expected[["end_time", "start_time"]]
        else:
            expected = expected[["start_time", "end_time"]]

        tm.assert_frame_equal(result, expected)

    def test_append_missing_column_proper_upcast(self, sort):
        """Columns gaining missing values upcast: int to float, bool to object."""
        df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
        df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})

        appended = df1.append(df2, ignore_index=True, sort=sort)
        assert appended["A"].dtype == "f8"
        assert appended["B"].dtype == "O"

    def test_append_empty_frame_to_series_with_dateutil_tz(self):
        """Append a Series holding a dateutil-tz Timestamp to an empty frame."""
        # GH 23682
        date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
        s = Series({"date": date, "a": 1.0, "b": 2.0})
        df = DataFrame(columns=["c", "d"])
        result_a = df.append(s, ignore_index=True)
        expected = DataFrame(
            [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
        )
        # These columns get cast to object after append
        expected["c"] = expected["c"].astype(object)
        expected["d"] = expected["d"].astype(object)
        tm.assert_frame_equal(result_a, expected)

        expected = DataFrame(
            [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
        )
        expected["c"] = expected["c"].astype(object)
        expected["d"] = expected["d"].astype(object)

        result_b = result_a.append(s, ignore_index=True)
        tm.assert_frame_equal(result_b, expected)

        # column order is different
        expected = expected[["c", "d", "date", "a", "b"]]
        result = df.append([s, s], ignore_index=True)
        tm.assert_frame_equal(result, expected)

    def test_append_empty_tz_frame_with_datetime64ns(self):
        """Appending tz-naive NaT to an empty tz-aware frame gives tz-naive."""
        # https://github.com/pandas-dev/pandas/issues/35460
        df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")

        # pd.NaT gets inferred as tz-naive, so append result is tz-naive
        result = df.append({"a": pd.NaT}, ignore_index=True)
        expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
        tm.assert_frame_equal(result, expected)

        # also test with typed value to append
        df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
        result = df.append(
            Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True
        )
        expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
        tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)