Skip to content

Commit b49aeac

Browse files
authored
TST: moved file test_concat.py to folder ./concat/ (#37243) (#37360)
1 parent 901b1a7 commit b49aeac

File tree

5 files changed

+1469
-1435
lines changed

5 files changed

+1469
-1435
lines changed
+383
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,383 @@
1+
import datetime as dt
2+
from datetime import datetime
3+
from itertools import combinations
4+
5+
import dateutil
6+
import numpy as np
7+
import pytest
8+
9+
import pandas as pd
10+
from pandas import DataFrame, Index, Series, Timestamp, concat, isna
11+
import pandas._testing as tm
12+
13+
14+
@pytest.fixture(params=[True, False])
def sort(request):
    """Yield each boolean value of the ``sort`` keyword (True, then False).

    Used by the tests below that forward ``sort`` to concat and
    DataFrame.append.
    """
    sort_flag = request.param
    return sort_flag
18+
19+
20+
class TestAppend:
21+
def test_append(self, sort, float_frame):
22+
mixed_frame = float_frame.copy()
23+
mixed_frame["foo"] = "bar"
24+
25+
begin_index = float_frame.index[:5]
26+
end_index = float_frame.index[5:]
27+
28+
begin_frame = float_frame.reindex(begin_index)
29+
end_frame = float_frame.reindex(end_index)
30+
31+
appended = begin_frame.append(end_frame)
32+
tm.assert_almost_equal(appended["A"], float_frame["A"])
33+
34+
del end_frame["A"]
35+
partial_appended = begin_frame.append(end_frame, sort=sort)
36+
assert "A" in partial_appended
37+
38+
partial_appended = end_frame.append(begin_frame, sort=sort)
39+
assert "A" in partial_appended
40+
41+
# mixed type handling
42+
appended = mixed_frame[:5].append(mixed_frame[5:])
43+
tm.assert_frame_equal(appended, mixed_frame)
44+
45+
# what to test here
46+
mixed_appended = mixed_frame[:5].append(float_frame[5:], sort=sort)
47+
mixed_appended2 = float_frame[:5].append(mixed_frame[5:], sort=sort)
48+
49+
# all equal except 'foo' column
50+
tm.assert_frame_equal(
51+
mixed_appended.reindex(columns=["A", "B", "C", "D"]),
52+
mixed_appended2.reindex(columns=["A", "B", "C", "D"]),
53+
)
54+
55+
def test_append_empty(self, float_frame):
56+
empty = DataFrame()
57+
58+
appended = float_frame.append(empty)
59+
tm.assert_frame_equal(float_frame, appended)
60+
assert appended is not float_frame
61+
62+
appended = empty.append(float_frame)
63+
tm.assert_frame_equal(float_frame, appended)
64+
assert appended is not float_frame
65+
66+
def test_append_overlap_raises(self, float_frame):
67+
msg = "Indexes have overlapping values"
68+
with pytest.raises(ValueError, match=msg):
69+
float_frame.append(float_frame, verify_integrity=True)
70+
71+
def test_append_new_columns(self):
72+
# see gh-6129: new columns
73+
df = DataFrame({"a": {"x": 1, "y": 2}, "b": {"x": 3, "y": 4}})
74+
row = Series([5, 6, 7], index=["a", "b", "c"], name="z")
75+
expected = DataFrame(
76+
{
77+
"a": {"x": 1, "y": 2, "z": 5},
78+
"b": {"x": 3, "y": 4, "z": 6},
79+
"c": {"z": 7},
80+
}
81+
)
82+
result = df.append(row)
83+
tm.assert_frame_equal(result, expected)
84+
85+
def test_append_length0_frame(self, sort):
86+
df = DataFrame(columns=["A", "B", "C"])
87+
df3 = DataFrame(index=[0, 1], columns=["A", "B"])
88+
df5 = df.append(df3, sort=sort)
89+
90+
expected = DataFrame(index=[0, 1], columns=["A", "B", "C"])
91+
tm.assert_frame_equal(df5, expected)
92+
93+
def test_append_records(self):
94+
arr1 = np.zeros((2,), dtype=("i4,f4,a10"))
95+
arr1[:] = [(1, 2.0, "Hello"), (2, 3.0, "World")]
96+
97+
arr2 = np.zeros((3,), dtype=("i4,f4,a10"))
98+
arr2[:] = [(3, 4.0, "foo"), (5, 6.0, "bar"), (7.0, 8.0, "baz")]
99+
100+
df1 = DataFrame(arr1)
101+
df2 = DataFrame(arr2)
102+
103+
result = df1.append(df2, ignore_index=True)
104+
expected = DataFrame(np.concatenate((arr1, arr2)))
105+
tm.assert_frame_equal(result, expected)
106+
107+
# rewrite sort fixture, since we also want to test default of None
108+
def test_append_sorts(self, sort):
109+
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
110+
df2 = DataFrame({"a": [1, 2], "c": [3, 4]}, index=[2, 3])
111+
112+
with tm.assert_produces_warning(None):
113+
result = df1.append(df2, sort=sort)
114+
115+
# for None / True
116+
expected = DataFrame(
117+
{"b": [1, 2, None, None], "a": [1, 2, 1, 2], "c": [None, None, 3, 4]},
118+
columns=["a", "b", "c"],
119+
)
120+
if sort is False:
121+
expected = expected[["b", "a", "c"]]
122+
tm.assert_frame_equal(result, expected)
123+
124+
def test_append_different_columns(self, sort):
125+
df = DataFrame(
126+
{
127+
"bools": np.random.randn(10) > 0,
128+
"ints": np.random.randint(0, 10, 10),
129+
"floats": np.random.randn(10),
130+
"strings": ["foo", "bar"] * 5,
131+
}
132+
)
133+
134+
a = df[:5].loc[:, ["bools", "ints", "floats"]]
135+
b = df[5:].loc[:, ["strings", "ints", "floats"]]
136+
137+
appended = a.append(b, sort=sort)
138+
assert isna(appended["strings"][0:4]).all()
139+
assert isna(appended["bools"][5:]).all()
140+
141+
def test_append_many(self, sort, float_frame):
142+
chunks = [
143+
float_frame[:5],
144+
float_frame[5:10],
145+
float_frame[10:15],
146+
float_frame[15:],
147+
]
148+
149+
result = chunks[0].append(chunks[1:])
150+
tm.assert_frame_equal(result, float_frame)
151+
152+
chunks[-1] = chunks[-1].copy()
153+
chunks[-1]["foo"] = "bar"
154+
result = chunks[0].append(chunks[1:], sort=sort)
155+
tm.assert_frame_equal(result.loc[:, float_frame.columns], float_frame)
156+
assert (result["foo"][15:] == "bar").all()
157+
assert result["foo"][:15].isna().all()
158+
159+
def test_append_preserve_index_name(self):
160+
# #980
161+
df1 = DataFrame(columns=["A", "B", "C"])
162+
df1 = df1.set_index(["A"])
163+
df2 = DataFrame(data=[[1, 4, 7], [2, 5, 8], [3, 6, 9]], columns=["A", "B", "C"])
164+
df2 = df2.set_index(["A"])
165+
166+
result = df1.append(df2)
167+
assert result.index.name == "A"
168+
169+
# Three-element indexes used as frame columns / Series labels by the
# parametrized append tests; the tests expect any two of these to append
# without raising.
indexes_can_append = [
    pd.RangeIndex(3),
    Index([4, 5, 6]),
    Index([4.5, 5.5, 6.5]),
    Index(["a", "b", "c"]),
    pd.CategoricalIndex(["A", "B", "C"]),
    pd.CategoricalIndex(["D", "E", "F"], ordered=True),
    pd.IntervalIndex.from_breaks([7, 8, 9, 10]),
    pd.DatetimeIndex(
        [
            dt.datetime(2013, 1, 3),
            dt.datetime(2013, 1, 3, 6, 10),
            dt.datetime(2013, 1, 3, 7, 12),
        ]
    ),
]

# Indexes the tests expect to raise TypeError when mixed with the ones above.
indexes_cannot_append_with_other = [
    pd.MultiIndex.from_arrays([["A", "B", "C"], ["D", "E", "F"]])
]

all_indexes = [*indexes_can_append, *indexes_cannot_append_with_other]
191+
192+
@pytest.mark.parametrize("index", all_indexes, ids=lambda x: type(x).__name__)
def test_append_same_columns_type(self, index):
    """Append a Series whose labels share the frame's column index type.

    GH18359
    """
    row_labels = [0, 1, 2]

    # df wider than ser: the appended row is NaN in the last column
    wide_frame = DataFrame([[1, 2, 3], [4, 5, 6]], columns=index)
    short_row = Series([7, 8], index=index[:2], name=2)
    outcome = wide_frame.append(short_row)
    wanted = DataFrame(
        [[1.0, 2.0, 3.0], [4, 5, 6], [7, 8, np.nan]],
        index=row_labels,
        columns=index,
    )
    tm.assert_frame_equal(outcome, wanted)

    # ser wider than df: the original rows are NaN in the new column
    narrow_frame = DataFrame([[1, 2], [4, 5]], columns=index[:2])
    long_row = Series([7, 8, 9], index=index, name=2)
    outcome = narrow_frame.append(long_row)
    wanted = DataFrame(
        [[1, 2, np.nan], [4, 5, np.nan], [7, 8, 9]],
        index=row_labels,
        columns=index,
    )
    tm.assert_frame_equal(outcome, wanted)
218+
219+
@pytest.mark.parametrize(
    "df_columns, series_index",
    combinations(indexes_can_append, r=2),
    ids=lambda x: type(x).__name__,
)
def test_append_different_columns_types(self, df_columns, series_index):
    """Append a Series whose labels are of a different index type than the columns.

    GH18359. The raising counterpart is
    'test_append_different_columns_types_raises'.
    """
    frame = DataFrame([[1, 2, 3], [4, 5, 6]], columns=df_columns)
    row = Series([7, 8, 9], index=series_index, name=2)

    outcome = frame.append(row)

    # Expected columns: the frame's own labels, then the Series-only labels.
    extra_labels = row.index.difference(df_columns)
    all_columns = Index(df_columns.tolist()).append(extra_labels)

    gap = [np.nan, np.nan, np.nan]
    wanted = DataFrame(
        [
            [1.0, 2.0, 3.0] + gap,
            [4, 5, 6] + gap,
            gap + [7, 8, 9],
        ],
        index=[0, 1, 2],
        columns=all_columns,
    )
    tm.assert_frame_equal(outcome, wanted)
245+
246+
@pytest.mark.parametrize(
    "index_can_append", indexes_can_append, ids=lambda x: type(x).__name__
)
@pytest.mark.parametrize(
    "index_cannot_append_with_other",
    indexes_cannot_append_with_other,
    ids=lambda x: type(x).__name__,
)
def test_append_different_columns_types_raises(
    self, index_can_append, index_cannot_append_with_other
):
    """DataFrame.append raises TypeError when a MultiIndex meets another index type.

    GH18359. Both directions are checked: MultiIndex labels appended to
    a frame with other columns, and the reverse. The non-raising
    counterpart is 'test_append_different_columns_types'.
    """
    # NOTE(review): this pattern ends with a trailing "|", i.e. an empty
    # alternative, so it matches any message and effectively only the
    # TypeError type is checked. Confirm the real messages for every
    # index type before tightening it.
    msg = (
        r"Expected tuple, got (int|long|float|str|"
        r"pandas._libs.interval.Interval)|"
        r"object of type '(int|float|Timestamp|"
        r"pandas._libs.interval.Interval)' has no len\(\)|"
    )

    pairings = (
        (index_can_append, index_cannot_append_with_other),
        (index_cannot_append_with_other, index_can_append),
    )
    for columns, labels in pairings:
        df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=columns)
        ser = Series([7, 8, 9], index=labels, name=2)
        with pytest.raises(TypeError, match=msg):
            df.append(ser)
280+
281+
def test_append_dtype_coerce(self, sort):
282+
283+
# GH 4993
284+
# appending with datetime will incorrectly convert datetime64
285+
286+
df1 = DataFrame(
287+
index=[1, 2],
288+
data=[dt.datetime(2013, 1, 1, 0, 0), dt.datetime(2013, 1, 2, 0, 0)],
289+
columns=["start_time"],
290+
)
291+
df2 = DataFrame(
292+
index=[4, 5],
293+
data=[
294+
[dt.datetime(2013, 1, 3, 0, 0), dt.datetime(2013, 1, 3, 6, 10)],
295+
[dt.datetime(2013, 1, 4, 0, 0), dt.datetime(2013, 1, 4, 7, 10)],
296+
],
297+
columns=["start_time", "end_time"],
298+
)
299+
300+
expected = concat(
301+
[
302+
Series(
303+
[
304+
pd.NaT,
305+
pd.NaT,
306+
dt.datetime(2013, 1, 3, 6, 10),
307+
dt.datetime(2013, 1, 4, 7, 10),
308+
],
309+
name="end_time",
310+
),
311+
Series(
312+
[
313+
dt.datetime(2013, 1, 1, 0, 0),
314+
dt.datetime(2013, 1, 2, 0, 0),
315+
dt.datetime(2013, 1, 3, 0, 0),
316+
dt.datetime(2013, 1, 4, 0, 0),
317+
],
318+
name="start_time",
319+
),
320+
],
321+
axis=1,
322+
sort=sort,
323+
)
324+
result = df1.append(df2, ignore_index=True, sort=sort)
325+
if sort:
326+
expected = expected[["end_time", "start_time"]]
327+
else:
328+
expected = expected[["start_time", "end_time"]]
329+
330+
tm.assert_frame_equal(result, expected)
331+
332+
def test_append_missing_column_proper_upcast(self, sort):
333+
df1 = DataFrame({"A": np.array([1, 2, 3, 4], dtype="i8")})
334+
df2 = DataFrame({"B": np.array([True, False, True, False], dtype=bool)})
335+
336+
appended = df1.append(df2, ignore_index=True, sort=sort)
337+
assert appended["A"].dtype == "f8"
338+
assert appended["B"].dtype == "O"
339+
340+
def test_append_empty_frame_to_series_with_dateutil_tz(self):
341+
# GH 23682
342+
date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc())
343+
s = Series({"date": date, "a": 1.0, "b": 2.0})
344+
df = DataFrame(columns=["c", "d"])
345+
result_a = df.append(s, ignore_index=True)
346+
expected = DataFrame(
347+
[[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"]
348+
)
349+
# These columns get cast to object after append
350+
expected["c"] = expected["c"].astype(object)
351+
expected["d"] = expected["d"].astype(object)
352+
tm.assert_frame_equal(result_a, expected)
353+
354+
expected = DataFrame(
355+
[[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"]
356+
)
357+
expected["c"] = expected["c"].astype(object)
358+
expected["d"] = expected["d"].astype(object)
359+
360+
result_b = result_a.append(s, ignore_index=True)
361+
tm.assert_frame_equal(result_b, expected)
362+
363+
# column order is different
364+
expected = expected[["c", "d", "date", "a", "b"]]
365+
result = df.append([s, s], ignore_index=True)
366+
tm.assert_frame_equal(result, expected)
367+
368+
def test_append_empty_tz_frame_with_datetime64ns(self):
369+
# https://github.com/pandas-dev/pandas/issues/35460
370+
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
371+
372+
# pd.NaT gets inferred as tz-naive, so append result is tz-naive
373+
result = df.append({"a": pd.NaT}, ignore_index=True)
374+
expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
375+
tm.assert_frame_equal(result, expected)
376+
377+
# also test with typed value to append
378+
df = DataFrame(columns=["a"]).astype("datetime64[ns, UTC]")
379+
result = df.append(
380+
Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True
381+
)
382+
expected = DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]")
383+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)