Skip to content

Commit 84d0415

Browse files
committed
TST: split up test_concat.py pandas-dev#37243
* created test_invalid.py
* created test_sort.py
1 parent 73d54f5 commit 84d0415

File tree

3 files changed

+143
-121
lines changed

3 files changed

+143
-121
lines changed

pandas/tests/reshape/concat/test_concat.py

+1-121
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,12 @@
11
from collections import abc, deque
22
from decimal import Decimal
3-
from io import StringIO
43
from warnings import catch_warnings
54

65
import numpy as np
76
import pytest
87

98
import pandas as pd
10-
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range, read_csv
9+
from pandas import DataFrame, Index, MultiIndex, Series, concat, date_range
1110
import pandas._testing as tm
1211
from pandas.core.arrays import SparseArray
1312
from pandas.core.construction import create_series_with_explicit_dtype
@@ -616,48 +615,6 @@ def __iter__(self):
616615

617616
tm.assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected)
618617

619-
def test_concat_invalid(self):
620-
621-
# trying to concat a ndframe with a non-ndframe
622-
df1 = tm.makeCustomDataframe(10, 2)
623-
for obj in [1, dict(), [1, 2], (1, 2)]:
624-
625-
msg = (
626-
f"cannot concatenate object of type '{type(obj)}'; "
627-
"only Series and DataFrame objs are valid"
628-
)
629-
with pytest.raises(TypeError, match=msg):
630-
concat([df1, obj])
631-
632-
def test_concat_invalid_first_argument(self):
633-
df1 = tm.makeCustomDataframe(10, 2)
634-
df2 = tm.makeCustomDataframe(10, 2)
635-
msg = (
636-
"first argument must be an iterable of pandas "
637-
'objects, you passed an object of type "DataFrame"'
638-
)
639-
with pytest.raises(TypeError, match=msg):
640-
concat(df1, df2)
641-
642-
# generator ok though
643-
concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))
644-
645-
# text reader ok
646-
# GH6583
647-
data = """index,A,B,C,D
648-
foo,2,3,4,5
649-
bar,7,8,9,10
650-
baz,12,13,14,15
651-
qux,12,13,14,15
652-
foo2,12,13,14,15
653-
bar2,12,13,14,15
654-
"""
655-
656-
reader = read_csv(StringIO(data), chunksize=1)
657-
result = concat(reader, ignore_index=True)
658-
expected = read_csv(StringIO(data))
659-
tm.assert_frame_equal(result, expected)
660-
661618
def test_default_index(self):
662619
# is_series and ignore_index
663620
s1 = Series([1, 2, 3], name="x")
@@ -809,83 +766,6 @@ def test_concat_empty_and_non_empty_frame_regression():
809766
tm.assert_frame_equal(result, expected)
810767

811768

812-
def test_concat_sorts_columns(sort):
813-
# GH-4588
814-
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
815-
df2 = DataFrame({"a": [3, 4], "c": [5, 6]})
816-
817-
# for sort=True/None
818-
expected = DataFrame(
819-
{"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]},
820-
columns=["a", "b", "c"],
821-
)
822-
823-
if sort is False:
824-
expected = expected[["b", "a", "c"]]
825-
826-
# default
827-
with tm.assert_produces_warning(None):
828-
result = pd.concat([df1, df2], ignore_index=True, sort=sort)
829-
tm.assert_frame_equal(result, expected)
830-
831-
832-
def test_concat_sorts_index(sort):
833-
df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"])
834-
df2 = DataFrame({"b": [1, 2]}, index=["a", "b"])
835-
836-
# For True/None
837-
expected = DataFrame(
838-
{"a": [2, 3, 1], "b": [1, 2, None]}, index=["a", "b", "c"], columns=["a", "b"]
839-
)
840-
if sort is False:
841-
expected = expected.loc[["c", "a", "b"]]
842-
843-
# Warn and sort by default
844-
with tm.assert_produces_warning(None):
845-
result = pd.concat([df1, df2], axis=1, sort=sort)
846-
tm.assert_frame_equal(result, expected)
847-
848-
849-
def test_concat_inner_sort(sort):
850-
# https://github.com/pandas-dev/pandas/pull/20613
851-
df1 = DataFrame({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"])
852-
df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4])
853-
854-
with tm.assert_produces_warning(None):
855-
# unset sort should *not* warn for inner join
856-
# since that never sorted
857-
result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True)
858-
859-
expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"])
860-
if sort is True:
861-
expected = expected[["a", "b"]]
862-
tm.assert_frame_equal(result, expected)
863-
864-
865-
def test_concat_aligned_sort():
866-
# GH-4588
867-
df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"])
868-
result = pd.concat([df, df], sort=True, ignore_index=True)
869-
expected = DataFrame(
870-
{"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]},
871-
columns=["a", "b", "c"],
872-
)
873-
tm.assert_frame_equal(result, expected)
874-
875-
result = pd.concat([df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True)
876-
expected = expected[["b", "c"]]
877-
tm.assert_frame_equal(result, expected)
878-
879-
880-
def test_concat_aligned_sort_does_not_raise():
881-
# GH-4588
882-
# We catch TypeErrors from sorting internally and do not re-raise.
883-
df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"])
884-
expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"])
885-
result = pd.concat([df, df], ignore_index=True, sort=True)
886-
tm.assert_frame_equal(result, expected)
887-
888-
889769
def test_concat_sparse():
890770
# GH 23557
891771
a = Series(SparseArray([0, 1, 2]))
pandas/tests/reshape/concat/test_invalid.py

+51
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,51 @@
1+
from io import StringIO
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, concat, read_csv
7+
import pandas._testing as tm
8+
9+
10+
class TestInvalidConcat:
11+
def test_concat_invalid(self):
12+
13+
# trying to concat a ndframe with a non-ndframe
14+
df1 = tm.makeCustomDataframe(10, 2)
15+
for obj in [1, dict(), [1, 2], (1, 2)]:
16+
17+
msg = (
18+
f"cannot concatenate object of type '{type(obj)}'; "
19+
"only Series and DataFrame objs are valid"
20+
)
21+
with pytest.raises(TypeError, match=msg):
22+
concat([df1, obj])
23+
24+
def test_concat_invalid_first_argument(self):
25+
df1 = tm.makeCustomDataframe(10, 2)
26+
df2 = tm.makeCustomDataframe(10, 2)
27+
msg = (
28+
"first argument must be an iterable of pandas "
29+
'objects, you passed an object of type "DataFrame"'
30+
)
31+
with pytest.raises(TypeError, match=msg):
32+
concat(df1, df2)
33+
34+
# generator ok though
35+
concat(DataFrame(np.random.rand(5, 5)) for _ in range(3))
36+
37+
# text reader ok
38+
# GH6583
39+
data = """index,A,B,C,D
40+
foo,2,3,4,5
41+
bar,7,8,9,10
42+
baz,12,13,14,15
43+
qux,12,13,14,15
44+
foo2,12,13,14,15
45+
bar2,12,13,14,15
46+
"""
47+
48+
reader = read_csv(StringIO(data), chunksize=1)
49+
result = concat(reader, ignore_index=True)
50+
expected = read_csv(StringIO(data))
51+
tm.assert_frame_equal(result, expected)
pandas/tests/reshape/concat/test_sort.py

+91
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
import pytest
2+
3+
import pandas as pd
4+
from pandas import DataFrame
5+
import pandas._testing as tm
6+
7+
8+
@pytest.fixture(params=[True, False])
9+
def sort(request):
10+
"""Boolean sort keyword for concat and DataFrame.append."""
11+
return request.param
12+
13+
14+
class TestConcatSort:
15+
def test_concat_sorts_columns(self, sort):
16+
# GH-4588
17+
df1 = DataFrame({"a": [1, 2], "b": [1, 2]}, columns=["b", "a"])
18+
df2 = DataFrame({"a": [3, 4], "c": [5, 6]})
19+
20+
# expected column order for sort=True (the fixture only yields True/False)
21+
expected = DataFrame(
22+
{"a": [1, 2, 3, 4], "b": [1, 2, None, None], "c": [None, None, 5, 6]},
23+
columns=["a", "b", "c"],
24+
)
25+
26+
if sort is False:
27+
expected = expected[["b", "a", "c"]]
28+
29+
# passing sort explicitly must not emit a warning
30+
with tm.assert_produces_warning(None):
31+
result = pd.concat([df1, df2], ignore_index=True, sort=sort)
32+
tm.assert_frame_equal(result, expected)
33+
34+
def test_concat_sorts_index(self, sort):
35+
df1 = DataFrame({"a": [1, 2, 3]}, index=["c", "a", "b"])
36+
df2 = DataFrame({"b": [1, 2]}, index=["a", "b"])
37+
38+
# Expected row order for sort=True (the fixture only yields True/False)
39+
expected = DataFrame(
40+
{"a": [2, 3, 1], "b": [1, 2, None]},
41+
index=["a", "b", "c"],
42+
columns=["a", "b"],
43+
)
44+
if sort is False:
45+
expected = expected.loc[["c", "a", "b"]]
46+
47+
# No warning is expected when sort is passed explicitly
48+
with tm.assert_produces_warning(None):
49+
result = pd.concat([df1, df2], axis=1, sort=sort)
50+
tm.assert_frame_equal(result, expected)
51+
52+
def test_concat_inner_sort(self, sort):
53+
# https://github.com/pandas-dev/pandas/pull/20613
54+
df1 = DataFrame(
55+
{"a": [1, 2], "b": [1, 2], "c": [1, 2]}, columns=["b", "a", "c"]
56+
)
57+
df2 = DataFrame({"a": [1, 2], "b": [3, 4]}, index=[3, 4])
58+
59+
with tm.assert_produces_warning(None):
60+
# unset sort should *not* warn for inner join
61+
# since that never sorted
62+
result = pd.concat([df1, df2], sort=sort, join="inner", ignore_index=True)
63+
64+
expected = DataFrame({"b": [1, 2, 3, 4], "a": [1, 2, 1, 2]}, columns=["b", "a"])
65+
if sort is True:
66+
expected = expected[["a", "b"]]
67+
tm.assert_frame_equal(result, expected)
68+
69+
def test_concat_aligned_sort(self):
70+
# GH-4588
71+
df = DataFrame({"c": [1, 2], "b": [3, 4], "a": [5, 6]}, columns=["c", "b", "a"])
72+
result = pd.concat([df, df], sort=True, ignore_index=True)
73+
expected = DataFrame(
74+
{"a": [5, 6, 5, 6], "b": [3, 4, 3, 4], "c": [1, 2, 1, 2]},
75+
columns=["a", "b", "c"],
76+
)
77+
tm.assert_frame_equal(result, expected)
78+
79+
result = pd.concat(
80+
[df, df[["c", "b"]]], join="inner", sort=True, ignore_index=True
81+
)
82+
expected = expected[["b", "c"]]
83+
tm.assert_frame_equal(result, expected)
84+
85+
def test_concat_aligned_sort_does_not_raise(self):
86+
# GH-4588
87+
# We catch TypeErrors from sorting internally and do not re-raise.
88+
df = DataFrame({1: [1, 2], "a": [3, 4]}, columns=[1, "a"])
89+
expected = DataFrame({1: [1, 2, 1, 2], "a": [3, 4, 3, 4]}, columns=[1, "a"])
90+
result = pd.concat([df, df], ignore_index=True, sort=True)
91+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)