Skip to content

Commit ca9a84c

Browse files
committed
TST: split up test_concat.py pandas-dev#37243
* created test_index.py
* small changes to test_empty.py, test_datetimes.py, test_dataframe.py
1 parent 743811f commit ca9a84c

File tree

5 files changed

+313
-291
lines changed

5 files changed

+313
-291
lines changed

pandas/tests/reshape/concat/test_concat.py

+1-290
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,6 @@ def test_concat_keys_specific_levels(self):
102102

103103
assert result.columns.names == ["group_key", None]
104104

105-
def test_concat_dataframe_keys_bug(self, sort):
106-
t1 = DataFrame(
107-
{"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))}
108-
)
109-
t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))})
110-
111-
# it works
112-
result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort)
113-
assert list(result.columns) == [("t1", "value"), ("t2", "value")]
114-
115105
@pytest.mark.parametrize("mapping", ["mapping", "dict"])
116106
def test_concat_mapping(self, mapping, non_dict_mapping_subclass):
117107
constructor = dict if mapping == "dict" else non_dict_mapping_subclass
@@ -139,106 +129,6 @@ def test_concat_mapping(self, mapping, non_dict_mapping_subclass):
139129
expected = concat([frames[k] for k in keys], keys=keys)
140130
tm.assert_frame_equal(result, expected)
141131

142-
def test_concat_ignore_index(self, sort):
143-
frame1 = DataFrame(
144-
{"test1": ["a", "b", "c"], "test2": [1, 2, 3], "test3": [4.5, 3.2, 1.2]}
145-
)
146-
frame2 = DataFrame({"test3": [5.2, 2.2, 4.3]})
147-
frame1.index = Index(["x", "y", "z"])
148-
frame2.index = Index(["x", "y", "q"])
149-
150-
v1 = concat([frame1, frame2], axis=1, ignore_index=True, sort=sort)
151-
152-
nan = np.nan
153-
expected = DataFrame(
154-
[
155-
[nan, nan, nan, 4.3],
156-
["a", 1, 4.5, 5.2],
157-
["b", 2, 3.2, 2.2],
158-
["c", 3, 1.2, nan],
159-
],
160-
index=Index(["q", "x", "y", "z"]),
161-
)
162-
if not sort:
163-
expected = expected.loc[["x", "y", "z", "q"]]
164-
165-
tm.assert_frame_equal(v1, expected)
166-
167-
@pytest.mark.parametrize(
168-
"name_in1,name_in2,name_in3,name_out",
169-
[
170-
("idx", "idx", "idx", "idx"),
171-
("idx", "idx", None, None),
172-
("idx", None, None, None),
173-
("idx1", "idx2", None, None),
174-
("idx1", "idx1", "idx2", None),
175-
("idx1", "idx2", "idx3", None),
176-
(None, None, None, None),
177-
],
178-
)
179-
def test_concat_same_index_names(self, name_in1, name_in2, name_in3, name_out):
180-
# GH13475
181-
indices = [
182-
Index(["a", "b", "c"], name=name_in1),
183-
Index(["b", "c", "d"], name=name_in2),
184-
Index(["c", "d", "e"], name=name_in3),
185-
]
186-
frames = [
187-
DataFrame({c: [0, 1, 2]}, index=i) for i, c in zip(indices, ["x", "y", "z"])
188-
]
189-
result = pd.concat(frames, axis=1)
190-
191-
exp_ind = Index(["a", "b", "c", "d", "e"], name=name_out)
192-
expected = DataFrame(
193-
{
194-
"x": [0, 1, 2, np.nan, np.nan],
195-
"y": [np.nan, 0, 1, 2, np.nan],
196-
"z": [np.nan, np.nan, 0, 1, 2],
197-
},
198-
index=exp_ind,
199-
)
200-
201-
tm.assert_frame_equal(result, expected)
202-
203-
def test_concat_multiindex_with_keys(self):
204-
index = MultiIndex(
205-
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
206-
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
207-
names=["first", "second"],
208-
)
209-
frame = DataFrame(
210-
np.random.randn(10, 3),
211-
index=index,
212-
columns=Index(["A", "B", "C"], name="exp"),
213-
)
214-
result = concat([frame, frame], keys=[0, 1], names=["iteration"])
215-
216-
assert result.index.names == ("iteration",) + index.names
217-
tm.assert_frame_equal(result.loc[0], frame)
218-
tm.assert_frame_equal(result.loc[1], frame)
219-
assert result.index.nlevels == 3
220-
221-
def test_concat_multiindex_with_none_in_index_names(self):
222-
# GH 15787
223-
index = pd.MultiIndex.from_product([[1], range(5)], names=["level1", None])
224-
df = DataFrame({"col": range(5)}, index=index, dtype=np.int32)
225-
226-
result = concat([df, df], keys=[1, 2], names=["level2"])
227-
index = pd.MultiIndex.from_product(
228-
[[1, 2], [1], range(5)], names=["level2", "level1", None]
229-
)
230-
expected = DataFrame({"col": list(range(5)) * 2}, index=index, dtype=np.int32)
231-
tm.assert_frame_equal(result, expected)
232-
233-
result = concat([df, df[:2]], keys=[1, 2], names=["level2"])
234-
level2 = [1] * 5 + [2] * 2
235-
level1 = [1] * 7
236-
no_name = list(range(5)) + list(range(2))
237-
tuples = list(zip(level2, level1, no_name))
238-
index = pd.MultiIndex.from_tuples(tuples, names=["level2", "level1", None])
239-
expected = DataFrame({"col": no_name}, index=index, dtype=np.int32)
240-
tm.assert_frame_equal(result, expected)
241-
242132
def test_concat_keys_and_levels(self):
243133
df = DataFrame(np.random.randn(1, 3))
244134
df2 = DataFrame(np.random.randn(1, 4))
@@ -293,28 +183,6 @@ def test_concat_keys_levels_no_overlap(self):
293183
with pytest.raises(ValueError, match=msg):
294184
concat([df, df2], keys=["one", "two"], levels=[["foo", "bar", "baz"]])
295185

296-
def test_concat_rename_index(self):
297-
a = DataFrame(
298-
np.random.rand(3, 3),
299-
columns=list("ABC"),
300-
index=Index(list("abc"), name="index_a"),
301-
)
302-
b = DataFrame(
303-
np.random.rand(3, 3),
304-
columns=list("ABC"),
305-
index=Index(list("abc"), name="index_b"),
306-
)
307-
308-
result = concat([a, b], keys=["key0", "key1"], names=["lvl0", "lvl1"])
309-
310-
exp = concat([a, b], keys=["key0", "key1"], names=["lvl0"])
311-
names = list(exp.index.names)
312-
names[1] = "lvl1"
313-
exp.index.set_names(names, inplace=True)
314-
315-
tm.assert_frame_equal(result, exp)
316-
assert result.index.names == exp.index.names
317-
318186
def test_crossed_dtypes_weird_corner(self):
319187
columns = ["A", "B", "C", "D"]
320188
df1 = DataFrame(
@@ -348,53 +216,6 @@ def test_crossed_dtypes_weird_corner(self):
348216
result = concat([df, df2], keys=["one", "two"], names=["first", "second"])
349217
assert result.index.names == ("first", "second")
350218

351-
def test_dups_index(self):
352-
# GH 4771
353-
354-
# single dtypes
355-
df = DataFrame(
356-
np.random.randint(0, 10, size=40).reshape(10, 4),
357-
columns=["A", "A", "C", "C"],
358-
)
359-
360-
result = concat([df, df], axis=1)
361-
tm.assert_frame_equal(result.iloc[:, :4], df)
362-
tm.assert_frame_equal(result.iloc[:, 4:], df)
363-
364-
result = concat([df, df], axis=0)
365-
tm.assert_frame_equal(result.iloc[:10], df)
366-
tm.assert_frame_equal(result.iloc[10:], df)
367-
368-
# multi dtypes
369-
df = concat(
370-
[
371-
DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),
372-
DataFrame(
373-
np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]
374-
),
375-
],
376-
axis=1,
377-
)
378-
379-
result = concat([df, df], axis=1)
380-
tm.assert_frame_equal(result.iloc[:, :6], df)
381-
tm.assert_frame_equal(result.iloc[:, 6:], df)
382-
383-
result = concat([df, df], axis=0)
384-
tm.assert_frame_equal(result.iloc[:10], df)
385-
tm.assert_frame_equal(result.iloc[10:], df)
386-
387-
# append
388-
result = df.iloc[0:8, :].append(df.iloc[8:])
389-
tm.assert_frame_equal(result, df)
390-
391-
result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10])
392-
tm.assert_frame_equal(result, df)
393-
394-
expected = concat([df, df], axis=0)
395-
result = df.append(df)
396-
tm.assert_frame_equal(result, expected)
397-
398219
def test_with_mixed_tuples(self, sort):
399220
# 10697
400221
# columns have mixed tuples, so handle properly
@@ -511,17 +332,6 @@ def test_concat_exclude_none(self):
511332
with pytest.raises(ValueError, match="All objects passed were None"):
512333
concat([None, None])
513334

514-
def test_concat_timedelta64_block(self):
515-
from pandas import to_timedelta
516-
517-
rng = to_timedelta(np.arange(10), unit="s")
518-
519-
df = DataFrame({"time": rng})
520-
521-
result = concat([df, df])
522-
assert (result.iloc[:10]["time"] == rng).all()
523-
assert (result.iloc[10:]["time"] == rng).all()
524-
525335
def test_concat_keys_with_none(self):
526336
# #1649
527337
df0 = DataFrame([[10, 20, 30], [10, 20, 30], [10, 20, 30]])
@@ -615,75 +425,6 @@ def __iter__(self):
615425

616426
tm.assert_frame_equal(pd.concat(CustomIterator2(), ignore_index=True), expected)
617427

618-
def test_default_index(self):
619-
# is_series and ignore_index
620-
s1 = Series([1, 2, 3], name="x")
621-
s2 = Series([4, 5, 6], name="y")
622-
res = pd.concat([s1, s2], axis=1, ignore_index=True)
623-
assert isinstance(res.columns, pd.RangeIndex)
624-
exp = DataFrame([[1, 4], [2, 5], [3, 6]])
625-
# use check_index_type=True to check the result have
626-
# RangeIndex (default index)
627-
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
628-
629-
# is_series and all inputs have no names
630-
s1 = Series([1, 2, 3])
631-
s2 = Series([4, 5, 6])
632-
res = pd.concat([s1, s2], axis=1, ignore_index=False)
633-
assert isinstance(res.columns, pd.RangeIndex)
634-
exp = DataFrame([[1, 4], [2, 5], [3, 6]])
635-
exp.columns = pd.RangeIndex(2)
636-
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
637-
638-
# is_dataframe and ignore_index
639-
df1 = DataFrame({"A": [1, 2], "B": [5, 6]})
640-
df2 = DataFrame({"A": [3, 4], "B": [7, 8]})
641-
642-
res = pd.concat([df1, df2], axis=0, ignore_index=True)
643-
exp = DataFrame([[1, 5], [2, 6], [3, 7], [4, 8]], columns=["A", "B"])
644-
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
645-
646-
res = pd.concat([df1, df2], axis=1, ignore_index=True)
647-
exp = DataFrame([[1, 5, 3, 7], [2, 6, 4, 8]])
648-
tm.assert_frame_equal(res, exp, check_index_type=True, check_column_type=True)
649-
650-
def test_concat_multiindex_rangeindex(self):
651-
# GH13542
652-
# when multi-index levels are RangeIndex objects
653-
# there is a bug in concat with objects of len 1
654-
655-
df = DataFrame(np.random.randn(9, 2))
656-
df.index = MultiIndex(
657-
levels=[pd.RangeIndex(3), pd.RangeIndex(3)],
658-
codes=[np.repeat(np.arange(3), 3), np.tile(np.arange(3), 3)],
659-
)
660-
661-
res = concat([df.iloc[[2, 3, 4], :], df.iloc[[5], :]])
662-
exp = df.iloc[[2, 3, 4, 5], :]
663-
tm.assert_frame_equal(res, exp)
664-
665-
def test_concat_multiindex_dfs_with_deepcopy(self):
666-
# GH 9967
667-
from copy import deepcopy
668-
669-
example_multiindex1 = pd.MultiIndex.from_product([["a"], ["b"]])
670-
example_dataframe1 = DataFrame([0], index=example_multiindex1)
671-
672-
example_multiindex2 = pd.MultiIndex.from_product([["a"], ["c"]])
673-
example_dataframe2 = DataFrame([1], index=example_multiindex2)
674-
675-
example_dict = {"s1": example_dataframe1, "s2": example_dataframe2}
676-
expected_index = pd.MultiIndex(
677-
levels=[["s1", "s2"], ["a"], ["b", "c"]],
678-
codes=[[0, 1], [0, 0], [0, 1]],
679-
names=["testname", None, None],
680-
)
681-
expected = DataFrame([[0], [1]], index=expected_index)
682-
result_copy = pd.concat(deepcopy(example_dict), names=["testname"])
683-
tm.assert_frame_equal(result_copy, expected)
684-
result_no_copy = pd.concat(example_dict, names=["testname"])
685-
tm.assert_frame_equal(result_no_copy, expected)
686-
687428
def test_concat_order(self):
688429
# GH 17344
689430
dfs = [DataFrame(index=range(3), columns=["a", 1, None])]
@@ -701,7 +442,7 @@ def test_concat_different_extension_dtypes_upcasts(self):
701442
expected = Series([1, 2, Decimal(1), Decimal(2)], dtype=object)
702443
tm.assert_series_equal(result, expected)
703444

704-
def test_concat_odered_dict(self):
445+
def test_concat_ordered_dict(self):
705446
# GH 21510
706447
expected = pd.concat(
707448
[Series(range(3)), Series(range(4))], keys=["First", "Another"]
@@ -711,22 +452,6 @@ def test_concat_odered_dict(self):
711452
)
712453
tm.assert_series_equal(result, expected)
713454

714-
def test_concat_empty_dataframe_dtypes(self):
715-
df = DataFrame(columns=list("abc"))
716-
df["a"] = df["a"].astype(np.bool_)
717-
df["b"] = df["b"].astype(np.int32)
718-
df["c"] = df["c"].astype(np.float64)
719-
720-
result = pd.concat([df, df])
721-
assert result["a"].dtype == np.bool_
722-
assert result["b"].dtype == np.int32
723-
assert result["c"].dtype == np.float64
724-
725-
result = pd.concat([df, df.astype(np.float64)])
726-
assert result["a"].dtype == np.object_
727-
assert result["b"].dtype == np.float64
728-
assert result["c"].dtype == np.float64
729-
730455

731456
@pytest.mark.parametrize("pdt", [Series, pd.DataFrame])
732457
@pytest.mark.parametrize("dt", np.sctypes["float"])
@@ -787,20 +512,6 @@ def test_concat_dense_sparse():
787512
tm.assert_series_equal(result, expected)
788513

789514

790-
@pytest.mark.parametrize("test_series", [True, False])
791-
def test_concat_copy_index(test_series, axis):
792-
# GH 29879
793-
if test_series:
794-
ser = Series([1, 2])
795-
comb = concat([ser, ser], axis=axis, copy=True)
796-
assert comb.index is not ser.index
797-
else:
798-
df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"])
799-
comb = concat([df, df], axis=axis, copy=True)
800-
assert comb.index is not df.index
801-
assert comb.columns is not df.columns
802-
803-
804515
@pytest.mark.parametrize("keys", [["e", "f", "f"], ["f", "e", "f"]])
805516
def test_duplicate_keys(keys):
806517
# GH 33654

pandas/tests/reshape/concat/test_dataframe.py

+17-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,16 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import DataFrame, Index, Series
5+
from pandas import DataFrame, Index, Series, concat
66
import pandas._testing as tm
77

88

9+
@pytest.fixture(params=[True, False])
10+
def sort(request):
11+
"""Boolean sort keyword for concat and DataFrame.append."""
12+
return request.param
13+
14+
915
class TestDataFrameConcat:
1016
def test_concat_multiple_frames_dtypes(self):
1117

@@ -157,3 +163,13 @@ def test_concat_astype_dup_col(self):
157163
np.array(["b", "b"]).reshape(1, 2), columns=["a", "a"]
158164
).astype("category")
159165
tm.assert_frame_equal(result, expected)
166+
167+
def test_concat_dataframe_keys_bug(self, sort):
168+
t1 = DataFrame(
169+
{"value": Series([1, 2, 3], index=Index(["a", "b", "c"], name="id"))}
170+
)
171+
t2 = DataFrame({"value": Series([7, 8], index=Index(["a", "b"], name="id"))})
172+
173+
# it works
174+
result = concat([t1, t2], axis=1, keys=["t1", "t2"], sort=sort)
175+
assert list(result.columns) == [("t1", "value"), ("t2", "value")]

0 commit comments

Comments
 (0)