Skip to content

Commit ef8df3d

Browse files
jbrockmendel authored and TLouf committed
TST: split pd.merge test with indicator=True (pandas-dev#41520)
1 parent 16cea42 commit ef8df3d

File tree

1 file changed

+33
-11
lines changed

1 file changed

+33
-11
lines changed

pandas/tests/reshape/merge/test_merge.py

+33 -11
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,19 @@ def series_of_dtype_all_na(request):
102102
return request.param
103103

104104

105+
@pytest.fixture
106+
def dfs_for_indicator():
107+
df1 = DataFrame({"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]})
108+
df2 = DataFrame(
109+
{
110+
"col1": [1, 2, 3, 4, 5],
111+
"col_conflict": [1, 2, 3, 4, 5],
112+
"col_right": [2, 2, 2, 2, 2],
113+
}
114+
)
115+
return df1, df2
116+
117+
105118
class TestMerge:
106119
def setup_method(self, method):
107120
# aggregate multiple columns
@@ -543,6 +556,7 @@ def check2(exp, kwarg):
543556
result = merge(left, right, how="outer", **kwarg)
544557
tm.assert_frame_equal(result, exp)
545558

559+
# TODO: should the next loop be un-indented? doing so breaks this test
546560
for kwarg in [
547561
{"left_index": True, "right_index": True},
548562
{"left_index": True, "right_on": "x"},
@@ -652,6 +666,7 @@ def test_merge_nan_right(self):
652666
)
653667
tm.assert_frame_equal(result, expected, check_dtype=False)
654668

669+
def test_merge_nan_right2(self):
655670
df1 = DataFrame({"i1": [0, 1], "i2": [0.5, 1.5]})
656671
df2 = DataFrame({"i1": [0], "i3": [0.7]})
657672
result = df1.join(df2, rsuffix="_", on="i1")
@@ -695,6 +710,9 @@ def test_join_append_timedeltas(self, using_array_manager):
695710
expected = expected.astype(object)
696711
tm.assert_frame_equal(result, expected)
697712

713+
def test_join_append_timedeltas2(self):
714+
# timedelta64 issues with join/merge
715+
# GH 5695
698716
td = np.timedelta64(300000000)
699717
lhs = DataFrame(Series([td, td], index=["A", "B"]))
700718
rhs = DataFrame(Series([td], index=["A"]))
@@ -806,6 +824,7 @@ def test_merge_on_datetime64tz(self):
806824
result = merge(left, right, on="key", how="outer")
807825
tm.assert_frame_equal(result, expected)
808826

827+
def test_merge_datetime64tz_values(self):
809828
left = DataFrame(
810829
{
811830
"key": [1, 2],
@@ -923,6 +942,7 @@ def test_merge_on_periods(self):
923942
result = merge(left, right, on="key", how="outer")
924943
tm.assert_frame_equal(result, expected)
925944

945+
def test_merge_period_values(self):
926946
left = DataFrame(
927947
{"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")}
928948
)
@@ -944,20 +964,11 @@ def test_merge_on_periods(self):
944964
assert result["value_x"].dtype == "Period[D]"
945965
assert result["value_y"].dtype == "Period[D]"
946966

947-
def test_indicator(self):
967+
def test_indicator(self, dfs_for_indicator):
948968
# PR #10054. xref #7412 and closes #8790.
949-
df1 = DataFrame(
950-
{"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]}
951-
)
969+
df1, df2 = dfs_for_indicator
952970
df1_copy = df1.copy()
953971

954-
df2 = DataFrame(
955-
{
956-
"col1": [1, 2, 3, 4, 5],
957-
"col_conflict": [1, 2, 3, 4, 5],
958-
"col_right": [2, 2, 2, 2, 2],
959-
}
960-
)
961972
df2_copy = df2.copy()
962973

963974
df_result = DataFrame(
@@ -1016,14 +1027,19 @@ def test_indicator(self):
10161027
)
10171028
tm.assert_frame_equal(test_custom_name, df_result_custom_name)
10181029

1030+
def test_merge_indicator_arg_validation(self, dfs_for_indicator):
10191031
# Check only accepts strings and booleans
1032+
df1, df2 = dfs_for_indicator
1033+
10201034
msg = "indicator option can only accept boolean or string arguments"
10211035
with pytest.raises(ValueError, match=msg):
10221036
merge(df1, df2, on="col1", how="outer", indicator=5)
10231037
with pytest.raises(ValueError, match=msg):
10241038
df1.merge(df2, on="col1", how="outer", indicator=5)
10251039

1040+
def test_merge_indicator_result_integrity(self, dfs_for_indicator):
10261041
# Check result integrity
1042+
df1, df2 = dfs_for_indicator
10271043

10281044
test2 = merge(df1, df2, on="col1", how="left", indicator=True)
10291045
assert (test2._merge != "right_only").all()
@@ -1040,7 +1056,10 @@ def test_indicator(self):
10401056
test4 = df1.merge(df2, on="col1", how="inner", indicator=True)
10411057
assert (test4._merge == "both").all()
10421058

1059+
def test_merge_indicator_invalid(self, dfs_for_indicator):
10431060
# Check if working name in df
1061+
df1, _ = dfs_for_indicator
1062+
10441063
for i in ["_right_indicator", "_left_indicator", "_merge"]:
10451064
df_badcolumn = DataFrame({"col1": [1, 2], i: [2, 2]})
10461065

@@ -1071,6 +1090,7 @@ def test_indicator(self):
10711090
df_badcolumn, on="col1", how="outer", indicator="custom_column_name"
10721091
)
10731092

1093+
def test_merge_indicator_multiple_columns(self):
10741094
# Merge on multiple columns
10751095
df3 = DataFrame({"col1": [0, 1], "col2": ["a", "b"]})
10761096

@@ -1538,6 +1558,8 @@ def test_merge_incompat_infer_boolean_object(self):
15381558
result = merge(df2, df1, on="key")
15391559
tm.assert_frame_equal(result, expected)
15401560

1561+
def test_merge_incompat_infer_boolean_object_with_missing(self):
1562+
# GH21119: bool + object bool merge OK
15411563
# with missing value
15421564
df1 = DataFrame({"key": Series([True, False, np.nan], dtype=object)})
15431565
df2 = DataFrame({"key": [True, False]})

0 commit comments

Comments
 (0)