Skip to content

TST: split pd.merge test with indicator=True #41520

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 17, 2021
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,7 @@ def check2(exp, kwarg):
result = merge(left, right, how="outer", **kwarg)
tm.assert_frame_equal(result, exp)

# TODO: should the next loop be un-indented? doing so breaks this test
for kwarg in [
{"left_index": True, "right_index": True},
{"left_index": True, "right_on": "x"},
Expand Down Expand Up @@ -652,6 +653,7 @@ def test_merge_nan_right(self):
)
tm.assert_frame_equal(result, expected, check_dtype=False)

def test_merge_nan_right2(self):
df1 = DataFrame({"i1": [0, 1], "i2": [0.5, 1.5]})
df2 = DataFrame({"i1": [0], "i3": [0.7]})
result = df1.join(df2, rsuffix="_", on="i1")
Expand Down Expand Up @@ -695,6 +697,9 @@ def test_join_append_timedeltas(self, using_array_manager):
expected = expected.astype(object)
tm.assert_frame_equal(result, expected)

def test_join_append_timedeltas2(self):
# timedelta64 issues with join/merge
# GH 5695
td = np.timedelta64(300000000)
lhs = DataFrame(Series([td, td], index=["A", "B"]))
rhs = DataFrame(Series([td], index=["A"]))
Expand Down Expand Up @@ -806,6 +811,7 @@ def test_merge_on_datetime64tz(self):
result = merge(left, right, on="key", how="outer")
tm.assert_frame_equal(result, expected)

def test_merge_datetime64tz_values(self):
left = DataFrame(
{
"key": [1, 2],
Expand Down Expand Up @@ -923,6 +929,7 @@ def test_merge_on_periods(self):
result = merge(left, right, on="key", how="outer")
tm.assert_frame_equal(result, expected)

def test_merge_period_values(self):
left = DataFrame(
{"key": [1, 2], "value": pd.period_range("20151010", periods=2, freq="D")}
)
Expand All @@ -944,20 +951,25 @@ def test_merge_on_periods(self):
assert result["value_x"].dtype == "Period[D]"
assert result["value_y"].dtype == "Period[D]"

def test_indicator(self):
# PR #10054. xref #7412 and closes #8790.
@pytest.fixture
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this to the top?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, and CI is green.

def dfs_for_indicator(self):
    """
    Build the pair of frames shared by the merge-indicator tests.

    Returns
    -------
    tuple of (DataFrame, DataFrame)
        ``df1`` has two rows with columns ``col1``, ``col_conflict`` and
        ``col_left``; ``df2`` has five rows with columns ``col1``,
        ``col_conflict`` and ``col_right``. The ``col1`` values overlap
        only on ``1``, and ``col_conflict`` is present in both frames.
    """
    # No defensive copy is taken here: new frames are built on every call,
    # and callers that need a pristine snapshot copy the frames themselves.
    df1 = DataFrame(
        {"col1": [0, 1], "col_conflict": [1, 2], "col_left": ["a", "b"]}
    )
    df2 = DataFrame(
        {
            "col1": [1, 2, 3, 4, 5],
            "col_conflict": [1, 2, 3, 4, 5],
            "col_right": [2, 2, 2, 2, 2],
        }
    )
    return df1, df2

def test_indicator(self, dfs_for_indicator):
# PR #10054. xref #7412 and closes #8790.
df1, df2 = dfs_for_indicator
df1_copy = df1.copy()

df2_copy = df2.copy()

df_result = DataFrame(
Expand Down Expand Up @@ -1016,14 +1028,19 @@ def test_indicator(self):
)
tm.assert_frame_equal(test_custom_name, df_result_custom_name)

def test_merge_indicator_arg_validation(self, dfs_for_indicator):
    """
    ``indicator`` must be a boolean or a string; an integer is rejected.

    Both the top-level ``merge`` function and the ``DataFrame.merge``
    method are exercised with ``indicator=5`` and must raise ValueError.
    """
    left, right = dfs_for_indicator

    expected_error = "indicator option can only accept boolean or string arguments"
    bad_kwargs = {"on": "col1", "how": "outer", "indicator": 5}

    # Function form.
    with pytest.raises(ValueError, match=expected_error):
        merge(left, right, **bad_kwargs)

    # Method form.
    with pytest.raises(ValueError, match=expected_error):
        left.merge(right, **bad_kwargs)

def test_merge_indicator_result_integrity(self, dfs_for_indicator):
# Check result integrity
df1, df2 = dfs_for_indicator

test2 = merge(df1, df2, on="col1", how="left", indicator=True)
assert (test2._merge != "right_only").all()
Expand All @@ -1040,7 +1057,10 @@ def test_indicator(self):
test4 = df1.merge(df2, on="col1", how="inner", indicator=True)
assert (test4._merge == "both").all()

def test_merge_indicator_invalid(self, dfs_for_indicator):
# Check if working name in df
df1, _ = dfs_for_indicator

for i in ["_right_indicator", "_left_indicator", "_merge"]:
df_badcolumn = DataFrame({"col1": [1, 2], i: [2, 2]})

Expand Down Expand Up @@ -1071,6 +1091,7 @@ def test_indicator(self):
df_badcolumn, on="col1", how="outer", indicator="custom_column_name"
)

def test_merge_indicator_multiple_columns(self):
# Merge on multiple columns
df3 = DataFrame({"col1": [0, 1], "col2": ["a", "b"]})

Expand Down Expand Up @@ -1538,6 +1559,8 @@ def test_merge_incompat_infer_boolean_object(self):
result = merge(df2, df1, on="key")
tm.assert_frame_equal(result, expected)

def test_merge_incompat_infer_boolean_object_with_missing(self):
# GH21119: bool + object bool merge OK
# with missing value
df1 = DataFrame({"key": Series([True, False, np.nan], dtype=object)})
df2 = DataFrame({"key": [True, False]})
Expand Down