@@ -102,6 +102,19 @@ def series_of_dtype_all_na(request):
102
102
return request .param
103
103
104
104
105
@pytest.fixture
def dfs_for_indicator():
    """Build the (left, right) frame pair shared by the merge-indicator tests.

    Both frames carry ``col1`` and ``col_conflict``; the left frame also has
    ``col_left`` and the right frame also has ``col_right``.  The ``col1``
    values overlap only at ``1``, so the pair yields left-only, right-only
    and shared keys.

    Returns
    -------
    tuple of DataFrame
        ``(left, right)``, in that order.
    """
    left = DataFrame(
        {
            "col1": [0, 1],
            "col_conflict": [1, 2],
            "col_left": ["a", "b"],
        }
    )

    right_columns = {
        "col1": list(range(1, 6)),
        "col_conflict": list(range(1, 6)),
        "col_right": [2] * 5,
    }
    right = DataFrame(right_columns)

    return left, right
116
+
117
+
105
118
class TestMerge :
106
119
def setup_method (self , method ):
107
120
# aggregate multiple columns
@@ -543,6 +556,7 @@ def check2(exp, kwarg):
543
556
result = merge (left , right , how = "outer" , ** kwarg )
544
557
tm .assert_frame_equal (result , exp )
545
558
559
+ # TODO: should the next loop be un-indented? doing so breaks this test
546
560
for kwarg in [
547
561
{"left_index" : True , "right_index" : True },
548
562
{"left_index" : True , "right_on" : "x" },
@@ -652,6 +666,7 @@ def test_merge_nan_right(self):
652
666
)
653
667
tm .assert_frame_equal (result , expected , check_dtype = False )
654
668
669
+ def test_merge_nan_right2 (self ):
655
670
df1 = DataFrame ({"i1" : [0 , 1 ], "i2" : [0.5 , 1.5 ]})
656
671
df2 = DataFrame ({"i1" : [0 ], "i3" : [0.7 ]})
657
672
result = df1 .join (df2 , rsuffix = "_" , on = "i1" )
@@ -695,6 +710,9 @@ def test_join_append_timedeltas(self, using_array_manager):
695
710
expected = expected .astype (object )
696
711
tm .assert_frame_equal (result , expected )
697
712
713
+ def test_join_append_timedeltas2 (self ):
714
+ # timedelta64 issues with join/merge
715
+ # GH 5695
698
716
td = np .timedelta64 (300000000 )
699
717
lhs = DataFrame (Series ([td , td ], index = ["A" , "B" ]))
700
718
rhs = DataFrame (Series ([td ], index = ["A" ]))
@@ -806,6 +824,7 @@ def test_merge_on_datetime64tz(self):
806
824
result = merge (left , right , on = "key" , how = "outer" )
807
825
tm .assert_frame_equal (result , expected )
808
826
827
+ def test_merge_datetime64tz_values (self ):
809
828
left = DataFrame (
810
829
{
811
830
"key" : [1 , 2 ],
@@ -923,6 +942,7 @@ def test_merge_on_periods(self):
923
942
result = merge (left , right , on = "key" , how = "outer" )
924
943
tm .assert_frame_equal (result , expected )
925
944
945
+ def test_merge_period_values (self ):
926
946
left = DataFrame (
927
947
{"key" : [1 , 2 ], "value" : pd .period_range ("20151010" , periods = 2 , freq = "D" )}
928
948
)
@@ -944,20 +964,11 @@ def test_merge_on_periods(self):
944
964
assert result ["value_x" ].dtype == "Period[D]"
945
965
assert result ["value_y" ].dtype == "Period[D]"
946
966
947
- def test_indicator (self ):
967
+ def test_indicator (self , dfs_for_indicator ):
948
968
# PR #10054. xref #7412 and closes #8790.
949
- df1 = DataFrame (
950
- {"col1" : [0 , 1 ], "col_conflict" : [1 , 2 ], "col_left" : ["a" , "b" ]}
951
- )
969
+ df1 , df2 = dfs_for_indicator
952
970
df1_copy = df1 .copy ()
953
971
954
- df2 = DataFrame (
955
- {
956
- "col1" : [1 , 2 , 3 , 4 , 5 ],
957
- "col_conflict" : [1 , 2 , 3 , 4 , 5 ],
958
- "col_right" : [2 , 2 , 2 , 2 , 2 ],
959
- }
960
- )
961
972
df2_copy = df2 .copy ()
962
973
963
974
df_result = DataFrame (
@@ -1016,14 +1027,19 @@ def test_indicator(self):
1016
1027
)
1017
1028
tm .assert_frame_equal (test_custom_name , df_result_custom_name )
1018
1029
1030
def test_merge_indicator_arg_validation(self, dfs_for_indicator):
    """``indicator`` must be a bool or a str; anything else raises ValueError."""
    left, right = dfs_for_indicator
    msg = "indicator option can only accept boolean or string arguments"

    # Top-level function entry point.
    with pytest.raises(ValueError, match=msg):
        merge(left, right, on="col1", how="outer", indicator=5)

    # DataFrame method entry point.
    with pytest.raises(ValueError, match=msg):
        left.merge(right, on="col1", how="outer", indicator=5)
1025
1039
1040
+ def test_merge_indicator_result_integrity (self , dfs_for_indicator ):
1026
1041
# Check result integrity
1042
+ df1 , df2 = dfs_for_indicator
1027
1043
1028
1044
test2 = merge (df1 , df2 , on = "col1" , how = "left" , indicator = True )
1029
1045
assert (test2 ._merge != "right_only" ).all ()
@@ -1040,7 +1056,10 @@ def test_indicator(self):
1040
1056
test4 = df1 .merge (df2 , on = "col1" , how = "inner" , indicator = True )
1041
1057
assert (test4 ._merge == "both" ).all ()
1042
1058
1059
+ def test_merge_indicator_invalid (self , dfs_for_indicator ):
1043
1060
# Check if working name in df
1061
+ df1 , _ = dfs_for_indicator
1062
+
1044
1063
for i in ["_right_indicator" , "_left_indicator" , "_merge" ]:
1045
1064
df_badcolumn = DataFrame ({"col1" : [1 , 2 ], i : [2 , 2 ]})
1046
1065
@@ -1071,6 +1090,7 @@ def test_indicator(self):
1071
1090
df_badcolumn , on = "col1" , how = "outer" , indicator = "custom_column_name"
1072
1091
)
1073
1092
1093
+ def test_merge_indicator_multiple_columns (self ):
1074
1094
# Merge on multiple columns
1075
1095
df3 = DataFrame ({"col1" : [0 , 1 ], "col2" : ["a" , "b" ]})
1076
1096
@@ -1538,6 +1558,8 @@ def test_merge_incompat_infer_boolean_object(self):
1538
1558
result = merge (df2 , df1 , on = "key" )
1539
1559
tm .assert_frame_equal (result , expected )
1540
1560
1561
+ def test_merge_incompat_infer_boolean_object_with_missing (self ):
1562
+ # GH21119: bool + object bool merge OK
1541
1563
# with missing value
1542
1564
df1 = DataFrame ({"key" : Series ([True , False , np .nan ], dtype = object )})
1543
1565
df2 = DataFrame ({"key" : [True , False ]})
0 commit comments