@@ -901,14 +901,78 @@ def test_merge_on_multikey(self):
901
901
# TODO: columns aren't in the same order yet
902
902
assert_frame_equal (joined , expected .ix [:, joined .columns ])
903
903
904
+ left = self .data .join (self .to_join , on = ['key1' , 'key2' ], sort = True )
905
+ right = expected .ix [:, joined .columns ].sort (['key1' , 'key2' ],
906
+ kind = 'mergesort' )
907
+ assert_frame_equal (left , right )
908
+
909
def test_left_join_multi_index(self):
    """Left-join on three key columns and cross-check against ``merge``.

    Builds a 5000-row ``left`` frame whose '1st' and '3rd' columns hold
    random lowercase letters and whose '2nd' column holds random
    integers in [0, 1000).  ``right`` is a row permutation of ``left``
    indexed by the three key columns.  ``left`` gets a '4th' column and
    ``right`` a '5th' column computed from the keys with opposite sign,
    so every joined row can be validated from its own key values.  The
    checks run once on clean data and once after NaNs are injected into
    each key column.
    """
    icols = ['1st', '2nd', '3rd']

    def bind_cols(df):
        # Fold the three key columns into a single number per row so
        # that the joined value columns can be recomputed from the keys.
        def iord(a):
            # ``a != a`` is true only for NaN; missing keys map to 0.
            return 0 if a != a else ord(a)

        def offset(ts):
            return ts.map(iord) - ord('a')

        return (offset(df['1st']) + offset(df['3rd']) * 1e2 +
                df['2nd'].fillna(0) * 1e4)

    def run_asserts(left, right):
        for sort in [False, True]:
            res = left.join(right, on=icols, how='left', sort=sort)

            # a left join never yields fewer rows than the left frame
            self.assertTrue(len(left) <= len(res))
            self.assertFalse(res['4th'].isnull().any())
            self.assertFalse(res['5th'].isnull().any())

            # '5th' was built as the negation of the same key function,
            # and both must agree with the keys present in the result
            # (all columns except the trailing '4th' and '5th').
            tm.assert_series_equal(res['4th'], - res['5th'])
            tm.assert_series_equal(res['4th'], bind_cols(res.iloc[:, :-2]))

            if sort:
                # a sorted join must already be in stable key order
                tm.assert_frame_equal(res,
                                      res.sort(icols, kind='mergesort'))

            out = merge(left, right.reset_index(), on=icols,
                        sort=sort, how='left')

            # the merge result is compared against the join result
            # after giving the latter a plain 0..n-1 index
            res.index = np.arange(len(res))
            tm.assert_frame_equal(out, res)

    lc = [chr(c) for c in range(ord('a'), ord('z') + 1)]
    left = DataFrame(np.random.choice(lc, (5000, 2)),
                     columns=['1st', '3rd'])
    left.insert(1, '2nd', np.random.randint(0, 1000, len(left)))

    i = np.random.permutation(len(left))
    right = left.iloc[i].copy()

    left['4th'] = bind_cols(left)
    right['5th'] = - bind_cols(right)
    right.set_index(icols, inplace=True)

    run_asserts(left, right)

    # inject some nulls
    left.loc[1::23, '1st'] = np.nan
    left.loc[2::37, '2nd'] = np.nan
    left.loc[3::43, '3rd'] = np.nan
    left['4th'] = bind_cols(left)

    i = np.random.permutation(len(left))
    # Drop the trailing '4th' column and take an explicit copy, matching
    # the first scenario, so that adding '5th' below is an assignment on
    # an independent frame rather than on an indexing result of ``left``.
    right = left.iloc[i, :-1].copy()
    right['5th'] = - bind_cols(right)
    right.set_index(icols, inplace=True)

    run_asserts(left, right)
964
+
904
965
def test_merge_right_vs_left (self ):
905
966
# compare left vs right merge with multikey
# Diff hunk: the '-' lines are the removed single-pass body and the '+'
# lines are its replacement, which runs the same comparison for both
# sort=False and sort=True.
# In each pass merged1 is self.data left-merged with self.to_join
# (left_on key1/key2, right_index=True) and merged2 is the mirrored
# right merge starting from self.to_join; merged2's columns are
# reordered to merged1's before the two frames are asserted equal.
906
- merged1 = self .data .merge (self .to_join , left_on = ['key1' , 'key2' ],
907
- right_index = True , how = 'left' )
908
- merged2 = self .to_join .merge (self .data , right_on = ['key1' , 'key2' ],
909
- left_index = True , how = 'right' )
910
- merged2 = merged2 .ix [:, merged1 .columns ]
911
- assert_frame_equal (merged1 , merged2 )
967
+ for sort in [False , True ]:
968
+ merged1 = self .data .merge (self .to_join , left_on = ['key1' , 'key2' ],
969
+ right_index = True , how = 'left' , sort = sort )
970
+
971
+ merged2 = self .to_join .merge (self .data , right_on = ['key1' , 'key2' ],
972
+ left_index = True , how = 'right' , sort = sort )
973
+
974
+ merged2 = merged2 .ix [:, merged1 .columns ]
975
+ assert_frame_equal (merged1 , merged2 )
912
976
913
977
def test_compress_group_combinations (self ):
914
978
@@ -943,6 +1007,8 @@ def test_left_join_index_preserve_order(self):
943
1007
expected .loc [(expected .k1 == 1 ) & (expected .k2 == 'foo' ),'v2' ] = 7
944
1008
945
1009
tm .assert_frame_equal (result , expected )
1010
+ tm .assert_frame_equal (result .sort (['k1' , 'k2' ], kind = 'mergesort' ),
1011
+ left .join (right , on = ['k1' , 'k2' ], sort = True ))
946
1012
947
1013
# test join with multi dtypes blocks
948
1014
left = DataFrame ({'k1' : [0 , 1 , 2 ] * 8 ,
@@ -961,6 +1027,8 @@ def test_left_join_index_preserve_order(self):
961
1027
expected .loc [(expected .k1 == 1 ) & (expected .k2 == 'foo' ),'v2' ] = 7
962
1028
963
1029
tm .assert_frame_equal (result , expected )
1030
+ tm .assert_frame_equal (result .sort (['k1' , 'k2' ], kind = 'mergesort' ),
1031
+ left .join (right , on = ['k1' , 'k2' ], sort = True ))
964
1032
965
1033
# do a right join for an extra test
966
1034
joined = merge (right , left , left_index = True ,
@@ -1022,6 +1090,12 @@ def test_left_join_index_multi_match_multiindex(self):
1022
1090
1023
1091
tm .assert_frame_equal (result , expected )
1024
1092
1093
+ result = left .join (right , on = ['cola' , 'colb' , 'colc' ],
1094
+ how = 'left' , sort = True )
1095
+
1096
+ tm .assert_frame_equal (result ,
1097
+ expected .sort (['cola' , 'colb' , 'colc' ], kind = 'mergesort' ))
1098
+
1025
1099
# GH7331 - maintain left frame order in left merge
1026
1100
right .reset_index (inplace = True )
1027
1101
right .columns = left .columns [:3 ].tolist () + right .columns [- 1 :].tolist ()
@@ -1066,6 +1140,9 @@ def test_left_join_index_multi_match(self):
1066
1140
1067
1141
tm .assert_frame_equal (result , expected )
1068
1142
1143
+ result = left .join (right , on = 'tag' , how = 'left' , sort = True )
1144
+ tm .assert_frame_equal (result , expected .sort ('tag' , kind = 'mergesort' ))
1145
+
1069
1146
# GH7331 - maintain left frame order in left merge
1070
1147
result = merge (left , right .reset_index (), how = 'left' , on = 'tag' )
1071
1148
expected .index = np .arange (len (expected ))
@@ -1094,6 +1171,10 @@ def _test(dtype1,dtype2):
1094
1171
1095
1172
tm .assert_frame_equal (result , expected )
1096
1173
1174
+ result = left .join (right , on = ['k1' , 'k2' ], sort = True )
1175
+ expected .sort (['k1' , 'k2' ], kind = 'mergesort' , inplace = True )
1176
+ tm .assert_frame_equal (result , expected )
1177
+
1097
1178
for d1 in [np .int64 ,np .int32 ,np .int16 ,np .int8 ,np .uint8 ]:
1098
1179
for d2 in [np .int64 ,np .float64 ,np .float32 ,np .float16 ]:
1099
1180
_test (np .dtype (d1 ),np .dtype (d2 ))
0 commit comments