13
13
from pandas .compat import lzip
14
14
from pandas .core .reshape .concat import concat
15
15
from pandas .core .reshape .merge import merge
16
- from pandas .util .testing import assert_frame_equal
16
+
17
+
18
+ @pytest .fixture
19
+ def left ():
20
+ # a little relevant example with NAs
21
+ key1 = ['bar' , 'bar' , 'bar' , 'foo' , 'foo' , 'baz' , 'baz' , 'qux' ,
22
+ 'qux' , 'snap' ]
23
+ key2 = ['two' , 'one' , 'three' , 'one' , 'two' , 'one' , 'two' , 'two' ,
24
+ 'three' , 'one' ]
25
+
26
+ data = np .random .randn (len (key1 ))
27
+ return DataFrame ({'key1' : key1 , 'key2' : key2 , 'data' : data })
28
+
29
+
30
+ @pytest .fixture
31
+ def right ():
32
+ index = MultiIndex (levels = [['foo' , 'bar' , 'baz' , 'qux' ],
33
+ ['one' , 'two' , 'three' ]],
34
+ labels = [[0 , 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 , 3 ],
35
+ [0 , 1 , 2 , 0 , 1 , 1 , 2 , 0 , 1 , 2 ]],
36
+ names = ['first' , 'second' ])
37
+
38
+ return DataFrame (np .random .randn (10 , 3 ), index = index ,
39
+ columns = ['j_one' , 'j_two' , 'j_three' ])
17
40
18
41
19
42
class TestMergeMulti (object ):
20
43
21
- def setup_method (self ):
22
- self .index = MultiIndex (levels = [['foo' , 'bar' , 'baz' , 'qux' ],
23
- ['one' , 'two' , 'three' ]],
24
- labels = [[0 , 0 , 0 , 1 , 1 , 2 , 2 , 3 , 3 , 3 ],
25
- [0 , 1 , 2 , 0 , 1 , 1 , 2 , 0 , 1 , 2 ]],
26
- names = ['first' , 'second' ])
27
- self .to_join = DataFrame (np .random .randn (10 , 3 ), index = self .index ,
28
- columns = ['j_one' , 'j_two' , 'j_three' ])
29
-
30
- # a little relevant example with NAs
31
- key1 = ['bar' , 'bar' , 'bar' , 'foo' , 'foo' , 'baz' , 'baz' , 'qux' ,
32
- 'qux' , 'snap' ]
33
- key2 = ['two' , 'one' , 'three' , 'one' , 'two' , 'one' , 'two' , 'two' ,
34
- 'three' , 'one' ]
35
-
36
- data = np .random .randn (len (key1 ))
37
- self .data = DataFrame ({'key1' : key1 , 'key2' : key2 ,
38
- 'data' : data })
39
-
40
- def test_merge_on_multikey (self ):
41
- joined = self .data .join (self .to_join , on = ['key1' , 'key2' ])
42
-
43
- join_key = Index (lzip (self .data ['key1' ], self .data ['key2' ]))
44
- indexer = self .to_join .index .get_indexer (join_key )
45
- ex_values = self .to_join .values .take (indexer , axis = 0 )
44
+ def test_merge_on_multikey (self , left , right ):
45
+ joined = left .join (right , on = ['key1' , 'key2' ])
46
+
47
+ join_key = Index (lzip (left ['key1' ], left ['key2' ]))
48
+ indexer = right .index .get_indexer (join_key )
49
+ ex_values = right .values .take (indexer , axis = 0 )
46
50
ex_values [indexer == - 1 ] = np .nan
47
- expected = self .data .join (DataFrame (ex_values ,
48
- columns = self .to_join .columns ))
51
+ expected = left .join (DataFrame (ex_values , columns = right .columns ))
49
52
50
53
# TODO: columns aren't in the same order yet
51
- assert_frame_equal (joined , expected .loc [:, joined .columns ])
54
+ tm . assert_frame_equal (joined , expected .loc [:, joined .columns ])
52
55
53
- left = self . data . join (self . to_join , on = ['key1' , 'key2' ], sort = True )
56
+ left = left . join (right , on = ['key1' , 'key2' ], sort = True )
54
57
right = expected .loc [:, joined .columns ].sort_values (['key1' , 'key2' ],
55
58
kind = 'mergesort' )
56
- assert_frame_equal (left , right )
59
+ tm . assert_frame_equal (left , right )
57
60
58
61
def test_left_join_multi_index (self ):
59
62
icols = ['1st' , '2nd' , '3rd' ]
@@ -119,18 +122,18 @@ def run_asserts(left, right):
119
122
120
123
run_asserts (left , right )
121
124
122
- def test_merge_right_vs_left (self ):
125
+ def test_merge_right_vs_left (self , left , right ):
123
126
# compare left vs right merge with multikey
124
127
for sort in [False , True ]:
125
- merged1 = self . data . merge (self . to_join , left_on = ['key1' , 'key2' ],
126
- right_index = True , how = 'left' , sort = sort )
128
+ merged1 = left . merge (right , left_on = ['key1' , 'key2' ],
129
+ right_index = True , how = 'left' , sort = sort )
127
130
128
- merged2 = self . to_join . merge (self . data , right_on = ['key1' , 'key2' ],
129
- left_index = True , how = 'right' ,
130
- sort = sort )
131
+ merged2 = right . merge (left , right_on = ['key1' , 'key2' ],
132
+ left_index = True , how = 'right' ,
133
+ sort = sort )
131
134
132
135
merged2 = merged2 .loc [:, merged1 .columns ]
133
- assert_frame_equal (merged1 , merged2 )
136
+ tm . assert_frame_equal (merged1 , merged2 )
134
137
135
138
def test_compress_group_combinations (self ):
136
139
@@ -393,15 +396,13 @@ def test_join_multi_levels(self):
393
396
'nl0000289965' ]))
394
397
.set_index (['household_id' , 'asset_id' ])
395
398
.reindex (columns = ['male' , 'wealth' , 'name' , 'share' ]))
396
- assert_frame_equal (result , expected )
397
-
398
- assert_frame_equal (result , expected )
399
+ tm .assert_frame_equal (result , expected )
399
400
400
401
# equivalency
401
- result2 = (merge (household .reset_index (), portfolio .reset_index (),
402
+ result = (merge (household .reset_index (), portfolio .reset_index (),
402
403
on = ['household_id' ], how = 'inner' )
403
404
.set_index (['household_id' , 'asset_id' ]))
404
- assert_frame_equal (result2 , expected )
405
+ tm . assert_frame_equal (result , expected )
405
406
406
407
result = household .join (portfolio , how = 'outer' )
407
408
expected = (concat ([
@@ -412,7 +413,7 @@ def test_join_multi_levels(self):
412
413
[(4 , np .nan )],
413
414
names = ['household_id' , 'asset_id' ])))
414
415
], axis = 0 , sort = True ).reindex (columns = expected .columns ))
415
- assert_frame_equal (result , expected )
416
+ tm . assert_frame_equal (result , expected )
416
417
417
418
# invalid cases
418
419
household .index .name = 'foo'
@@ -471,7 +472,7 @@ def test_join_multi_levels2(self):
471
472
result = (merge (household .reset_index (), log_return .reset_index (),
472
473
on = ['asset_id' ], how = 'inner' )
473
474
.set_index (['household_id' , 'asset_id' , 't' ]))
474
- assert_frame_equal (result , expected )
475
+ tm . assert_frame_equal (result , expected )
475
476
476
477
expected = (
477
478
DataFrame (dict (
@@ -496,7 +497,7 @@ def test_join_multi_levels2(self):
496
497
on = ['asset_id' ], how = 'outer' )
497
498
.set_index (['household_id' , 'asset_id' , 't' ]))
498
499
499
- assert_frame_equal (result , expected )
500
+ tm . assert_frame_equal (result , expected )
500
501
501
502
502
503
@pytest .fixture
@@ -564,17 +565,17 @@ def test_join_multi_empty_frames(self, left_multi, right_multi, join_type,
564
565
result = left_multi .join (right_multi , how = join_type ).sort_index ()
565
566
tm .assert_frame_equal (result , expected )
566
567
567
- @pytest .mark .parametrize ("klass " , [None , np .asarray , Series , Index ])
568
- def test_merge_datetime_index (self , klass ):
568
+ @pytest .mark .parametrize ("box " , [None , np .asarray , Series , Index ])
569
+ def test_merge_datetime_index (self , box ):
569
570
# see gh-19038
570
571
df = DataFrame ([1 , 2 , 3 ],
571
572
["2016-01-01" , "2017-01-01" , "2018-01-01" ],
572
573
columns = ["a" ])
573
574
df .index = pd .to_datetime (df .index )
574
575
on_vector = df .index .year
575
576
576
- if klass is not None :
577
- on_vector = klass (on_vector )
577
+ if box is not None :
578
+ on_vector = box (on_vector )
578
579
579
580
expected = DataFrame (
580
581
OrderedDict ([
@@ -596,3 +597,26 @@ def test_merge_datetime_index(self, klass):
596
597
597
598
result = df .merge (df , on = [df .index .year ], how = "inner" )
598
599
tm .assert_frame_equal (result , expected )
600
+
601
+ def test_single_common_level (self ):
602
+ index_left = pd .MultiIndex .from_tuples ([('K0' , 'X0' ), ('K0' , 'X1' ),
603
+ ('K1' , 'X2' )],
604
+ names = ['key' , 'X' ])
605
+
606
+ left = pd .DataFrame ({'A' : ['A0' , 'A1' , 'A2' ],
607
+ 'B' : ['B0' , 'B1' , 'B2' ]},
608
+ index = index_left )
609
+
610
+ index_right = pd .MultiIndex .from_tuples ([('K0' , 'Y0' ), ('K1' , 'Y1' ),
611
+ ('K2' , 'Y2' ), ('K2' , 'Y3' )],
612
+ names = ['key' , 'Y' ])
613
+
614
+ right = pd .DataFrame ({'C' : ['C0' , 'C1' , 'C2' , 'C3' ],
615
+ 'D' : ['D0' , 'D1' , 'D2' , 'D3' ]},
616
+ index = index_right )
617
+
618
+ result = left .join (right )
619
+ expected = pd .merge (left .reset_index (), right .reset_index (),
620
+ on = ['key' ], how = 'inner' ).set_index (['key' , 'X' , 'Y' ])
621
+
622
+ tm .assert_frame_equal (result , expected )
0 commit comments