@@ -35,29 +35,29 @@ class TestMerge(unittest.TestCase):
35
35
36
36
def setUp (self ):
37
37
# aggregate multiple columns
38
- self .df = DataFrame ({'key1' : get_test_data (),
39
- 'key2' : get_test_data (),
40
- 'data1' : np .random .randn (N ),
41
- 'data2' : np .random .randn (N )})
38
+ self .df = DataFrame ({'key1' : get_test_data (),
39
+ 'key2' : get_test_data (),
40
+ 'data1' : np .random .randn (N ),
41
+ 'data2' : np .random .randn (N )})
42
42
43
43
# exclude a couple keys for fun
44
44
self .df = self .df [self .df ['key2' ] > 1 ]
45
45
46
46
self .df2 = DataFrame ({'key1' : get_test_data (n = N // 5 ),
47
47
'key2' : get_test_data (ngroups = NGROUPS // 2 ,
48
48
n = N // 5 ),
49
- 'value' : np .random .randn (N // 5 )})
49
+ 'value' : np .random .randn (N // 5 )})
50
50
51
51
index , data = tm .getMixedTypeDict ()
52
52
self .target = DataFrame (data , index = index )
53
53
54
54
# Join on string value
55
- self .source = DataFrame ({'MergedA' : data ['A' ], 'MergedD' : data ['D' ]},
55
+ self .source = DataFrame ({'MergedA' : data ['A' ], 'MergedD' : data ['D' ]},
56
56
index = data ['C' ])
57
57
58
- self .left = DataFrame ({'key' : ['a' , 'b' , 'c' , 'd' , 'e' , 'e' , 'a' ],
59
- 'v1' : np .random .randn (7 )})
60
- self .right = DataFrame ({'v2' : np .random .randn (4 )},
58
+ self .left = DataFrame ({'key' : ['a' , 'b' , 'c' , 'd' , 'e' , 'e' , 'a' ],
59
+ 'v1' : np .random .randn (7 )})
60
+ self .right = DataFrame ({'v2' : np .random .randn (4 )},
61
61
index = ['d' , 'b' , 'c' , 'a' ])
62
62
63
63
def test_cython_left_outer_join (self ):
@@ -134,9 +134,6 @@ def test_cython_inner_join(self):
134
134
self .assert_ (np .array_equal (ls , exp_ls ))
135
135
self .assert_ (np .array_equal (rs , exp_rs ))
136
136
137
- def test_cython_full_outer_join (self ):
138
- pass
139
-
140
137
def test_left_outer_join (self ):
141
138
joined_key2 = merge (self .df , self .df2 , on = 'key2' )
142
139
_check_join (self .df , self .df2 , joined_key2 , ['key2' ], how = 'left' )
@@ -197,11 +194,11 @@ def test_join_on(self):
197
194
self .assert_ (np .array_equal (merged ['MergedD' ], target ['D' ]))
198
195
199
196
# join with duplicates (fix regression from DataFrame/Matrix merge)
200
- df = DataFrame ({'key' : ['a' , 'a' , 'b' , 'b' , 'c' ]})
201
- df2 = DataFrame ({'value' : [0 , 1 , 2 ]}, index = ['a' , 'b' , 'c' ])
197
+ df = DataFrame ({'key' : ['a' , 'a' , 'b' , 'b' , 'c' ]})
198
+ df2 = DataFrame ({'value' : [0 , 1 , 2 ]}, index = ['a' , 'b' , 'c' ])
202
199
joined = df .join (df2 , on = 'key' )
203
- expected = DataFrame ({'key' : ['a' , 'a' , 'b' , 'b' , 'c' ],
204
- 'value' : [0 , 0 , 1 , 1 , 2 ]})
200
+ expected = DataFrame ({'key' : ['a' , 'a' , 'b' , 'b' , 'c' ],
201
+ 'value' : [0 , 0 , 1 , 1 , 2 ]})
205
202
assert_frame_equal (joined , expected )
206
203
207
204
# Test when some are missing
@@ -245,8 +242,8 @@ def test_join_with_len0(self):
245
242
self .assertEqual (len (merged2 ), 0 )
246
243
247
244
def test_join_on_inner (self ):
248
- df = DataFrame ({'key' : ['a' , 'a' , 'd' , 'b' , 'b' , 'c' ]})
249
- df2 = DataFrame ({'value' : [0 , 1 ]}, index = ['a' , 'b' ])
245
+ df = DataFrame ({'key' : ['a' , 'a' , 'd' , 'b' , 'b' , 'c' ]})
246
+ df2 = DataFrame ({'value' : [0 , 1 ]}, index = ['a' , 'b' ])
250
247
251
248
joined = df .join (df2 , on = 'key' , how = 'inner' )
252
249
@@ -257,8 +254,8 @@ def test_join_on_inner(self):
257
254
self .assert_ (joined .index .equals (expected .index ))
258
255
259
256
def test_join_on_singlekey_list (self ):
260
- df = DataFrame ({'key' : ['a' , 'a' , 'b' , 'b' , 'c' ]})
261
- df2 = DataFrame ({'value' : [0 , 1 , 2 ]}, index = ['a' , 'b' , 'c' ])
257
+ df = DataFrame ({'key' : ['a' , 'a' , 'b' , 'b' , 'c' ]})
258
+ df2 = DataFrame ({'value' : [0 , 1 , 2 ]}, index = ['a' , 'b' , 'c' ])
262
259
263
260
# corner cases
264
261
joined = df .join (df2 , on = ['key' ])
@@ -277,18 +274,18 @@ def test_join_on_series_buglet(self):
277
274
ds = Series ([2 ], index = [1 ], name = 'b' )
278
275
result = df .join (ds , on = 'a' )
279
276
expected = DataFrame ({'a' : [1 , 1 ],
280
- 'b' : [2 , 2 ]}, index = df .index )
277
+ 'b' : [2 , 2 ]}, index = df .index )
281
278
tm .assert_frame_equal (result , expected )
282
279
283
280
def test_join_index_mixed (self ):
284
281
285
- df1 = DataFrame ({'A' : 1. , 'B' : 2 , 'C' : 'foo' , 'D' : True },
282
+ df1 = DataFrame ({'A' : 1. , 'B' : 2 , 'C' : 'foo' , 'D' : True },
286
283
index = np .arange (10 ),
287
284
columns = ['A' , 'B' , 'C' , 'D' ])
288
285
self .assert_ (df1 ['B' ].dtype == np .int64 )
289
286
self .assert_ (df1 ['D' ].dtype == np .bool_ )
290
287
291
- df2 = DataFrame ({'A' : 1. , 'B' : 2 , 'C' : 'foo' , 'D' : True },
288
+ df2 = DataFrame ({'A' : 1. , 'B' : 2 , 'C' : 'foo' , 'D' : True },
292
289
index = np .arange (0 , 10 , 2 ),
293
290
columns = ['A' , 'B' , 'C' , 'D' ])
294
291
@@ -375,8 +372,8 @@ def test_join_inner_multiindex(self):
375
372
'three' , 'one' ]
376
373
377
374
data = np .random .randn (len (key1 ))
378
- data = DataFrame ({'key1' : key1 , 'key2' : key2 ,
379
- 'data' : data })
375
+ data = DataFrame ({'key1' : key1 , 'key2' : key2 ,
376
+ 'data' : data })
380
377
381
378
index = MultiIndex (levels = [['foo' , 'bar' , 'baz' , 'qux' ],
382
379
['one' , 'two' , 'three' ]],
@@ -416,9 +413,9 @@ def test_join_float64_float32(self):
416
413
assert_frame_equal (joined , expected )
417
414
418
415
def test_merge_index_singlekey_right_vs_left (self ):
419
- left = DataFrame ({'key' : ['a' , 'b' , 'c' , 'd' , 'e' , 'e' , 'a' ],
420
- 'v1' : np .random .randn (7 )})
421
- right = DataFrame ({'v2' : np .random .randn (4 )},
416
+ left = DataFrame ({'key' : ['a' , 'b' , 'c' , 'd' , 'e' , 'e' , 'a' ],
417
+ 'v1' : np .random .randn (7 )})
418
+ right = DataFrame ({'v2' : np .random .randn (4 )},
422
419
index = ['d' , 'b' , 'c' , 'a' ])
423
420
424
421
merged1 = merge (left , right , left_on = 'key' ,
@@ -434,9 +431,9 @@ def test_merge_index_singlekey_right_vs_left(self):
434
431
assert_frame_equal (merged1 , merged2 .ix [:, merged1 .columns ])
435
432
436
433
def test_merge_index_singlekey_inner (self ):
437
- left = DataFrame ({'key' : ['a' , 'b' , 'c' , 'd' , 'e' , 'e' , 'a' ],
438
- 'v1' : np .random .randn (7 )})
439
- right = DataFrame ({'v2' : np .random .randn (4 )},
434
+ left = DataFrame ({'key' : ['a' , 'b' , 'c' , 'd' , 'e' , 'e' , 'a' ],
435
+ 'v1' : np .random .randn (7 )})
436
+ right = DataFrame ({'v2' : np .random .randn (4 )},
440
437
index = ['d' , 'b' , 'c' , 'a' ])
441
438
442
439
# inner join
@@ -459,6 +456,9 @@ def test_merge_misspecified(self):
459
456
self .assertRaises (Exception , merge , self .left , self .left ,
460
457
left_on = 'key' , on = 'key' )
461
458
459
+ self .assertRaises (Exception , merge , self .df , self .df2 ,
460
+ left_on = ['key1' ], right_on = ['key1' , 'key2' ])
461
+
462
462
def test_merge_overlap (self ):
463
463
merged = merge (self .left , self .left , on = 'key' )
464
464
exp_len = (self .left ['key' ].value_counts () ** 2 ).sum ()
@@ -467,9 +467,9 @@ def test_merge_overlap(self):
467
467
self .assert_ ('v1.y' in merged )
468
468
469
469
def test_merge_different_column_key_names (self ):
470
- left = DataFrame ({'lkey' : ['foo' , 'bar' , 'baz' , 'foo' ],
471
- 'value' : [1 , 2 , 3 , 4 ]})
472
- right = DataFrame ({'rkey' : ['foo' , 'bar' , 'qux' , 'foo' ],
470
+ left = DataFrame ({'lkey' : ['foo' , 'bar' , 'baz' , 'foo' ],
471
+ 'value' : [1 , 2 , 3 , 4 ]})
472
+ right = DataFrame ({'rkey' : ['foo' , 'bar' , 'qux' , 'foo' ],
473
473
'value' : [5 , 6 , 7 , 8 ]})
474
474
475
475
merged = left .merge (right , left_on = 'lkey' , right_on = 'rkey' ,
@@ -552,6 +552,13 @@ def test_handle_join_key_pass_array(self):
552
552
self .assert_ (np .array_equal (merged ['key_0' ],
553
553
np .array ([1 , 1 , 1 , 1 , 2 , 2 , 3 , 4 , 5 ])))
554
554
555
+ left = DataFrame ({'value' : range (3 )})
556
+ right = DataFrame ({'rvalue' : range (6 )})
557
+
558
+ key = np .array ([0 , 1 , 1 , 2 , 2 , 3 ])
559
+ merged = merge (left , right , left_index = True , right_on = key , how = 'outer' )
560
+ self .assert_ (np .array_equal (merged ['key_0' ], key ))
561
+
555
562
class TestMergeMulti (unittest .TestCase ):
556
563
557
564
def setUp (self ):
0 commit comments