Commit 75d378b (parent: ecf0847)

TST: add append/join tests for merging dup columns
BUG: join on dup columns (internally) failing
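
For context, a minimal repro of the failing case, distilled from the new test_join_dups test added in this commit (before this change, self-joining a frame whose column labels are not unique hit the internal block-merge path with stale placement info):

import numpy as np
from pandas import DataFrame, concat

# frame with duplicate column labels: A, A, B, B, A, C
df = concat([DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']),
             DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2),
                       columns=['A', 'C'])], axis=1)

# joining a dup-column frame to itself exercises the internal merge of
# duplicate blocks; this is the call that was failing
result = df.join(df, rsuffix='_2')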

File tree: 2 files changed, +47 −10 lines

pandas/tools/merge.py (+26 −10)
@@ -649,6 +649,7 @@ def __init__(self, data_list, join_index, indexers, axis=1, copy=True):
         for data, indexer in zip(data_list, indexers):
             if not data.is_consolidated():
                 data = data.consolidate()
+            data._set_ref_locs()
             self.units.append(_JoinUnit(data.blocks, indexer))

         self.join_index = join_index
@@ -682,7 +683,6 @@ def get_result(self):
         blockmaps = self._prepare_blocks()
         kinds = _get_merge_block_kinds(blockmaps)

-        result_is_unique = self.result_axes[0].is_unique
         result_blocks = []

         # maybe want to enable flexible copying <-- what did I mean?
@@ -692,23 +692,28 @@ def get_result(self):
             if klass in mapping:
                 klass_blocks.extend((unit, b) for b in mapping[klass])
             res_blk = self._get_merged_block(klass_blocks)
-
-            # if we have a unique result index, need to clear the _ref_locs
-            # a non-unique is set as we are creating
-            if result_is_unique:
-                res_blk.set_ref_locs(None)
-
             result_blocks.append(res_blk)

         return BlockManager(result_blocks, self.result_axes)

     def _get_merged_block(self, to_merge):
         if len(to_merge) > 1:
+
+            # placement set here
             return self._merge_blocks(to_merge)
         else:
             unit, block = to_merge[0]
-            return unit.reindex_block(block, self.axis,
-                                      self.result_items, copy=self.copy)
+            blk = unit.reindex_block(block, self.axis,
+                                     self.result_items, copy=self.copy)
+
+            # set placement / invalidate on a unique result
+            if self.result_items.is_unique and blk._ref_locs is not None:
+                if not self.copy:
+                    blk = blk.copy()
+                blk.set_ref_locs(None)
+
+            return blk
+

     def _merge_blocks(self, merge_chunks):
         """
@@ -736,7 +741,18 @@ def _merge_blocks(self, merge_chunks):

         # does not sort
         new_block_items = _concat_indexes([b.items for _, b in merge_chunks])
-        return make_block(out, new_block_items, self.result_items)
+
+        # need to set placement if we have a non-unique result
+        # calculate by the existing placement plus the offset in the result set
+        placement = None
+        if not self.result_items.is_unique:
+            nchunks = len(merge_chunks)
+            offsets = np.array([0] + [len(self.result_items) / nchunks] * (nchunks - 1)).cumsum()
+            placement = []
+            for (unit, blk), offset in zip(merge_chunks, offsets):
+                placement.extend(blk.ref_locs + offset)
+
+        return make_block(out, new_block_items, self.result_items, placement=placement)


 class _JoinUnit(object):
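
The new placement logic in _merge_blocks assumes each merge chunk contributes an equal-sized, contiguous slice of the result items, so a block's new placement is its old ref_locs shifted by its chunk's offset. A standalone sketch of that arithmetic (illustrative sizes, not real blocks; // stands in for the Python 2 integer / used in the diff):

import numpy as np

# suppose the merged result has 12 items built from 2 equal chunks
n_result_items, nchunks = 12, 2

# offset of each chunk's slice in the result: cumsum([0, 6]) -> [0, 6]
offsets = np.array([0] + [n_result_items // nchunks] * (nchunks - 1)).cumsum()

# a block occupying positions [0, 2] within the second chunk lands at
# positions [6, 8] of the overall result
ref_locs = np.array([0, 2])
placement = ref_locs + offsets[1]   # -> array([6, 8])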

pandas/tools/tests/test_merge.py (+21 −0)
@@ -1423,6 +1423,27 @@ def test_dups_index(self):
         assert_frame_equal(result.iloc[:10], df)
         assert_frame_equal(result.iloc[10:], df)

+        # append
+        result = df.iloc[0:8, :].append(df.iloc[8:])
+        assert_frame_equal(result, df)
+
+        result = df.iloc[0:8, :].append(df.iloc[8:9]).append(df.iloc[9:10])
+        assert_frame_equal(result, df)
+
+        expected = concat([df, df], axis=0)
+        result = df.append(df)
+        assert_frame_equal(result, expected)
+
+    def test_join_dups(self):
+        df = concat([DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B']),
+                     DataFrame(np.random.randint(0, 10, size=20).reshape(10, 2), columns=['A', 'C'])],
+                    axis=1)
+
+        expected = concat([df, df], axis=1)
+        result = df.join(df, rsuffix='_2')
+        result.columns = expected.columns
+        assert_frame_equal(result, expected)
+
     def test_handle_empty_objects(self):
         df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))

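For reference, the appended test_dups_index assertions reduce to a simple round-trip property in user terms (a sketch; the frame here is illustrative, the original test builds its df earlier in the test body):

import numpy as np
from pandas import DataFrame, concat
from pandas.util.testing import assert_frame_equal

# any frame with duplicate column labels
df = DataFrame(np.random.randn(10, 4), columns=['A', 'A', 'B', 'B'])

# splitting a dup-column frame and appending the pieces must round-trip
assert_frame_equal(df.iloc[0:8, :].append(df.iloc[8:]), df)

# and self-append must match an axis=0 concat
assert_frame_equal(df.append(df), concat([df, df], axis=0))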