Skip to content

BUG: Fix an issue in merging blocks where the resulting DataFrame had partially set _ref_locs #4410

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 31, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,10 @@ pandas 0.13
- Fixed an issue where ``PeriodIndex`` joining with self was returning a new
instance rather than the same instance (:issue:`4379`); also adds a test
for this for the other index types
- Fixed a bug with all the dtypes being converted to object when using the CSV cparser
- Fixed a bug with all the dtypes being converted to object when using the CSV cparser
with the usecols parameter (:issue:`3192`)
- Fixed an issue in merging blocks where the resulting DataFrame had partially
set ``_ref_locs`` (:issue:`4403`)

pandas 0.12
===========
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

from __future__ import print_function
# pylint: disable-msg=W0612,E1101
from copy import deepcopy
Expand Down Expand Up @@ -2956,6 +2958,27 @@ def check(result, expected=None):
expected = np.array([[1,2.5],[3,4.5]])
self.assert_((result == expected).all().all())

# rename, GH 4403
df4 = DataFrame({'TClose': [22.02],
'RT': [0.0454],
'TExg': [0.0422]},
index=MultiIndex.from_tuples([(600809, 20130331)], names=['STK_ID', 'RPT_Date']))

df5 = DataFrame({'STK_ID': [600809] * 3,
'RPT_Date': [20120930,20121231,20130331],
'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')],
'TClose': [38.05, 41.66, 30.01]},
index=MultiIndex.from_tuples([(600809, 20120930), (600809, 20121231),(600809,20130331)], names=['STK_ID', 'RPT_Date']))

k = pd.merge(df4,df5,how='inner',left_index=True,right_index=True)
result = k.rename(columns={'TClose_x':'TClose', 'TClose_y':'QT_Close'})
str(result)
result.dtypes

expected = DataFrame([[0.0454, 22.02, 0.0422, 20130331, 600809, u('饡驦'), 30.01 ]],
columns=['RT','TClose','TExg','RPT_Date','STK_ID','STK_Name','QT_Close']).set_index(['STK_ID','RPT_Date'],drop=False)
assert_frame_equal(result,expected)

def test_insert_benchmark(self):
# from the vb_suite/frame_methods/frame_insert_columns
N = 10
Expand Down
9 changes: 8 additions & 1 deletion pandas/tools/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,7 @@ def get_result(self):
blockmaps = self._prepare_blocks()
kinds = _get_merge_block_kinds(blockmaps)

result_is_unique = self.result_axes[0].is_unique
result_blocks = []

# maybe want to enable flexible copying <-- what did I mean?
Expand All @@ -692,6 +693,12 @@ def get_result(self):
if klass in mapping:
klass_blocks.extend((unit, b) for b in mapping[klass])
res_blk = self._get_merged_block(klass_blocks)

# if the result index is unique, clear any _ref_locs that were set on the
# block while it was being created
if result_is_unique:
res_blk.set_ref_locs(None)

result_blocks.append(res_blk)

return BlockManager(result_blocks, self.result_axes)
Expand Down Expand Up @@ -1070,7 +1077,7 @@ def _concat_blocks(self, blocks):
# map the column location to the block location
# GH3602
if not self.new_axes[0].is_unique:
block._ref_locs = indexer
block.set_ref_locs(indexer)

return block

Expand Down