From 36031da6404d72d39ac060ea25ace4343757e655 Mon Sep 17 00:00:00 2001 From: jreback Date: Tue, 30 Jul 2013 20:14:51 -0400 Subject: [PATCH] BUG: Fix an issue in merging blocks where the resulting DataFrame had partially set _ref_locs (GH4403) --- doc/source/release.rst | 4 +++- pandas/tests/test_frame.py | 23 +++++++++++++++++++++++ pandas/tools/merge.py | 9 ++++++++- 3 files changed, 34 insertions(+), 2 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index ba3d0c359be9e..a2b525a737879 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -94,8 +94,10 @@ pandas 0.13 - Fixed an issue where ``PeriodIndex`` joining with self was returning a new instance rather than the same instance (:issue:`4379`); also adds a test for this for the other index types - - Fixed a bug with all the dtypes being converted to object when using the CSV cparser + - Fixed a bug with all the dtypes being converted to object when using the CSV cparser with the usecols parameter (:issue: `3192`) + - Fix an issue in merging blocks where the resulting DataFrame had partially + set _ref_locs (:issue:`4403`) pandas 0.12 =========== diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 842f114090a50..1b405eae08797 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from __future__ import print_function # pylint: disable-msg=W0612,E1101 from copy import deepcopy @@ -2956,6 +2958,27 @@ def check(result, expected=None): expected = np.array([[1,2.5],[3,4.5]]) self.assert_((result == expected).all().all()) + # rename, GH 4403 + df4 = DataFrame({'TClose': [22.02], + 'RT': [0.0454], + 'TExg': [0.0422]}, + index=MultiIndex.from_tuples([(600809, 20130331)], names=['STK_ID', 'RPT_Date'])) + + df5 = DataFrame({'STK_ID': [600809] * 3, + 'RPT_Date': [20120930,20121231,20130331], + 'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')], + 'TClose': [38.05, 41.66, 30.01]}, + 
index=MultiIndex.from_tuples([(600809, 20120930), (600809, 20121231),(600809,20130331)], names=['STK_ID', 'RPT_Date'])) + + k = pd.merge(df4,df5,how='inner',left_index=True,right_index=True) + result = k.rename(columns={'TClose_x':'TClose', 'TClose_y':'QT_Close'}) + str(result) + result.dtypes + + expected = DataFrame([[0.0454, 22.02, 0.0422, 20130331, 600809, u('饡驦'), 30.01 ]], + columns=['RT','TClose','TExg','RPT_Date','STK_ID','STK_Name','QT_Close']).set_index(['STK_ID','RPT_Date'],drop=False) + assert_frame_equal(result,expected) + def test_insert_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 7133782fa66d3..c1d8a0d876866 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -683,6 +683,7 @@ def get_result(self): blockmaps = self._prepare_blocks() kinds = _get_merge_block_kinds(blockmaps) + result_is_unique = self.result_axes[0].is_unique result_blocks = [] # maybe want to enable flexible copying <-- what did I mean? @@ -692,6 +693,12 @@ def get_result(self): if klass in mapping: klass_blocks.extend((unit, b) for b in mapping[klass]) res_blk = self._get_merged_block(klass_blocks) + + # if the result index is unique, clear the _ref_locs on the merged block; + # for a non-unique index they are already set while the block is created + if result_is_unique: + res_blk.set_ref_locs(None) + result_blocks.append(res_blk) return BlockManager(result_blocks, self.result_axes) @@ -1070,7 +1077,7 @@ def _concat_blocks(self, blocks): # map the column location to the block location # GH3602 if not self.new_axes[0].is_unique: - block._ref_locs = indexer + block.set_ref_locs(indexer) return block