Skip to content

Commit 863aa1d

Browse files
changhiskhan authored and wesm committed
BUG: mixed float64 float32 merge failure #1849
1 parent 1e1c922 commit 863aa1d

File tree

2 files changed

+55
-22
lines changed

2 files changed

+55
-22
lines changed

pandas/tools/merge.py

Lines changed: 42 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -628,8 +628,10 @@ def _prepare_blocks(self):
628628

629629
for unit in self.units:
630630
join_blocks = unit.get_upcasted_blocks()
631-
type_map = dict((type(blk), blk) for blk in join_blocks)
632-
blockmaps.append(type_map)
631+
type_map = {}
632+
for blk in join_blocks:
633+
type_map.setdefault(type(blk), []).append(blk)
634+
blockmaps.append((unit, type_map))
633635

634636
return blockmaps
635637

@@ -640,26 +642,22 @@ def get_result(self):
640642
merged : BlockManager
641643
"""
642644
blockmaps = self._prepare_blocks()
643-
kinds = _get_all_block_kinds(blockmaps)
645+
kinds = _get_merge_block_kinds(blockmaps)
644646

645647
result_blocks = []
646648

647649
# maybe want to enable flexible copying <-- what did I mean?
648650
for klass in kinds:
649-
klass_blocks = [mapping.get(klass) for mapping in blockmaps]
651+
klass_blocks = []
652+
for unit, mapping in blockmaps:
653+
if klass in mapping:
654+
klass_blocks.extend((unit, b) for b in mapping[klass])
650655
res_blk = self._get_merged_block(klass_blocks)
651656
result_blocks.append(res_blk)
652657

653658
return BlockManager(result_blocks, self.result_axes)
654659

655-
def _get_merged_block(self, blocks):
656-
657-
to_merge = []
658-
659-
for unit, block in zip(self.units, blocks):
660-
if block is not None:
661-
to_merge.append((unit, block))
662-
660+
def _get_merged_block(self, to_merge):
663661
if len(to_merge) > 1:
664662
return self._merge_blocks(to_merge)
665663
else:
@@ -682,7 +680,8 @@ def _merge_blocks(self, merge_chunks):
682680
out_shape[self.axis] = n
683681

684682
# Should use Fortran order??
685-
out = np.empty(out_shape, dtype=fblock.values.dtype)
683+
block_dtype = _get_block_dtype([x[1] for x in merge_chunks])
684+
out = np.empty(out_shape, dtype=block_dtype)
686685

687686
sofar = 0
688687
for unit, blk in merge_chunks:
@@ -787,6 +786,25 @@ def _get_all_block_kinds(blockmaps):
787786
kinds |= set(mapping)
788787
return kinds
789788

789+
def _get_merge_block_kinds(blockmaps):
790+
kinds = set()
791+
for _, mapping in blockmaps:
792+
kinds |= set(mapping)
793+
return kinds
794+
795+
def _get_block_dtype(blocks):
796+
if len(blocks) == 0:
797+
return object
798+
blk1 = blocks[0]
799+
dtype = blk1.dtype
800+
801+
if issubclass(dtype.type, np.floating):
802+
for blk in blocks:
803+
if blk.dtype.type == np.float64:
804+
return blk.dtype
805+
806+
return dtype
807+
790808
#----------------------------------------------------------------------
791809
# Concatenate DataFrame objects
792810

@@ -928,16 +946,20 @@ def get_result(self):
928946
def _get_fresh_axis(self):
929947
return Index(np.arange(len(self._get_concat_axis())))
930948

949+
def _prepare_blocks(self):
950+
reindexed_data = self._get_reindexed_data()
951+
952+
blockmaps = []
953+
for data in reindexed_data:
954+
data = data.consolidate()
955+
type_map = dict((type(blk), blk) for blk in data.blocks)
956+
blockmaps.append(type_map)
957+
return blockmaps
958+
931959
def _get_concatenated_data(self):
932960
try:
933961
# need to conform to same other (joined) axes for block join
934-
reindexed_data = self._get_reindexed_data()
935-
936-
blockmaps = []
937-
for data in reindexed_data:
938-
data = data.consolidate()
939-
type_map = dict((type(blk), blk) for blk in data.blocks)
940-
blockmaps.append(type_map)
962+
blockmaps = self._prepare_blocks()
941963
kinds = _get_all_block_kinds(blockmaps)
942964

943965
new_blocks = []

pandas/tools/tests/test_merge.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -414,6 +414,19 @@ def test_join_float64_float32(self):
414414
expected = a.join(b.astype('f8'))
415415
assert_frame_equal(joined, expected)
416416

417+
joined = b.join(a)
418+
assert_frame_equal(expected, joined.reindex(columns=['a', 'b', 'c']))
419+
420+
a = np.random.randint(0, 5, 100)
421+
b = np.random.random(100).astype('Float64')
422+
c = np.random.random(100).astype('Float32')
423+
df = DataFrame({'a': a, 'b' : b, 'c' : c})
424+
xpdf = DataFrame({'a': a, 'b' : b, 'c' : c.astype('Float64')})
425+
s = DataFrame(np.random.random(5).astype('f'), columns=['md'])
426+
rs = df.merge(s, left_on='a', right_index=True)
427+
xp = xpdf.merge(s.astype('f8'), left_on='a', right_index=True)
428+
assert_frame_equal(rs, xp)
429+
417430
def test_join_many_non_unique_index(self):
418431
df1 = DataFrame({"a": [1,1], "b": [1,1], "c": [10,20]})
419432
df2 = DataFrame({"a": [1,1], "b": [1,2], "d": [100,200]})
@@ -1466,5 +1479,3 @@ def test_multigroup(self):
14661479
import nose
14671480
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
14681481
exit=False)
1469-
1470-

0 commit comments

Comments
 (0)