Skip to content

Commit ecf0847

Browse files
committed
BUG: Bug in concatenation with duplicate columns across dtypes not merging with axis=0 (GH4771)
TST: Bug in iloc with a slice index failing (GH4771)
1 parent c1ab38e commit ecf0847

File tree

5 files changed

+59
-3
lines changed

5 files changed

+59
-3
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ See :ref:`Internal Refactoring<whatsnew_0130.refactoring>`
331331
- Bug in multi-indexing with a partial string selection as one part of a MultiIndex (:issue:`4758`)
332332
- Bug with reindexing on the index with a non-unique index will now raise ``ValueError`` (:issue:`4746`)
333333
- Bug in setting with ``loc/ix`` using a single indexer with a multi-index axis and a numpy array, related to (:issue:`3777`)
334+
- Bug in concatenation with duplicate columns across dtypes not merging with axis=0 (:issue:`4771`)
335+
- Bug in ``iloc`` with a slice index failing (:issue:`4771`)
334336

335337
pandas 0.12
336338
===========

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2174,7 +2174,7 @@ def get_slice(self, slobj, axis=0, raise_on_error=False):
21742174
placement=blk._ref_locs)
21752175
new_blocks = [newb]
21762176
else:
2177-
return self.reindex_items(new_items)
2177+
return self.reindex_items(new_items, indexer=np.arange(len(self.items))[slobj])
21782178
else:
21792179
new_blocks = self._slice_blocks(slobj, axis)
21802180

pandas/tests/test_indexing.py

+24-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
MultiIndex, DatetimeIndex, Timestamp)
1717
from pandas.util.testing import (assert_almost_equal, assert_series_equal,
1818
assert_frame_equal, assert_panel_equal)
19-
from pandas import compat
19+
from pandas import compat, concat
2020

2121
import pandas.util.testing as tm
2222
import pandas.lib as lib
@@ -359,6 +359,29 @@ def test_iloc_getitem_slice(self):
359359
self.check_result('slice', 'iloc', slice(1,3), 'ix', { 0 : [2,4], 1: [3,6], 2: [4,8] }, typs = ['ints'])
360360
self.check_result('slice', 'iloc', slice(1,3), 'indexer', slice(1,3), typs = ['labels','mixed','ts','floats','empty'], fails = IndexError)
361361

362+
def test_iloc_getitem_slice_dups(self):
363+
364+
df1 = DataFrame(np.random.randn(10,4),columns=['A','A','B','B'])
365+
df2 = DataFrame(np.random.randint(0,10,size=20).reshape(10,2),columns=['A','C'])
366+
367+
# axis=1
368+
df = concat([df1,df2],axis=1)
369+
assert_frame_equal(df.iloc[:,:4],df1)
370+
assert_frame_equal(df.iloc[:,4:],df2)
371+
372+
df = concat([df2,df1],axis=1)
373+
assert_frame_equal(df.iloc[:,:2],df2)
374+
assert_frame_equal(df.iloc[:,2:],df1)
375+
376+
assert_frame_equal(df.iloc[:,0:3],concat([df2,df1.iloc[:,[0]]],axis=1))
377+
378+
# axis=0
379+
df = concat([df,df],axis=0)
380+
assert_frame_equal(df.iloc[0:10,:2],df2)
381+
assert_frame_equal(df.iloc[0:10,2:],df1)
382+
assert_frame_equal(df.iloc[10:,:2],df2)
383+
assert_frame_equal(df.iloc[10:,2:],df1)
384+
362385
def test_iloc_getitem_out_of_bounds(self):
363386

364387
# out-of-bounds slice

pandas/tools/merge.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -992,6 +992,7 @@ def _prepare_blocks(self):
992992
blockmaps = []
993993
for data in reindexed_data:
994994
data = data.consolidate()
995+
data._set_ref_locs()
995996
blockmaps.append(data.get_block_map(typ='dict'))
996997
return blockmaps, reindexed_data
997998

@@ -1063,7 +1064,10 @@ def _concat_blocks(self, blocks):
10631064
# or maybe would require performance test)
10641065
raise PandasError('dtypes are not consistent throughout '
10651066
'DataFrames')
1066-
return make_block(concat_values, blocks[0].items, self.new_axes[0])
1067+
return make_block(concat_values,
1068+
blocks[0].items,
1069+
self.new_axes[0],
1070+
placement=blocks[0]._ref_locs)
10671071
else:
10681072

10691073
offsets = np.r_[0, np.cumsum([len(x._data.axes[0]) for

pandas/tools/tests/test_merge.py

+27
Original file line numberDiff line numberDiff line change
@@ -1396,6 +1396,33 @@ def test_crossed_dtypes_weird_corner(self):
13961396
[df, df2], keys=['one', 'two'], names=['first', 'second'])
13971397
self.assertEqual(result.index.names, ('first', 'second'))
13981398

1399+
def test_dups_index(self):
1400+
# GH 4771
1401+
1402+
# single dtypes
1403+
df = DataFrame(np.random.randint(0,10,size=40).reshape(10,4),columns=['A','A','C','C'])
1404+
1405+
result = concat([df,df],axis=1)
1406+
assert_frame_equal(result.iloc[:,:4],df)
1407+
assert_frame_equal(result.iloc[:,4:],df)
1408+
1409+
result = concat([df,df],axis=0)
1410+
assert_frame_equal(result.iloc[:10],df)
1411+
assert_frame_equal(result.iloc[10:],df)
1412+
1413+
# multi dtypes
1414+
df = concat([DataFrame(np.random.randn(10,4),columns=['A','A','B','B']),
1415+
DataFrame(np.random.randint(0,10,size=20).reshape(10,2),columns=['A','C'])],
1416+
axis=1)
1417+
1418+
result = concat([df,df],axis=1)
1419+
assert_frame_equal(result.iloc[:,:6],df)
1420+
assert_frame_equal(result.iloc[:,6:],df)
1421+
1422+
result = concat([df,df],axis=0)
1423+
assert_frame_equal(result.iloc[:10],df)
1424+
assert_frame_equal(result.iloc[10:],df)
1425+
13991426
def test_handle_empty_objects(self):
14001427
df = DataFrame(np.random.randn(10, 4), columns=list('abcd'))
14011428

0 commit comments

Comments
 (0)