Skip to content

Commit a00f0fe

Browse files
committed
BUG: Fix issue with inserting duplicate columns in a dataframe (pandas-dev#14291)
1 parent d98e982 commit a00f0fe

File tree

4 files changed

+28 -7 lines changed

4 files changed

+28 -7 lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -45,3 +45,4 @@ Bug Fixes
4545

4646
- Bug in ``pd.concat`` where names of the ``keys`` were not propagated to the resulting ``MultiIndex`` (:issue:`14252`)
4747
- Bug in ``MultiIndex.set_levels`` where illegal level values were still set after raising an error (:issue:`13754`)
48+
- Bug in ``DataFrame.insert`` where multiple calls with duplicate columns can fail (:issue:`14291`)

pandas/core/frame.py

+12-6
Original file line number | Diff line number | Diff line change
@@ -2487,7 +2487,7 @@ def _set_item(self, key, value):
24872487

24882488
# check if we are modifying a copy
24892489
# try to set first as we want an invalid
2490-
# value exeption to occur first
2490+
# value exception to occur first
24912491
if len(self):
24922492
self._check_setitem_copy()
24932493

@@ -2506,7 +2506,7 @@ def insert(self, loc, column, value, allow_duplicates=False):
25062506
value : int, Series, or array-like
25072507
"""
25082508
self._ensure_valid_index(value)
2509-
value = self._sanitize_column(column, value)
2509+
value = self._sanitize_column(column, value, broadcast=False)
25102510
self._data.insert(loc, column, value,
25112511
allow_duplicates=allow_duplicates)
25122512

@@ -2590,9 +2590,15 @@ def assign(self, **kwargs):
25902590

25912591
return data
25922592

2593-
def _sanitize_column(self, key, value):
2594-
# Need to make sure new columns (which go into the BlockManager as new
2595-
# blocks) are always copied
2593+
def _sanitize_column(self, key, value, broadcast=True):
2594+
"""
2595+
Ensures new columns (which go into the BlockManager as new blocks) are
2596+
always copied.
2597+
2598+
The "broadcast" parameter indicates whether all columns with the given
2599+
key should be returned. The default behavior is desirable when
2600+
calling this method prior to modifying existing values in a DataFrame.
2601+
"""
25962602

25972603
def reindexer(value):
25982604
# reindex if necessary
@@ -2665,7 +2671,7 @@ def reindexer(value):
26652671
return value
26662672

26672673
# broadcast across multiple columns if necessary
2668-
if key in self.columns and value.ndim == 1:
2674+
if broadcast and key in self.columns and value.ndim == 1:
26692675
if (not self.columns.is_unique or
26702676
isinstance(self.columns, MultiIndex)):
26712677
existing_piece = self[key]

pandas/sparse/frame.py

+6-1
Original file line number | Diff line number | Diff line change
@@ -302,7 +302,12 @@ def fillna(self, value=None, method=None, axis=0, inplace=False,
302302
# ----------------------------------------------------------------------
303303
# Support different internal representation of SparseDataFrame
304304

305-
def _sanitize_column(self, key, value):
305+
def _sanitize_column(self, key, value, broadcast=True):
306+
"""
307+
The "broadcast" parameter was added to match the method signature of
308+
DataFrame._sanitize_column. However, this method does not make use of
309+
broadcasting.
310+
"""
306311
sp_maker = lambda x, index=None: SparseArray(
307312
x, index=index, fill_value=self._default_fill_value,
308313
kind=self._default_kind)

pandas/tests/frame/test_mutate_columns.py

+9
Original file line number | Diff line number | Diff line change
@@ -163,6 +163,15 @@ def test_insert(self):
163163
exp = DataFrame(data={'X': ['x', 'y', 'z']}, index=['A', 'B', 'C'])
164164
assert_frame_equal(df, exp)
165165

166+
# GH 14291
167+
df = DataFrame()
168+
df.insert(0, 'A', ['a', 'b', 'c'], allow_duplicates=True)
169+
df.insert(0, 'A', ['a', 'b', 'c'], allow_duplicates=True)
170+
df.insert(0, 'A', ['a', 'b', 'c'], allow_duplicates=True)
171+
exp = DataFrame([['a', 'a', 'a'], ['b', 'b', 'b'],
172+
['c', 'c', 'c']], columns=['A', 'A', 'A'])
173+
assert_frame_equal(df, exp)
174+
166175
def test_delitem(self):
167176
del self.frame['A']
168177
self.assertNotIn('A', self.frame)

0 commit comments

Comments
 (0)