Skip to content

Commit 586a878

Browse files
committed
Merge pull request #3683 from jreback/dup_insert
BUG: allow insertion/deletion of columns in non-unique column DataFrames
2 parents 6ec026c + 1c0b105 commit 586a878

File tree

9 files changed

+455
-124
lines changed

9 files changed

+455
-124
lines changed

RELEASE.rst

+6
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ pandas 0.11.1
102102
GH3675_, GH3676_).
103103
- Deprecated display.height, display.width is now only a formatting option
104104
does not control triggering of summary, similar to < 0.11.0.
105+
- Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
106+
to be inserted if ``True``; the default is ``False`` (same as prior to 0.11.1) (GH3679_)
105107

106108
**Bug Fixes**
107109

@@ -133,6 +135,8 @@ pandas 0.11.1
133135
- Duplicate indexes with an empty DataFrame.from_records will return a correct frame (GH3562_)
134136
- Concat producing non-unique columns when duplicates are across dtypes is fixed (GH3602_)
135137
- Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_)
138+
- Allow insert/delete to non-unique columns (GH3679_)
139+
- Extend ``reindex`` to correctly deal with non-unique indices (GH3679_)
136140
- Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_)
137141
- Fixed bug in mixed-frame assignment with aligned series (GH3492_)
138142
- Fixed bug in selecting month/quarter/year from a series would not select the time element
@@ -242,6 +246,8 @@ pandas 0.11.1
242246
.. _GH3606: https://github.com/pydata/pandas/issues/3606
243247
.. _GH3659: https://github.com/pydata/pandas/issues/3659
244248
.. _GH3649: https://github.com/pydata/pandas/issues/3649
249+
.. _GH3679: https://github.com/pydata/pandas/issues/3679
250+
.. _GH3616: https://github.com/pydata/pandas/issues/3616
245251
.. _GH1818: https://github.com/pydata/pandas/issues/1818
246252
.. _GH3572: https://github.com/pydata/pandas/issues/3572
247253
.. _GH3582: https://github.com/pydata/pandas/issues/3582

doc/source/v0.11.1.txt

+4
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,8 @@ API changes
7171
``DataFrame.fillna()`` and ``DataFrame.replace()`` instead. (GH3582_,
7272
GH3675_, GH3676_)
7373

74+
- Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
75+
to be inserted if ``True``; the default is ``False`` (same as prior to 0.11.1) (GH3679_)
7476

7577
Enhancements
7678
~~~~~~~~~~~~
@@ -209,6 +211,7 @@ Bug Fixes
209211
and handle missing elements like unique indices (GH3561_)
210212
- Duplicate indexes with an empty DataFrame.from_records will return a correct frame (GH3562_)
211213
- Concat producing non-unique columns when duplicates are across dtypes is fixed (GH3602_)
214+
- Allow insert/delete to non-unique columns (GH3679_)
212215

213216
For example you can do
214217

@@ -270,3 +273,4 @@ on GitHub for a complete list.
270273
.. _GH3676: https://github.com/pydata/pandas/issues/3676
271274
.. _GH3675: https://github.com/pydata/pandas/issues/3675
272275
.. _GH3682: https://github.com/pydata/pandas/issues/3682
276+
.. _GH3679: https://github.com/pydata/pandas/issues/3679

pandas/core/frame.py

+9-5
Original file line numberDiff line numberDiff line change
@@ -2003,7 +2003,11 @@ def __getitem__(self, key):
20032003
return self._getitem_multilevel(key)
20042004
else:
20052005
# get column
2006-
return self._get_item_cache(key)
2006+
if self.columns.is_unique:
2007+
return self._get_item_cache(key)
2008+
2009+
# duplicate columns
2010+
return self._constructor(self._data.get(key))
20072011

20082012
def _getitem_slice(self, key):
20092013
return self._slice(key, axis=0)
@@ -2162,10 +2166,10 @@ def _set_item(self, key, value):
21622166
value = self._sanitize_column(key, value)
21632167
NDFrame._set_item(self, key, value)
21642168

2165-
def insert(self, loc, column, value):
2169+
def insert(self, loc, column, value, allow_duplicates=False):
21662170
"""
2167-
Insert column into DataFrame at specified location. Raises Exception if
2168-
column is already contained in the DataFrame
2171+
Insert column into DataFrame at specified location.
2172+
if allow_duplicates is False, Raises Exception if column is already contained in the DataFrame
21692173
21702174
Parameters
21712175
----------
@@ -2175,7 +2179,7 @@ def insert(self, loc, column, value):
21752179
value : int, Series, or array-like
21762180
"""
21772181
value = self._sanitize_column(column, value)
2178-
self._data.insert(loc, column, value)
2182+
self._data.insert(loc, column, value, allow_duplicates=allow_duplicates)
21792183

21802184
def _sanitize_column(self, key, value):
21812185
# Need to make sure new columns (which go into the BlockManager as new

pandas/core/index.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -940,8 +940,15 @@ def reindex(self, target, method=None, level=None, limit=None):
940940
if self.equals(target):
941941
indexer = None
942942
else:
943-
indexer = self.get_indexer(target, method=method,
944-
limit=limit)
943+
if self.is_unique:
944+
indexer = self.get_indexer(target, method=method,
945+
limit=limit)
946+
else:
947+
if method is not None or limit is not None:
948+
raise ValueError("cannot reindex a non-unique index "
949+
"with a method or limit")
950+
indexer, missing = self.get_indexer_non_unique(target)
951+
945952
return target, indexer
946953

947954
def join(self, other, how='left', level=None, return_indexers=False):

pandas/core/indexing.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -457,7 +457,7 @@ def _reindex(keys, level=None):
457457
else:
458458
level = None
459459

460-
if labels.is_unique:
460+
if labels.is_unique and Index(keyarr).is_unique:
461461
return _reindex(keyarr, level=level)
462462
else:
463463
indexer, missing = labels.get_indexer_non_unique(keyarr)
@@ -991,7 +991,6 @@ def _slice(self, indexer, axis=0):
991991
def _setitem_with_indexer(self, indexer, value):
992992
self.obj._set_values(indexer, value)
993993

994-
995994
def _check_bool_indexer(ax, key):
996995
# boolean indexing, need to check that the data are aligned, otherwise
997996
# disallowed
@@ -1010,7 +1009,6 @@ def _check_bool_indexer(ax, key):
10101009
result = np.asarray(result, dtype=bool)
10111010
return result
10121011

1013-
10141012
def _is_series(obj):
10151013
from pandas.core.series import Series
10161014
return isinstance(obj, Series)

0 commit comments

Comments
 (0)