pandas-dev · jreback · May 30, 2013 · May 21, 2013 · May 23, 2013 · May 24, 2013
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -102,6 +102,8 @@ pandas 0.11.1
     GH3675_, GH3676_).
   - Deprecated display.height, display.width is now only a formatting option
     does not control triggering of summary, similar to < 0.11.0.
+  - Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
+    to be inserted if ``True``; the default is ``False`` (same behavior as prior to 0.11.1) (GH3679_)
 
 **Bug Fixes**
 
@@ -133,6 +135,8 @@ pandas 0.11.1
     - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (GH3562_)
     - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_)
     - Non-unique indexing with a slice via ``loc`` and friends fixed (GH3659_)
+    - Allow insert/delete to non-unique columns (GH3679_)
+    - Extend ``reindex`` to correctly deal with non-unique indices (GH3679_)
   - Fixed bug in groupby with empty series referencing a variable before assignment. (GH3510_)
   - Fixed bug in mixed-frame assignment with aligned series (GH3492_)
   - Fixed bug in selecting month/quarter/year from a series would not select the time element
@@ -242,6 +246,8 @@ pandas 0.11.1
 .. _GH3606: https://github.com/pydata/pandas/issues/3606
 .. _GH3659: https://github.com/pydata/pandas/issues/3659
 .. _GH3649: https://github.com/pydata/pandas/issues/3649
+.. _GH3679: https://github.com/pydata/pandas/issues/3679
+.. _GH3616: https://github.com/pydata/pandas/issues/3616
 .. _GH1818: https://github.com/pydata/pandas/issues/1818
 .. _GH3572: https://github.com/pydata/pandas/issues/3572
 .. _GH3582: https://github.com/pydata/pandas/issues/3582

diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt
@@ -71,6 +71,8 @@ API changes
     ``DataFrame.fillna()`` and ``DataFrame.replace()`` instead. (GH3582_,
     GH3675_, GH3676_)
 
+  - Add the keyword ``allow_duplicates`` to ``DataFrame.insert`` to allow a duplicate column
+    to be inserted if ``True``; the default is ``False`` (same behavior as prior to 0.11.1) (GH3679_)
 
 Enhancements
 ~~~~~~~~~~~~
@@ -209,6 +211,7 @@ Bug Fixes
       and handle missing elements like unique indices (GH3561_)
     - Duplicate indexes with and empty DataFrame.from_records will return a correct frame (GH3562_)
     - Concat to produce a non-unique columns when duplicates are across dtypes is fixed (GH3602_)
+    - Allow insert/delete to non-unique columns (GH3679_)
 
     For example you can do
 
@@ -270,3 +273,4 @@ on GitHub for a complete list.
 .. _GH3676: https://github.com/pydata/pandas/issues/3676
 .. _GH3675: https://github.com/pydata/pandas/issues/3675
 .. _GH3682: https://github.com/pydata/pandas/issues/3682
+.. _GH3679: https://github.com/pydata/pandas/issues/3679
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2003,7 +2003,11 @@ def __getitem__(self, key):
             return self._getitem_multilevel(key)
         else:
             # get column
-            return self._get_item_cache(key)
+            if self.columns.is_unique:
+                return self._get_item_cache(key)
+
+            # duplicate columns
+            return self._constructor(self._data.get(key))
 
     def _getitem_slice(self, key):
         return self._slice(key, axis=0)
@@ -2162,10 +2166,10 @@ def _set_item(self, key, value):
         value = self._sanitize_column(key, value)
         NDFrame._set_item(self, key, value)
 
-    def insert(self, loc, column, value):
+    def insert(self, loc, column, value, allow_duplicates=False):
         """
-        Insert column into DataFrame at specified location. Raises Exception if
-        column is already contained in the DataFrame
+        Insert column into DataFrame at specified location.
+        If allow_duplicates is False, raises Exception if column is already contained in the DataFrame.
 
         Parameters
         ----------
@@ -2175,7 +2179,7 @@ def insert(self, loc, column, value):
         value : int, Series, or array-like
         """
         value = self._sanitize_column(column, value)
-        self._data.insert(loc, column, value)
+        self._data.insert(loc, column, value, allow_duplicates=allow_duplicates)
 
     def _sanitize_column(self, key, value):
         # Need to make sure new columns (which go into the BlockManager as new

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -940,8 +940,15 @@ def reindex(self, target, method=None, level=None, limit=None):
             if self.equals(target):
                 indexer = None
             else:
-                indexer = self.get_indexer(target, method=method,
-                                           limit=limit)
+                if self.is_unique:
+                    indexer = self.get_indexer(target, method=method,
+                                               limit=limit)
+                else:
+                    if method is not None or limit is not None:
+                        raise ValueError("cannot reindex a non-unique index "
+                                         "with a method or limit")
+                    indexer, missing = self.get_indexer_non_unique(target)
+
         return target, indexer
 
     def join(self, other, how='left', level=None, return_indexers=False):

diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -457,7 +457,7 @@ def _reindex(keys, level=None):
             else:
                 level = None
 
-            if labels.is_unique:
+            if labels.is_unique and Index(keyarr).is_unique:
                 return _reindex(keyarr, level=level)
             else:
                 indexer, missing = labels.get_indexer_non_unique(keyarr)
@@ -991,7 +991,6 @@ def _slice(self, indexer, axis=0):
     def _setitem_with_indexer(self, indexer, value):
         self.obj._set_values(indexer, value)
 
-
 def _check_bool_indexer(ax, key):
     # boolean indexing, need to check that the data are aligned, otherwise
     # disallowed
@@ -1010,7 +1009,6 @@ def _check_bool_indexer(ax, key):
     result = np.asarray(result, dtype=bool)
     return result
 
-
 def _is_series(obj):
     from pandas.core.series import Series
     return isinstance(obj, Series)