ENH: only treat index like col name list when slice of column index GH10797

StephenKappel · StephenKappel · commit 557c2ad16479 · 2016-01-03T20:01:33.000-05:00
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -110,7 +110,7 @@ Other enhancements
 - ``DataFrame`` has gained a ``_repr_latex_`` method in order to allow for automatic conversion to latex in a ipython/jupyter notebook using nbconvert. Options ``display.latex.escape`` and ``display.latex.longtable`` have been added to the configuration and are used automatically by the ``to_latex`` method. (:issue:`11778`)
 - ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the
   values it contains (:issue:`11597`)
-- ``set_index`` now accepts indexes of column labels in the keys parameter (:issue:`10797`)
+- ``set_index`` now interprets a view of the column index passed to the ``keys`` parameter as a list of existing columns to use as the index (:issue:`10797`)
 
 .. _whatsnew_0180.enhancements.rounding:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2733,10 +2733,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         ----------
         keys : column label (str), Index, Series, array, or a list of these things
             Existing columns to set as the index (when given columns labels)
-            and/or new values to set as the index. If an Index is given, it's
-            values will be used as the index if its length is the same as the
-            length of the DataFrame; otherwise, it's values will be assumed to
-            be column labels.
+            and/or new values to use as the index. If an Index is given, it
+            will be used as the new index unless it is a view of the column
+            index, in which case it will be interpreted as a set of existing
+            columns to set as the index.
         drop : boolean, default True
             Delete columns to be used as the new index
         append : boolean, default False
@@ -2759,9 +2759,20 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         -------
         dataframe : DataFrame
         """
-        if not isinstance(keys, list) and not (isinstance(keys, Index) and
-                                               len(keys) != len(self.index)):
-            keys = [keys]
+        if not isinstance(keys, list):
+            if isinstance(keys, Index):
+                # if the index is a slice of the column index, treat it like
+                # a list of column labels; otherwise, treat it like a new index
+                keys_base = keys.base
+                while isinstance(keys_base, Index):
+                    keys_base = keys_base.base
+                cols_base = self.columns.base
+                while isinstance(cols_base, Index):
+                    cols_base = cols_base.base
+                if keys_base is not cols_base:
+                    keys = [keys]
+            else:
+                keys = [keys]
 
         if inplace:
             frame = self
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -2583,9 +2583,12 @@ def test_set_index_empty_column(self):
         result = df.set_index(['a', 'x'])
         repr(result)
 
-    def test_set_index_with_col_label_index(self):
-        # GH10797: It should be possible to use an index of column labels as the
-        # `keys` parameter in set_index().
+    def test_set_index_with_index(self):
+        # GH10797: It should be possible to use a slice of the column index as
+        # the `keys` parameter in set_index().
+
+        # Test that setting the first two columns as the index can be done
+        # either with a list of column labels or a slice of the column index.
         df = DataFrame({'col1': [1, 2, 3, 4, 5, 6],
                         'col2': ['a', 'b', 'c', 'a', 'b', 'c'],
                         'col3': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]})
@@ -2600,6 +2603,22 @@ def test_set_index_with_col_label_index(self):
         index_df = df.set_index(df.columns[1:])
         assert_frame_equal(expected_df, index_df)
 
+        # Test that passing the entire column index results in a dataframe
+        # with no columns (i.e. all columns become part of the index).
+        empty_df = df.set_index(df.columns)
+        assert_equal(len(empty_df.columns), 0)
+        assert_equal(empty_df.index.nlevels, 3)
+
+        # Test that an index that is created independently of the column index
+        # is used as a new index - not as a set of column labels.
+        new_index = Index(data=['col1', 'col1', 'col2', 'col2', 'col3', 'col3'])
+        expected_df2 = DataFrame({'col1': [1, 2, 3, 4, 5, 6],
+                        'col2': ['a', 'b', 'c', 'a', 'b', 'c'],
+                        'col3': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]},
+                                index=new_index)
+        col_name_index_df = df.set_index(new_index)
+        assert_frame_equal(expected_df2, col_name_index_df)
+
     def test_set_columns(self):
         cols = Index(np.arange(len(self.mixed_frame.columns)))
         self.mixed_frame.columns = cols