ENH: allow index of col names in set_index GH10797

StephenKappel · StephenKappel · commit e98e53dcb3de · 2016-01-03T19:47:32.000-05:00
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -110,6 +110,7 @@ Other enhancements
 - ``DataFrame`` has gained a ``_repr_latex_`` method in order to allow for automatic conversion to latex in a ipython/jupyter notebook using nbconvert. Options ``display.latex.escape`` and ``display.latex.longtable`` have been added to the configuration and are used automatically by the ``to_latex`` method. (:issue:`11778`)
 - ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the
   values it contains (:issue:`11597`)
+- ``DataFrame.set_index`` now accepts an ``Index`` of column labels as the ``keys`` parameter (:issue:`10797`)
 
 .. _whatsnew_0180.enhancements.rounding:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2727,11 +2727,16 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
                   verify_integrity=False):
         """
         Set the DataFrame index (row labels) using one or more existing
-        columns. By default yields a new object.
+        columns and/or new arrays of values. By default yields a new object.
 
         Parameters
         ----------
-        keys : column label or list of column labels / arrays
+        keys : column label (str), Index, Series, array, or a list of these things
+            Existing columns to set as the index (when given column labels)
+            and/or new values to set as the index. If an Index is given, its
+            values will be used as the index if its length is the same as the
+            length of the DataFrame; otherwise, its values will be assumed to
+            be column labels.
         drop : boolean, default True
             Delete columns to be used as the new index
         append : boolean, default False
@@ -2748,12 +2753,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
         >>> indexed_df = df.set_index(['A', 'B'])
         >>> indexed_df2 = df.set_index(['A', [0, 1, 2, 0, 1, 2]])
         >>> indexed_df3 = df.set_index([[0, 1, 2, 0, 1, 2]])
+        >>> indexed_df4 = df.set_index(df.columns[:2])
 
         Returns
         -------
         dataframe : DataFrame
         """
-        if not isinstance(keys, list):
+        if not isinstance(keys, list) and not (isinstance(keys, Index) and
+                                               len(keys) != len(self.index)):
             keys = [keys]
 
         if inplace:
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -2583,6 +2583,23 @@ def test_set_index_empty_column(self):
         result = df.set_index(['a', 'x'])
         repr(result)
 
+    def test_set_index_with_col_label_index(self):
+        # GH10797: It should be possible to use an index of column labels as the
+        # `keys` parameter in set_index().
+        df = DataFrame({'col1': [1, 2, 3, 4, 5, 6],
+                        'col2': ['a', 'b', 'c', 'a', 'b', 'c'],
+                        'col3': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]})
+        expected_index = MultiIndex(levels=[['a', 'b', 'c'], [0.0, 1.0, 2.0]],
+                                    labels=[[0, 1, 2, 0, 1, 2],
+                                            [0, 0, 1, 1, 2, 2]],
+                                    names=['col2', 'col3'])
+        expected_df = DataFrame(data={'col1': [1, 2, 3, 4, 5, 6]},
+                                index=expected_index)
+        list_df = df.set_index(['col2', 'col3'])
+        assert_frame_equal(expected_df, list_df)
+        index_df = df.set_index(df.columns[1:])
+        assert_frame_equal(expected_df, index_df)
+
     def test_set_columns(self):
         cols = Index(np.arange(len(self.mixed_frame.columns)))
         self.mixed_frame.columns = cols