Skip to content

Commit e98e53d

Browse files
committed
ENH: allow index of col names in set_index GH10797
1 parent 80e93bf commit e98e53d

File tree

3 files changed

+28
-3
lines changed

3 files changed

+28
-3
lines changed

doc/source/whatsnew/v0.18.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ Other enhancements
110110
- ``DataFrame`` has gained a ``_repr_latex_`` method in order to allow for automatic conversion to latex in a ipython/jupyter notebook using nbconvert. Options ``display.latex.escape`` and ``display.latex.longtable`` have been added to the configuration and are used automatically by the ``to_latex`` method. (:issue:`11778`)
111111
- ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the
112112
values it contains (:issue:`11597`)
113+
- ``DataFrame.set_index`` now accepts an ``Index`` of column labels as the ``keys`` parameter (:issue:`10797`)
113114

114115
.. _whatsnew_0180.enhancements.rounding:
115116

pandas/core/frame.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -2727,11 +2727,16 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
27272727
verify_integrity=False):
27282728
"""
27292729
Set the DataFrame index (row labels) using one or more existing
2730-
columns. By default yields a new object.
2730+
columns and/or new arrays of values. By default yields a new object.
27312731
27322732
Parameters
27332733
----------
2734-
keys : column label or list of column labels / arrays
2734+
keys : column label (str), Index, Series, array, or a list of these
2735+
Existing columns to set as the index (when given column labels)
2736+
and/or new values to set as the index. If an Index is given, its
2737+
values will be used as the index if its length is the same as the
2738+
length of the DataFrame; otherwise, its values will be assumed to
2739+
be column labels.
27352740
drop : boolean, default True
27362741
Delete columns to be used as the new index
27372742
append : boolean, default False
@@ -2748,12 +2753,14 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
27482753
>>> indexed_df = df.set_index(['A', 'B'])
27492754
>>> indexed_df2 = df.set_index(['A', [0, 1, 2, 0, 1, 2]])
27502755
>>> indexed_df3 = df.set_index([[0, 1, 2, 0, 1, 2]])
2756+
>>> indexed_df4 = df.set_index(df.columns[:2])
27512757
27522758
Returns
27532759
-------
27542760
dataframe : DataFrame
27552761
"""
2756-
if not isinstance(keys, list):
2762+
if not isinstance(keys, list) and not (isinstance(keys, Index) and
2763+
len(keys) != len(self.index)):
27572764
keys = [keys]
27582765

27592766
if inplace:

pandas/tests/test_frame.py

+17
Original file line numberDiff line numberDiff line change
@@ -2583,6 +2583,23 @@ def test_set_index_empty_column(self):
25832583
result = df.set_index(['a', 'x'])
25842584
repr(result)
25852585

2586+
def test_set_index_with_col_label_index(self):
2587+
# GH10797: It should be possible to use an index of column labels as the
2588+
# `keys` parameter in set_index().
2589+
df = DataFrame({'col1': [1, 2, 3, 4, 5, 6],
2590+
'col2': ['a', 'b', 'c', 'a', 'b', 'c'],
2591+
'col3': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]})
2592+
expected_index = MultiIndex(levels=[['a', 'b', 'c'], [0.0, 1.0, 2.0]],
2593+
labels=[[0, 1, 2, 0, 1, 2],
2594+
[0, 0, 1, 1, 2, 2]],
2595+
names=['col2', 'col3'])
2596+
expected_df = DataFrame(data={'col1': [1, 2, 3, 4, 5, 6]},
2597+
index=expected_index)
2598+
list_df = df.set_index(['col2', 'col3'])
2599+
assert_frame_equal(expected_df, list_df)
2600+
index_df = df.set_index(df.columns[1:])
2601+
assert_frame_equal(expected_df, index_df)
2602+
25862603
def test_set_columns(self):
25872604
cols = Index(np.arange(len(self.mixed_frame.columns)))
25882605
self.mixed_frame.columns = cols

0 commit comments

Comments
 (0)