Skip to content

Commit 557c2ad

Browse files
committed
ENH: only treat index like col name list when slice of column index GH10797
1 parent e98e53d commit 557c2ad

File tree

3 files changed

+41
-11
lines changed

3 files changed

+41
-11
lines changed

doc/source/whatsnew/v0.18.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ Other enhancements
110110
- ``DataFrame`` has gained a ``_repr_latex_`` method in order to allow for automatic conversion to LaTeX in an IPython/Jupyter notebook using nbconvert. Options ``display.latex.escape`` and ``display.latex.longtable`` have been added to the configuration and are used automatically by the ``to_latex`` method. (:issue:`11778`)
111111
- ``sys.getsizeof(obj)`` returns the memory usage of a pandas object, including the
112112
values it contains (:issue:`11597`)
113-
- ``set_index`` now accepts indexes of column labels in the keys parameter (:issue:`10797`)
113+
- ``set_index`` now interprets a view of the column index passed to the ``keys`` parameter as a list of existing columns to use as the index (:issue:`10797`)
114114

115115
.. _whatsnew_0180.enhancements.rounding:
116116

pandas/core/frame.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -2733,10 +2733,10 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
27332733
----------
27342734
keys : column label (str), Index, Series, array, or a list of these things
27352735
Existing columns to set as the index (when given column labels)
2736-
and/or new values to set as the index. If an Index is given, it's
2737-
values will be used as the index if its length is the same as the
2738-
length of the DataFrame; otherwise, it's values will be assumed to
2739-
be column labels.
2736+
and/or new values to use as the index. If an Index is given,
2737+
it will be used as a new index unless it is a view of the column
2738+
index, in which case it will be interpreted as a set of existing
2739+
columns to set as the index.
27402740
drop : boolean, default True
27412741
Delete columns to be used as the new index
27422742
append : boolean, default False
@@ -2759,9 +2759,20 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
27592759
-------
27602760
dataframe : DataFrame
27612761
"""
2762-
if not isinstance(keys, list) and not (isinstance(keys, Index) and
2763-
len(keys) != len(self.index)):
2764-
keys = [keys]
2762+
if not isinstance(keys, list):
2763+
if isinstance(keys, Index):
2764+
# if the index is a slice of the column index, treat it like
2765+
# a list of column labels; otherwise, treat it like a new index
2766+
keys_base = keys.base
2767+
while isinstance(keys_base, Index):
2768+
keys_base = keys_base.base
2769+
cols_base = self.columns.base
2770+
while isinstance(cols_base, Index):
2771+
cols_base = cols_base.base
2772+
if keys_base is not cols_base:
2773+
keys = [keys]
2774+
else:
2775+
keys = [keys]
27652776

27662777
if inplace:
27672778
frame = self

pandas/tests/test_frame.py

+22-3
Original file line numberDiff line numberDiff line change
@@ -2583,9 +2583,12 @@ def test_set_index_empty_column(self):
25832583
result = df.set_index(['a', 'x'])
25842584
repr(result)
25852585

2586-
def test_set_index_with_col_label_index(self):
2587-
# GH10797: It should be possible to use an index of column labels as the
2588-
# `keys` parameter in set_index().
2586+
def test_set_index_with_index(self):
2587+
# GH10797: It should be possible to use a slice of the column index as
2588+
# the `keys` parameter in set_index().
2589+
2590+
# Test that setting the first two columns as the index can be done
2591+
# either with a list of column labels or a slice of the column index.
25892592
df = DataFrame({'col1': [1, 2, 3, 4, 5, 6],
25902593
'col2': ['a', 'b', 'c', 'a', 'b', 'c'],
25912594
'col3': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]})
@@ -2600,6 +2603,22 @@ def test_set_index_with_col_label_index(self):
26002603
index_df = df.set_index(df.columns[1:])
26012604
assert_frame_equal(expected_df, index_df)
26022605

2606+
# Test that passing the entire column index moves every column into the
2607+
# index, leaving a dataframe with no columns.
2608+
empty_df = df.set_index(df.columns)
2609+
assert_equal(len(empty_df.columns), 0)
2610+
assert_equal(empty_df.index.nlevels, 3)
2611+
2612+
# Test that an index that is created independently of the column index
2613+
# is used as a new index - not as a set of column labels.
2614+
new_index = Index(data=['col1', 'col1', 'col2', 'col2', 'col3', 'col3'])
2615+
expected_df2 = DataFrame({'col1': [1, 2, 3, 4, 5, 6],
2616+
'col2': ['a', 'b', 'c', 'a', 'b', 'c'],
2617+
'col3': [0.0, 0.0, 1.0, 1.0, 2.0, 2.0]},
2618+
index=new_index)
2619+
col_name_index_df = df.set_index(new_index)
2620+
assert_frame_equal(expected_df2, col_name_index_df)
2621+
26032622
def test_set_columns(self):
26042623
cols = Index(np.arange(len(self.mixed_frame.columns)))
26052624
self.mixed_frame.columns = cols

0 commit comments

Comments
 (0)