Skip to content

Commit 28f3af4

Browse files
committed
Merge pull request #6459 from qwhelan/set_index_names
ENH: Preserve .names in df.set_index(df.index)
2 parents 7687eae + 11c78da commit 28f3af4

File tree

4 files changed

+81
-1
lines changed

4 files changed

+81
-1
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ API Changes
107107
or numbering columns as needed (:issue:`2385`)
108108
- Slicing and advanced/boolean indexing operations on ``Index`` classes will no
109109
longer change type of the resulting index (:issue:`6440`).
110+
- ``set_index`` no longer converts MultiIndexes to an Index of tuples (:issue:`6459`).
110111

111112
Experimental Features
112113
~~~~~~~~~~~~~~~~~~~~~
@@ -197,6 +198,7 @@ Bug Fixes
197198
- Bug in multi-axis indexing using ``.loc`` on non-unique indices (:issue:`6504`)
198199
- Bug that caused _ref_locs corruption when slice indexing across columns axis of a DataFrame (:issue:`6525`)
199200
- Regression from 0.13 in the treatment of numpy ``datetime64`` non-ns dtypes in Series creation (:issue:`6529`)
201+
- The ``.names`` attribute of MultiIndexes passed to ``set_index`` is now preserved (:issue:`6459`).
200202

201203
pandas 0.13.1
202204
-------------

doc/source/v0.14.0.txt

+43
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,49 @@ These are out-of-bounds selections
9292
.. ipython:: python
9393

9494
i[[0,1,2]].astype(np.int_)
95+
- ``set_index`` no longer converts MultiIndexes to an Index of tuples. For example,
96+
the old behavior returned an Index in this case (:issue:`6459`):
97+
98+
.. ipython:: python
99+
:suppress:
100+
101+
from itertools import product
102+
tuples = list(product(('a', 'b'), ('c', 'd')))
103+
mi = MultiIndex.from_tuples(tuples)
104+
df_multi = DataFrame(np.random.randn(4, 2), index=mi)
105+
tuple_ind = pd.Index(tuples)
106+
107+
.. ipython:: python
108+
109+
df_multi.index
110+
111+
@suppress
112+
df_multi.index = tuple_ind
113+
114+
# Old behavior, cast MultiIndex to an Index
115+
df_multi.set_index(df_multi.index)
116+
117+
@suppress
118+
df_multi.index = mi
119+
120+
# New behavior
121+
df_multi.set_index(df_multi.index)
122+
123+
This also applies when passing multiple indices to ``set_index``:
124+
125+
.. ipython:: python
126+
127+
@suppress
128+
df_multi.index = tuple_ind
129+
130+
# Old output, 2-level MultiIndex of tuples
131+
df_multi.set_index([df_multi.index, df_multi.index])
132+
133+
@suppress
134+
df_multi.index = mi
135+
136+
# New output, 4-level MultiIndex
137+
df_multi.set_index([df_multi.index, df_multi.index])
95138

96139

97140
MultiIndexing Using Slicers

pandas/core/frame.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -2240,7 +2240,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
22402240

22412241
to_remove = []
22422242
for col in keys:
2243-
if isinstance(col, Series):
2243+
if isinstance(col, MultiIndex):
2244+
# append all but the last column so we don't have to modify
2245+
# the end of this loop
2246+
for n in range(col.nlevels - 1):
2247+
arrays.append(col.get_level_values(n))
2248+
2249+
level = col.get_level_values(col.nlevels - 1)
2250+
names.extend(col.names)
2251+
elif isinstance(col, (Series, Index)):
22442252
level = col.values
22452253
names.append(col.name)
22462254
elif isinstance(col, (list, np.ndarray)):

pandas/tests/test_frame.py

+27
Original file line numberDiff line numberDiff line change
@@ -12280,6 +12280,33 @@ def test_dtypes_are_correct_after_column_slice(self):
1228012280
pd.Series(odict([('a', np.float_), ('b', np.float_),
1228112281
('c', np.float_),])))
1228212282

12283+
def test_set_index_names(self):
12284+
df = pd.util.testing.makeDataFrame()
12285+
df.index.name = 'name'
12286+
12287+
self.assertEquals(df.set_index(df.index).index.names, ['name'])
12288+
12289+
mi = MultiIndex.from_arrays(df[['A', 'B']].T.values, names=['A', 'B'])
12290+
mi2 = MultiIndex.from_arrays(df[['A', 'B', 'A', 'B']].T.values,
12291+
names=['A', 'B', 'A', 'B'])
12292+
12293+
df = df.set_index(['A', 'B'])
12294+
12295+
self.assertEquals(df.set_index(df.index).index.names, ['A', 'B'])
12296+
12297+
# Check that set_index isn't converting a MultiIndex into an Index
12298+
self.assertTrue(isinstance(df.set_index(df.index).index, MultiIndex))
12299+
12300+
# Check actual equality
12301+
tm.assert_index_equal(df.set_index(df.index).index, mi)
12302+
12303+
# Check that [MultiIndex, MultiIndex] yields a MultiIndex rather
12304+
# than a pair of tuples
12305+
self.assertTrue(isinstance(df.set_index([df.index, df.index]).index, MultiIndex))
12306+
12307+
# Check equality
12308+
tm.assert_index_equal(df.set_index([df.index, df.index]).index, mi2)
12309+
1228312310

1228412311
def skip_if_no_ne(engine='numexpr'):
1228512312
if engine == 'numexpr':

0 commit comments

Comments
 (0)