Skip to content

Commit c659785

Browse files
topper-123 authored and proost committed
Separate MultiIndex names from levels (pandas-dev#27242)
1 parent b3d9ca0 commit c659785

File tree

14 files changed

+110
-71
lines changed

14 files changed

+110
-71
lines changed

doc/source/whatsnew/v1.0.0.rst

+32-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,37 @@ source, you should no longer need to install Cython into your build environment
124124
Backwards incompatible API changes
125125
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
126126

127-
- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`).
127+
.. _whatsnew_1000.api_breaking.MultiIndex._names:
128+
129+
``MultiIndex.levels`` do not hold level names any longer
130+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
131+
132+
- A :class:`MultiIndex` previously stored the level names as attributes of each of its
133+
:attr:`MultiIndex.levels`. From Pandas 1.0, the names are only accessed through
134+
:attr:`MultiIndex.names` (which was also possible previously). This is done in order to
135+
make :attr:`MultiIndex.levels` more similar to :attr:`CategoricalIndex.categories` (:issue:`27242`).
136+
137+
*pandas 0.25.x*
138+
139+
.. code-block:: ipython
140+
141+
In [1]: mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y'])
142+
In [2]: mi
143+
Out[2]: MultiIndex([(1, 'a'),
144+
(1, 'b'),
145+
(2, 'a'),
146+
(2, 'b')],
147+
names=['x', 'y'])
148+
In [3]: mi.levels[0].name
149+
Out[3]: 'x'
150+
151+
*pandas 1.0.0*
152+
153+
.. ipython:: python
154+
155+
mi = pd.MultiIndex.from_product([[1, 2], ['a', 'b']], names=['x', 'y'])
156+
mi.levels[0].name
157+
128158
- :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`)
129159

130160
*pandas 0.25.x*
@@ -150,6 +180,7 @@ Backwards incompatible API changes
150180
Other API changes
151181
^^^^^^^^^^^^^^^^^
152182

183+
- :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`)
153184
- :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`)
154185
- :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly provided (:issue:`27292`)
155186
- In order to improve tab-completion, Pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``).

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -7792,7 +7792,8 @@ def _count_level(self, level, axis=0, numeric_only=False):
77927792
if isinstance(level, str):
77937793
level = count_axis._get_level_number(level)
77947794

7795-
level_index = count_axis.levels[level]
7795+
level_name = count_axis._names[level]
7796+
level_index = count_axis.levels[level]._shallow_copy(name=level_name)
77967797
level_codes = ensure_int64(count_axis.codes[level])
77977798
counts = lib.count_level_2d(mask, level_codes, len(level_index), axis=0)
77987799

pandas/core/indexes/multi.py

+8-7
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,7 @@ def __new__(
274274
result._set_levels(levels, copy=copy, validate=False)
275275
result._set_codes(codes, copy=copy, validate=False)
276276

277+
result._names = [None] * len(levels)
277278
if names is not None:
278279
# handles name validation
279280
result._set_names(names)
@@ -1216,7 +1217,7 @@ def __len__(self):
12161217
return len(self.codes[0])
12171218

12181219
def _get_names(self):
1219-
return FrozenList(level.name for level in self.levels)
1220+
return FrozenList(self._names)
12201221

12211222
def _set_names(self, names, level=None, validate=True):
12221223
"""
@@ -1262,7 +1263,7 @@ def _set_names(self, names, level=None, validate=True):
12621263
level = [self._get_level_number(l) for l in level]
12631264

12641265
# set the name
1265-
for l, name in zip(level, names):
1266+
for lev, name in zip(level, names):
12661267
if name is not None:
12671268
# GH 20527
12681269
# All items in 'names' need to be hashable:
@@ -1272,7 +1273,7 @@ def _set_names(self, names, level=None, validate=True):
12721273
self.__class__.__name__
12731274
)
12741275
)
1275-
self.levels[l].rename(name, inplace=True)
1276+
self._names[lev] = name
12761277

12771278
names = property(
12781279
fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
@@ -1585,13 +1586,13 @@ def _get_level_values(self, level, unique=False):
15851586
values : ndarray
15861587
"""
15871588

1588-
values = self.levels[level]
1589+
lev = self.levels[level]
15891590
level_codes = self.codes[level]
1591+
name = self._names[level]
15901592
if unique:
15911593
level_codes = algos.unique(level_codes)
1592-
filled = algos.take_1d(values._values, level_codes, fill_value=values._na_value)
1593-
values = values._shallow_copy(filled)
1594-
return values
1594+
filled = algos.take_1d(lev._values, level_codes, fill_value=lev._na_value)
1595+
return lev._shallow_copy(filled, name=name)
15951596

15961597
def get_level_values(self, level):
15971598
"""

pandas/core/reshape/reshape.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -259,10 +259,10 @@ def get_new_values(self):
259259
def get_new_columns(self):
260260
if self.value_columns is None:
261261
if self.lift == 0:
262-
return self.removed_level
262+
return self.removed_level._shallow_copy(name=self.removed_name)
263263

264-
lev = self.removed_level
265-
return lev.insert(0, lev._na_value)
264+
lev = self.removed_level.insert(0, item=self.removed_level._na_value)
265+
return lev.rename(self.removed_name)
266266

267267
stride = len(self.removed_level) + self.lift
268268
width = len(self.value_columns)
@@ -298,10 +298,10 @@ def get_new_index(self):
298298

299299
# construct the new index
300300
if len(self.new_index_levels) == 1:
301-
lev, lab = self.new_index_levels[0], result_codes[0]
302-
if (lab == -1).any():
303-
lev = lev.insert(len(lev), lev._na_value)
304-
return lev.take(lab)
301+
level, level_codes = self.new_index_levels[0], result_codes[0]
302+
if (level_codes == -1).any():
303+
level = level.insert(len(level), level._na_value)
304+
return level.take(level_codes).rename(self.new_index_names[0])
305305

306306
return MultiIndex(
307307
levels=self.new_index_levels,
@@ -661,7 +661,8 @@ def _convert_level_number(level_num, columns):
661661
new_names = this.columns.names[:-1]
662662
new_columns = MultiIndex.from_tuples(unique_groups, names=new_names)
663663
else:
664-
new_columns = unique_groups = this.columns.levels[0]
664+
new_columns = this.columns.levels[0]._shallow_copy(name=this.columns.names[0])
665+
unique_groups = new_columns
665666

666667
# time to ravel the values
667668
new_data = {}

pandas/io/json/_table_schema.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,10 @@ def build_table_schema(data, index=True, primary_key=None, version=True):
243243

244244
if index:
245245
if data.index.nlevels > 1:
246-
for level in data.index.levels:
247-
fields.append(convert_pandas_type_to_json_field(level))
246+
for level, name in zip(data.index.levels, data.index.names):
247+
new_field = convert_pandas_type_to_json_field(level)
248+
new_field["name"] = name
249+
fields.append(new_field)
248250
else:
249251
fields.append(convert_pandas_type_to_json_field(data.index))
250252

pandas/tests/frame/test_alter_axes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -978,7 +978,7 @@ def test_reset_index(self, float_frame):
978978
):
979979
values = lev.take(level_codes)
980980
name = names[i]
981-
tm.assert_index_equal(values, Index(deleveled[name]))
981+
tm.assert_index_equal(values, Index(deleveled[name].rename(name=None)))
982982

983983
stacked.index.names = [None, None]
984984
deleveled2 = stacked.reset_index()

pandas/tests/indexes/multi/test_astype.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def test_astype(idx):
1111
actual = idx.astype("O")
1212
assert_copy(actual.levels, expected.levels)
1313
assert_copy(actual.codes, expected.codes)
14-
assert [level.name for level in actual.levels] == list(expected.names)
14+
assert actual.names == list(expected.names)
1515

1616
with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
1717
idx.astype(np.dtype(int))

pandas/tests/indexes/multi/test_constructor.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ def test_constructor_single_level():
1717
levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
1818
)
1919
assert isinstance(result, MultiIndex)
20-
expected = Index(["foo", "bar", "baz", "qux"], name="first")
20+
expected = Index(["foo", "bar", "baz", "qux"])
2121
tm.assert_index_equal(result.levels[0], expected)
2222
assert result.names == ["first"]
2323

@@ -292,8 +292,9 @@ def test_from_arrays_empty():
292292
# 1 level
293293
result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
294294
assert isinstance(result, MultiIndex)
295-
expected = Index([], name="A")
295+
expected = Index([])
296296
tm.assert_index_equal(result.levels[0], expected)
297+
assert result.names == ["A"]
297298

298299
# N levels
299300
for N in [2, 3]:
@@ -439,8 +440,9 @@ def test_from_product_empty_zero_levels():
439440

440441
def test_from_product_empty_one_level():
441442
result = MultiIndex.from_product([[]], names=["A"])
442-
expected = pd.Index([], name="A")
443+
expected = pd.Index([])
443444
tm.assert_index_equal(result.levels[0], expected)
445+
assert result.names == ["A"]
444446

445447

446448
@pytest.mark.parametrize(

pandas/tests/indexes/multi/test_names.py

+12-16
Original file line numberDiff line numberDiff line change
@@ -27,28 +27,25 @@ def test_index_name_retained():
2727

2828

2929
def test_changing_names(idx):
30-
31-
# names should be applied to levels
32-
level_names = [level.name for level in idx.levels]
33-
check_level_names(idx, idx.names)
30+
assert [level.name for level in idx.levels] == [None, None]
3431

3532
view = idx.view()
3633
copy = idx.copy()
3734
shallow_copy = idx._shallow_copy()
3835

39-
# changing names should change level names on object
36+
# changing names should not change level names on object
4037
new_names = [name + "a" for name in idx.names]
4138
idx.names = new_names
42-
check_level_names(idx, new_names)
39+
check_level_names(idx, [None, None])
4340

44-
# but not on copies
45-
check_level_names(view, level_names)
46-
check_level_names(copy, level_names)
47-
check_level_names(shallow_copy, level_names)
41+
# and not on copies
42+
check_level_names(view, [None, None])
43+
check_level_names(copy, [None, None])
44+
check_level_names(shallow_copy, [None, None])
4845

4946
# and copies shouldn't change original
5047
shallow_copy.names = [name + "c" for name in shallow_copy.names]
51-
check_level_names(idx, new_names)
48+
check_level_names(idx, [None, None])
5249

5350

5451
def test_take_preserve_name(idx):
@@ -82,9 +79,9 @@ def test_copy_names():
8279
def test_names(idx, index_names):
8380

8481
# names are assigned in setup
85-
names = index_names
82+
assert index_names == ["first", "second"]
8683
level_names = [level.name for level in idx.levels]
87-
assert names == level_names
84+
assert level_names == [None, None]
8885

8986
# setting bad names on existing
9087
index = idx
@@ -109,11 +106,10 @@ def test_names(idx, index_names):
109106
names=["first", "second", "third"],
110107
)
111108

112-
# names are assigned
109+
# names are assigned on index, but not transferred to the levels
113110
index.names = ["a", "b"]
114-
ind_names = list(index.names)
115111
level_names = [level.name for level in index.levels]
116-
assert ind_names == level_names
112+
assert level_names == [None, None]
117113

118114

119115
def test_duplicate_level_names_access_raises(idx):

pandas/tests/indexes/multi/test_reindex.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,17 @@
66
import pandas.util.testing as tm
77

88

9-
def check_level_names(index, names):
10-
assert [level.name for level in index.levels] == list(names)
11-
12-
139
def test_reindex(idx):
1410
result, indexer = idx.reindex(list(idx[:4]))
1511
assert isinstance(result, MultiIndex)
16-
check_level_names(result, idx[:4].names)
12+
assert result.names == ["first", "second"]
13+
assert [level.name for level in result.levels] == [None, None]
1714

1815
result, indexer = idx.reindex(list(idx))
1916
assert isinstance(result, MultiIndex)
2017
assert indexer is None
21-
check_level_names(result, idx.names)
18+
assert result.names == ["first", "second"]
19+
assert [level.name for level in result.levels] == [None, None]
2220

2321

2422
def test_reindex_level(idx):

pandas/tests/indexes/multi/test_reshape.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@ def test_insert(idx):
1515
# key not contained in all levels
1616
new_index = idx.insert(0, ("abc", "three"))
1717

18-
exp0 = Index(list(idx.levels[0]) + ["abc"], name="first")
18+
exp0 = Index(list(idx.levels[0]) + ["abc"])
1919
tm.assert_index_equal(new_index.levels[0], exp0)
20+
assert new_index.names == ["first", "second"]
2021

21-
exp1 = Index(list(idx.levels[1]) + ["three"], name="second")
22+
exp1 = Index(list(idx.levels[1]) + ["three"])
2223
tm.assert_index_equal(new_index.levels[1], exp1)
2324
assert new_index[0] == ("abc", "three")
2425

pandas/tests/reshape/test_concat.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -1219,8 +1219,10 @@ def test_concat_keys_specific_levels(self):
12191219
names=["group_key"],
12201220
)
12211221

1222-
tm.assert_index_equal(result.columns.levels[0], Index(level, name="group_key"))
1223-
assert result.columns.names[0] == "group_key"
1222+
tm.assert_index_equal(result.columns.levels[0], Index(level))
1223+
tm.assert_index_equal(result.columns.levels[1], Index([0, 1, 2, 3]))
1224+
1225+
assert result.columns.names == ["group_key", None]
12241226

12251227
def test_concat_dataframe_keys_bug(self, sort):
12261228
t1 = DataFrame(
@@ -1409,10 +1411,8 @@ def test_concat_keys_and_levels(self):
14091411
keys=[("foo", "one"), ("foo", "two"), ("baz", "one"), ("baz", "two")],
14101412
names=["first", "second"],
14111413
)
1412-
assert result.index.names == ("first", "second") + (None,)
1413-
tm.assert_index_equal(
1414-
result.index.levels[0], Index(["baz", "foo"], name="first")
1415-
)
1414+
assert result.index.names == ("first", "second", None)
1415+
tm.assert_index_equal(result.index.levels[0], Index(["baz", "foo"]))
14161416

14171417
def test_concat_keys_levels_no_overlap(self):
14181418
# GH #1406

pandas/tests/reshape/test_reshape.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -618,16 +618,15 @@ def test_reshaping_multi_index_categorical(self):
618618
df.index.names = ["major", "minor"]
619619
df["str"] = "foo"
620620

621-
dti = df.index.levels[0]
622-
623621
df["category"] = df["str"].astype("category")
624622
result = df["category"].unstack()
625623

624+
dti = df.index.levels[0]
626625
c = Categorical(["foo"] * len(dti))
627626
expected = DataFrame(
628627
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
629628
columns=Index(list("ABCD"), name="minor"),
630-
index=dti,
629+
index=dti.rename("major"),
631630
)
632631
tm.assert_frame_equal(result, expected)
633632

0 commit comments

Comments
 (0)