Skip to content

Commit 06fb021

Browse files
topper-123 authored and Pingviinituutti committed
API: rename MultiIndex.labels to MultiIndex.codes (pandas-dev#23752)
1 parent e1f9ac4 commit 06fb021

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+919
-822
lines changed

asv_bench/benchmarks/groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -473,8 +473,8 @@ def setup(self):
473473
n1 = 400
474474
n2 = 250
475475
index = MultiIndex(levels=[np.arange(n1), tm.makeStringIndex(n2)],
476-
labels=[np.repeat(range(n1), n2).tolist(),
477-
list(range(n2)) * n1],
476+
codes=[np.repeat(range(n1), n2).tolist(),
477+
list(range(n2)) * n1],
478478
names=['lev1', 'lev2'])
479479
arr = np.random.randn(n1 * n2, 3)
480480
arr[::10000, 0] = np.nan

asv_bench/benchmarks/join_merge.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -115,16 +115,16 @@ class Join(object):
115115
def setup(self, sort):
116116
level1 = tm.makeStringIndex(10).values
117117
level2 = tm.makeStringIndex(1000).values
118-
label1 = np.arange(10).repeat(1000)
119-
label2 = np.tile(np.arange(1000), 10)
118+
codes1 = np.arange(10).repeat(1000)
119+
codes2 = np.tile(np.arange(1000), 10)
120120
index2 = MultiIndex(levels=[level1, level2],
121-
labels=[label1, label2])
121+
codes=[codes1, codes2])
122122
self.df_multi = DataFrame(np.random.randn(len(index2), 4),
123123
index=index2,
124124
columns=['A', 'B', 'C', 'D'])
125125

126-
self.key1 = np.tile(level1.take(label1), 10)
127-
self.key2 = np.tile(level2.take(label2), 10)
126+
self.key1 = np.tile(level1.take(codes1), 10)
127+
self.key2 = np.tile(level2.take(codes2), 10)
128128
self.df = DataFrame({'data1': np.random.randn(100000),
129129
'data2': np.random.randn(100000),
130130
'key1': self.key1,

asv_bench/benchmarks/multiindex_object.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,8 @@ def setup(self):
7979
levels = [np.arange(n),
8080
tm.makeStringIndex(n).values,
8181
1000 + np.arange(n)]
82-
labels = [np.random.choice(n, (k * n)) for lev in levels]
83-
self.mi = MultiIndex(levels=levels, labels=labels)
82+
codes = [np.random.choice(n, (k * n)) for lev in levels]
83+
self.mi = MultiIndex(levels=levels, codes=codes)
8484

8585
def time_duplicated(self):
8686
self.mi.duplicated()

asv_bench/benchmarks/reindex.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ class LevelAlign(object):
7171
def setup(self):
7272
self.index = MultiIndex(
7373
levels=[np.arange(10), np.arange(100), np.arange(100)],
74-
labels=[np.arange(10).repeat(10000),
75-
np.tile(np.arange(100).repeat(100), 10),
76-
np.tile(np.tile(np.arange(100), 100), 10)])
74+
codes=[np.arange(10).repeat(10000),
75+
np.tile(np.arange(100).repeat(100), 10),
76+
np.tile(np.tile(np.arange(100), 100), 10)])
7777
self.df = DataFrame(np.random.randn(len(self.index), 4),
7878
index=self.index)
7979
self.df_level = DataFrame(np.random.randn(100, 4),

asv_bench/benchmarks/stat_ops.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,10 @@ class FrameMultiIndexOps(object):
3131

3232
def setup(self, level, op):
3333
levels = [np.arange(10), np.arange(100), np.arange(100)]
34-
labels = [np.arange(10).repeat(10000),
35-
np.tile(np.arange(100).repeat(100), 10),
36-
np.tile(np.tile(np.arange(100), 100), 10)]
37-
index = pd.MultiIndex(levels=levels, labels=labels)
34+
codes = [np.arange(10).repeat(10000),
35+
np.tile(np.arange(100).repeat(100), 10),
36+
np.tile(np.tile(np.arange(100), 100), 10)]
37+
index = pd.MultiIndex(levels=levels, codes=codes)
3838
df = pd.DataFrame(np.random.randn(len(index), 4), index=index)
3939
self.df_func = getattr(df, op)
4040

@@ -67,10 +67,10 @@ class SeriesMultiIndexOps(object):
6767

6868
def setup(self, level, op):
6969
levels = [np.arange(10), np.arange(100), np.arange(100)]
70-
labels = [np.arange(10).repeat(10000),
71-
np.tile(np.arange(100).repeat(100), 10),
72-
np.tile(np.tile(np.arange(100), 100), 10)]
73-
index = pd.MultiIndex(levels=levels, labels=labels)
70+
codes = [np.arange(10).repeat(10000),
71+
np.tile(np.arange(100).repeat(100), 10),
72+
np.tile(np.tile(np.arange(100), 100), 10)]
73+
index = pd.MultiIndex(levels=levels, codes=codes)
7474
s = pd.Series(np.random.randn(len(index)), index=index)
7575
self.s_func = getattr(s, op)
7676

doc/source/advanced.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ analysis.
4949

5050
See the :ref:`cookbook<cookbook.multi_index>` for some advanced strategies.
5151

52+
.. versionchanged:: 0.24.0
53+
54+
:attr:`MultiIndex.labels` has been renamed to :attr:`MultiIndex.codes`
55+
and :meth:`MultiIndex.set_labels` to :meth:`MultiIndex.set_codes`.
56+
5257
Creating a MultiIndex (hierarchical index) object
5358
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
5459

@@ -469,7 +474,7 @@ values across a level. For instance:
469474
.. ipython:: python
470475
471476
midx = pd.MultiIndex(levels=[['zero', 'one'], ['x', 'y']],
472-
labels=[[1, 1, 0, 0], [1, 0, 1, 0]])
477+
codes=[[1, 1, 0, 0], [1, 0, 1, 0]])
473478
df = pd.DataFrame(np.random.randn(4, 2), index=midx)
474479
df
475480
df2 = df.mean(level=0)

doc/source/api.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1712,7 +1712,7 @@ MultiIndex Attributes
17121712

17131713
MultiIndex.names
17141714
MultiIndex.levels
1715-
MultiIndex.labels
1715+
MultiIndex.codes
17161716
MultiIndex.nlevels
17171717
MultiIndex.levshape
17181718

@@ -1723,7 +1723,7 @@ MultiIndex Components
17231723
:toctree: generated/
17241724

17251725
MultiIndex.set_levels
1726-
MultiIndex.set_labels
1726+
MultiIndex.set_codes
17271727
MultiIndex.to_hierarchical
17281728
MultiIndex.to_flat_index
17291729
MultiIndex.to_frame

doc/source/dsintro.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,7 @@ From DataFrame using ``to_panel`` method
961961
.. ipython:: python
962962
:okwarning:
963963
964-
midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], labels=[[1,1,0,0],[1,0,1,0]])
964+
midx = pd.MultiIndex(levels=[['one', 'two'], ['x','y']], codes=[[1,1,0,0],[1,0,1,0]])
965965
df = pd.DataFrame({'A' : [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=midx)
966966
df.to_panel()
967967

doc/source/indexing.rst

+3-3
Original file line numberDiff line numberDiff line change
@@ -1571,9 +1571,9 @@ Setting metadata
15711571

15721572
Indexes are "mostly immutable", but it is possible to set and change their
15731573
metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and
1574-
``labels``).
1574+
``codes``).
15751575

1576-
You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_labels``
1576+
You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_codes``
15771577
to set these attributes directly. They default to returning a copy; however,
15781578
you can specify ``inplace=True`` to have the data change in place.
15791579

@@ -1588,7 +1588,7 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
15881588
ind.name = "bob"
15891589
ind
15901590
1591-
``set_names``, ``set_levels``, and ``set_labels`` also take an optional
1591+
``set_names``, ``set_levels``, and ``set_codes`` also take an optional
15921592
``level`` argument
15931593

15941594
.. ipython:: python

doc/source/internals.rst

+5-5
Original file line numberDiff line numberDiff line change
@@ -74,23 +74,23 @@ MultiIndex
7474
~~~~~~~~~~
7575

7676
Internally, the ``MultiIndex`` consists of a few things: the **levels**, the
77-
integer **labels**, and the level **names**:
77+
integer **codes** (until version 0.24 named *labels*), and the level **names**:
7878

7979
.. ipython:: python
8080
8181
index = pd.MultiIndex.from_product([range(3), ['one', 'two']],
8282
names=['first', 'second'])
8383
index
8484
index.levels
85-
index.labels
85+
index.codes
8686
index.names
8787
88-
You can probably guess that the labels determine which unique element is
88+
You can probably guess that the codes determine which unique element is
8989
identified with that location at each layer of the index. It's important to
90-
note that sortedness is determined **solely** from the integer labels and does
90+
note that sortedness is determined **solely** from the integer codes and does
9191
not check (or care) whether the levels themselves are sorted. Fortunately, the
9292
constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but
93-
if you compute the levels and labels yourself, please be careful.
93+
if you compute the levels and codes yourself, please be careful.
9494

9595
Values
9696
~~~~~~

doc/source/io.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -3728,8 +3728,8 @@ storing/selecting from homogeneous index ``DataFrames``.
37283728
37293729
index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
37303730
['one', 'two', 'three']],
3731-
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
3732-
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
3731+
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
3732+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
37333733
names=['foo', 'bar'])
37343734
df_mi = pd.DataFrame(np.random.randn(10, 3), index=index,
37353735
columns=['A', 'B', 'C'])

doc/source/whatsnew/v0.24.0.rst

+7
Original file line numberDiff line numberDiff line change
@@ -1100,6 +1100,13 @@ Other API Changes
11001100
Deprecations
11011101
~~~~~~~~~~~~
11021102

1103+
- :attr:`MultiIndex.labels` has been deprecated and replaced by :attr:`MultiIndex.codes`.
1104+
The functionality is unchanged. The new name better reflects the nature of
1105+
these codes and makes the ``MultiIndex`` API more similar to the API for :class:`CategoricalIndex` (:issue:`13443`).
1106+
As a consequence, other uses of the name ``labels`` in ``MultiIndex`` have also been deprecated and replaced with ``codes``:
1107+
- You should initialize a ``MultiIndex`` instance using a parameter named ``codes`` rather than ``labels``.
1108+
- ``MultiIndex.set_labels`` has been deprecated in favor of :meth:`MultiIndex.set_codes`.
1109+
- For method :meth:`MultiIndex.copy`, the ``labels`` parameter has been deprecated and replaced by a ``codes`` parameter.
11031110
- :meth:`DataFrame.to_stata`, :meth:`read_stata`, :class:`StataReader` and :class:`StataWriter` have deprecated the ``encoding`` argument. The encoding of a Stata dta file is determined by the file type and cannot be changed (:issue:`21244`)
11041111
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
11051112
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)

pandas/core/frame.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -1839,7 +1839,7 @@ def to_panel(self):
18391839
selfsorted = self
18401840

18411841
major_axis, minor_axis = selfsorted.index.levels
1842-
major_labels, minor_labels = selfsorted.index.labels
1842+
major_codes, minor_codes = selfsorted.index.codes
18431843
shape = len(major_axis), len(minor_axis)
18441844

18451845
# preserve names, if any
@@ -1854,8 +1854,8 @@ def to_panel(self):
18541854

18551855
# create new manager
18561856
new_mgr = selfsorted._data.reshape_nd(axes=new_axes,
1857-
labels=[major_labels,
1858-
minor_labels],
1857+
labels=[major_codes,
1858+
minor_codes],
18591859
shape=shape,
18601860
ref_items=selfsorted.columns)
18611861

@@ -3736,8 +3736,8 @@ def drop(self, labels=None, axis=0, index=None, columns=None,
37363736
37373737
>>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
37383738
... ['speed', 'weight', 'length']],
3739-
... labels=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
3740-
... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
3739+
... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
3740+
... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
37413741
>>> df = pd.DataFrame(index=midx, columns=['big', 'small'],
37423742
... data=[[45, 30], [200, 100], [1.5, 1], [30, 20],
37433743
... [250, 150], [1.5, 0.8], [320, 250],
@@ -4226,7 +4226,7 @@ def _maybe_casted_values(index, labels=None):
42264226
if isinstance(self.index, MultiIndex):
42274227
names = [n if n is not None else ('level_%d' % i)
42284228
for (i, n) in enumerate(self.index.names)]
4229-
to_insert = lzip(self.index.levels, self.index.labels)
4229+
to_insert = lzip(self.index.levels, self.index.codes)
42304230
else:
42314231
default = 'index' if 'index' not in self else 'level_0'
42324232
names = ([default] if self.index.name is None
@@ -4594,7 +4594,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False,
45944594
elif isinstance(labels, MultiIndex):
45954595
from pandas.core.sorting import lexsort_indexer
45964596

4597-
indexer = lexsort_indexer(labels._get_labels_for_sorting(),
4597+
indexer = lexsort_indexer(labels._get_codes_for_sorting(),
45984598
orders=ascending,
45994599
na_position=na_position)
46004600
else:
@@ -7147,8 +7147,9 @@ def _count_level(self, level, axis=0, numeric_only=False):
71477147
level = count_axis._get_level_number(level)
71487148

71497149
level_index = count_axis.levels[level]
7150-
labels = ensure_int64(count_axis.labels[level])
7151-
counts = lib.count_level_2d(mask, labels, len(level_index), axis=0)
7150+
level_codes = ensure_int64(count_axis.codes[level])
7151+
counts = lib.count_level_2d(mask, level_codes, len(level_index),
7152+
axis=0)
71527153

71537154
result = DataFrame(counts, index=level_index, columns=agg_axis)
71547155

pandas/core/groupby/generic.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1112,7 +1112,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
11121112
lab = cut(Series(val), bins, include_lowest=True)
11131113
lev = lab.cat.categories
11141114
lab = lev.take(lab.cat.codes)
1115-
llab = lambda lab, inc: lab[inc]._multiindex.labels[-1]
1115+
llab = lambda lab, inc: lab[inc]._multiindex.codes[-1]
11161116

11171117
if is_interval_dtype(lab):
11181118
# TODO: should we do this inside II?
@@ -1163,7 +1163,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
11631163
out, labels[-1] = out[sorter], labels[-1][sorter]
11641164

11651165
if bins is None:
1166-
mi = MultiIndex(levels=levels, labels=labels, names=names,
1166+
mi = MultiIndex(levels=levels, codes=labels, names=names,
11671167
verify_integrity=False)
11681168

11691169
if is_integer_dtype(out):
@@ -1191,10 +1191,10 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
11911191
out, left[-1] = out[sorter], left[-1][sorter]
11921192

11931193
# build the multi-index w/ full levels
1194-
labels = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
1195-
labels.append(left[-1])
1194+
codes = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
1195+
codes.append(left[-1])
11961196

1197-
mi = MultiIndex(levels=levels, labels=labels, names=names,
1197+
mi = MultiIndex(levels=levels, codes=codes, names=names,
11981198
verify_integrity=False)
11991199

12001200
if is_integer_dtype(out):

pandas/core/groupby/ops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -290,10 +290,10 @@ def result_index(self):
290290
if not self.compressed and len(self.groupings) == 1:
291291
return self.groupings[0].result_index.rename(self.names[0])
292292

293-
labels = self.recons_labels
293+
codes = self.recons_labels
294294
levels = [ping.result_index for ping in self.groupings]
295295
result = MultiIndex(levels=levels,
296-
labels=labels,
296+
codes=codes,
297297
verify_integrity=False,
298298
names=self.names)
299299
return result

0 commit comments

Comments
 (0)