Skip to content

Commit b495b8b

Browse files
committed
PERF: MultiIndex._engine.get_loc() handles non-unique fine
1 parent 4618a09 commit b495b8b

File tree

5 files changed

+10
-18
lines changed

5 files changed

+10
-18
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,7 @@ Performance Improvements
381381
- :func:`Series` / :func:`DataFrame` tab completion limits to 100 values, for better performance. (:issue:`18587`)
382382
- Improved performance of :func:`DataFrame.median` with ``axis=1`` when bottleneck is not installed (:issue:`16468`)
383383
- Improved performance of :func:`MultiIndex.get_loc` for large indexes, at the cost of a reduction in performance for small ones (:issue:`18519`)
384+
- Improved performance of :func:`MultiIndex.get_loc` for non-unique indexes; as a consequence, it no longer emits a ``PerformanceWarning``
384385

385386

386387
.. _whatsnew_0230.docs:

pandas/core/indexes/multi.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.dtypes.common import (
1515
_ensure_int64,
1616
_ensure_platform_int,
17+
is_integer,
1718
is_categorical_dtype,
1819
is_object_dtype,
1920
is_iterator,
@@ -2215,8 +2216,12 @@ def _maybe_to_slice(loc):
22152216
raise KeyError('Key length ({0}) exceeds index depth ({1})'
22162217
''.format(keylen, self.nlevels))
22172218

2218-
if keylen == self.nlevels and self.is_unique:
2219-
return self._engine.get_loc(key)
2219+
if keylen == self.nlevels:
2220+
loc = self._engine.get_loc(key)
2221+
if not self.is_unique and is_integer(loc):
2222+
# Indexers expect a slice from indexing a non-unique index
2223+
loc = slice(loc, loc + 1)
2224+
return loc
22202225

22212226
# -- partial selection or non-unique index
22222227
# break the key into 2 parts based on the lexsort_depth of the index;

pandas/tests/frame/test_sort_values_level_as_str.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,7 @@ def test_sort_column_level_and_index_label(
8585
ascending=ascending,
8686
axis=1)
8787

88-
if len(levels) > 1:
89-
# Accessing multi-level columns that are not lexsorted raises a
90-
# performance warning
91-
with tm.assert_produces_warning(PerformanceWarning,
92-
check_stacklevel=False):
93-
assert_frame_equal(result, expected)
94-
else:
95-
assert_frame_equal(result, expected)
88+
assert_frame_equal(result, expected)
9689

9790

9891
def test_sort_values_column_index_level_precedence():

pandas/tests/indexing/test_ix.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from pandas.compat import lrange
1212
from pandas import Series, DataFrame, option_context, MultiIndex
1313
from pandas.util import testing as tm
14-
from pandas.errors import PerformanceWarning
1514

1615

1716
class TestIX(object):
@@ -185,9 +184,7 @@ def test_ix_general(self):
185184
df = DataFrame(data).set_index(keys=['col', 'year'])
186185
key = 4.0, 2012
187186

188-
# emits a PerformanceWarning, ok
189-
with tm.assert_produces_warning(PerformanceWarning):
190-
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
187+
tm.assert_frame_equal(df.loc[key], df.iloc[2:])
191188

192189
# this is ok
193190
df.sort_index(inplace=True)

pandas/tests/indexing/test_multiindex.py

-4
Original file line numberDiff line numberDiff line change
@@ -359,10 +359,6 @@ def test_multiindex_perf_warn(self):
359359
'joe': ['x', 'x', 'z', 'y'],
360360
'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])
361361

362-
with tm.assert_produces_warning(PerformanceWarning,
363-
clear=[pd.core.index]):
364-
df.loc[(1, 'z')]
365-
366362
df = df.iloc[[2, 1, 3, 0]]
367363
with tm.assert_produces_warning(PerformanceWarning):
368364
df.loc[(0, )]

0 commit comments

Comments
 (0)