Skip to content

Make MultiIndex.get_loc raise for unhashable type #35914

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Fixed regressions
- Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`)
- Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`)
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
- Regression where :meth:`MultiIndex.get_loc` would return a slice spanning the full index when passed an empty list (:issue:`35878`)
- Fix regression in invalid cache after an indexing operation; this can manifest when setting which does not update the data (:issue:`35521`)
- Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`)
- Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`)
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -2725,6 +2725,8 @@ def get_loc(self, key, method=None):
"currently supported for MultiIndex"
)

hash(key)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this raise InvalidIndexError?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That was my original thought, but it broke an existing test because evidently passing a singleton list with only np.nan raises the unhashable type error (it hits a line that uses this approach of trying to hash the key):

import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
idx.get_loc([np.nan])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-6-40dd73c454ae> in <module>
----> 1 idx.get_loc([np.nan])

~/opt/miniconda3/envs/pandas-1.1.1/lib/python3.8/site-packages/pandas/core/indexes/multi.py in get_loc(self, key, method)
   2717         lead_key, follow_key = key[:i], key[i:]
   2718         start, stop = (
-> 2719             self.slice_locs(lead_key, lead_key) if lead_key else (0, len(self))
   2720         )
   2721

~/opt/miniconda3/envs/pandas-1.1.1/lib/python3.8/site-packages/pandas/core/indexes/multi.py in slice_locs(self, start, end, step, kind)
   2578         # This function adds nothing to its parent implementation (the magic
   2579         # happens in get_slice_bound method), but it adds meaningful doc.
-> 2580         return super().slice_locs(start, end, step, kind=kind)
   2581
   2582     def _partial_tup_index(self, tup, side="left"):

~/opt/miniconda3/envs/pandas-1.1.1/lib/python3.8/site-packages/pandas/core/indexes/base.py in slice_locs(self, start, end, step, kind)
   5159         start_slice = None
   5160         if start is not None:
-> 5161             start_slice = self.get_slice_bound(start, "left", kind)
   5162         if start_slice is None:
   5163             start_slice = 0

~/opt/miniconda3/envs/pandas-1.1.1/lib/python3.8/site-packages/pandas/core/indexes/multi.py in get_slice_bound(self, label, side, kind)
   2522         if not isinstance(label, tuple):
   2523             label = (label,)
-> 2524         return self._partial_tup_index(label, side=side)
   2525
   2526     def slice_locs(self, start=None, end=None, step=None, kind=None):

~/opt/miniconda3/envs/pandas-1.1.1/lib/python3.8/site-packages/pandas/core/indexes/multi.py in _partial_tup_index(self, tup, side)
   2593             section = labs[start:end]
   2594
-> 2595             if lab not in lev and not isna(lab):
   2596                 if not lev.is_type_compatible(lib.infer_dtype([lab], skipna=False)):
   2597                     raise TypeError(f"Level type mismatch: {lab}")

~/opt/miniconda3/envs/pandas-1.1.1/lib/python3.8/site-packages/pandas/core/indexes/base.py in __contains__(self, key)
   4063         False
   4064         """
-> 4065         hash(key)
   4066         try:
   4067             return key in self._engine

TypeError: unhashable type: 'list'


def _maybe_to_slice(loc):
"""convert integer indexer to boolean mask or slice if possible"""
if not isinstance(loc, np.ndarray) or loc.dtype != "int64":
Expand All @@ -2739,8 +2741,7 @@ def _maybe_to_slice(loc):
mask[loc] = True
return mask

if not isinstance(key, (tuple, list)):
# not including list here breaks some indexing, xref #30892
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is okay to remove this comment since we're not actually checking for list here, or should it be moved? cc @jbrockmendel

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right (though I haven't checked #30892).

if not isinstance(key, tuple):
loc = self._get_level_indexer(key, level=0)
return _maybe_to_slice(loc)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2111,7 +2111,7 @@ def test_type_error_multiindex(self):
)
dg = df.pivot_table(index="i", columns="c", values=["x", "y"])

with pytest.raises(TypeError, match="is an invalid key"):
with pytest.raises(TypeError, match="unhashable type"):
dg[:, 0]

index = Index(range(2), name="i")
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexing/multiindex/test_multiindex.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

import pandas._libs.index as _index
from pandas.errors import PerformanceWarning
Expand Down Expand Up @@ -83,3 +84,10 @@ def test_nested_tuples_duplicates(self):
df3 = df.copy(deep=True)
df3.loc[[(dti[0], "a")], "c2"] = 1.0
tm.assert_frame_equal(df3, expected)

def test_multiindex_get_loc_list_raises(self):
    """Check that ``MultiIndex.get_loc`` raises ``TypeError`` for a list key."""
    # Regression test for https://github.com/pandas-dev/pandas/issues/35878
    mi = pd.MultiIndex.from_tuples([("a", 1), ("b", 2)])
    with pytest.raises(TypeError, match="unhashable type"):
        mi.get_loc([])
11 changes: 10 additions & 1 deletion pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np

from pandas import NaT, Series, date_range
from pandas import MultiIndex, NaT, Series, date_range
import pandas.testing as tm


class TestSetitemDT64Values:
Expand All @@ -17,3 +18,11 @@ def test_setitem_none_nan(self):

series[5:7] = np.nan
assert series[6] is NaT

def test_setitem_multiindex_empty_slice(self):
    """Check that ``.loc[[]] = value`` leaves a MultiIndex-ed Series unchanged."""
    # Regression test for https://github.com/pandas-dev/pandas/issues/35878
    mi = MultiIndex.from_tuples([("a", 1), ("b", 2)])
    ser = Series([1, 2], index=mi)
    # Snapshot taken before the assignment; an empty indexer selects no rows.
    untouched = ser.copy()
    ser.loc[[]] = 0
    tm.assert_series_equal(ser, untouched)