Skip to content

Clean multiindex keys #15615

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2214,7 +2214,7 @@ def query(self, expr, inplace=False, **kwargs):

try:
new_data = self.loc[res]
except ValueError:
except (ValueError, NotImplementedError):
# when res is multi-dimensional loc raises, but this is sometimes a
# valid query
new_data = self[res]
Expand Down
33 changes: 12 additions & 21 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1511,37 +1511,28 @@ def _getitem_axis(self, key, axis=0):
elif is_bool_indexer(key):
return self._getbool_axis(key, axis=axis)
elif is_list_like_indexer(key):
if isinstance(key, ABCDataFrame):
# GH 15438
raise NotImplementedError("Indexing a with a DataFrame key is "
"not implemented")
elif hasattr(key, 'ndim') and key.ndim > 1:
raise NotImplementedError("Indexing with a multidimensional "
"key is not implemented")

# convert various list-like indexers
# to a list of keys
# we will use the *values* of the object
# and NOT the index if its a PandasObject
if isinstance(labels, MultiIndex):

if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1:
# Series, or 0,1 ndim ndarray
if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim != 1:
# Series or 1-dim ndarray
# GH 14730
key = list(key)
elif isinstance(key, ABCDataFrame):
# GH 15438
raise NotImplementedError("Indexing a MultiIndex with a "
"DataFrame key is not "
"implemented")
elif hasattr(key, 'ndim') and key.ndim > 1:
raise NotImplementedError("Indexing a MultiIndex with a "
"multidimensional key is not "
"implemented")

if (not isinstance(key, tuple) and len(key) > 1 and
not isinstance(key[0], tuple)):
key = tuple([key])
if not isinstance(key, tuple):
return self._getitem_iterable(key, axis=axis)

# an iterable multi-selection
if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)):

if hasattr(key, 'ndim') and key.ndim > 1:
raise ValueError('Cannot index with multidimensional key')

else:
return self._getitem_iterable(key, axis=axis)

# nested tuple slicing
Expand Down
25 changes: 25 additions & 0 deletions pandas/tests/indexing/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,10 @@ def test_loc_getitem_series(self):
result = x.loc[empty]
tm.assert_series_equal(result, expected)

with tm.assertRaises(KeyError):
# GH15452
x.loc[[4, 5]]

def test_loc_getitem_array(self):
# GH15434
# passing an array as a key with a MultiIndex
Expand Down Expand Up @@ -203,6 +207,27 @@ def test_loc_getitem_array(self):
result = x.loc[scalar]
tm.assert_series_equal(result, expected)

def test_loc_generator(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There does not appear to be an equivalent test on Index with a generator, or even an iterator. Sure, they should work, but is this actually fixed in this PR, or is it just adding tests for something that already works?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR removes some broken code which prevented `__getitem__(generator)` from working, so yes, strictly speaking it fixes it, but this was a totally unintended consequence. I actually made sure to keep the tests for generators in two independent commits, which you can drop if you don't think they are useful. But there was already at least one test on indexing a Series with generators; I just expanded it a bit. No idea about indexing Indexes.

index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
x = Series(index=index, data=range(9), dtype=np.float64)
y = [1, 3]

# getitem:
expected = Series(
data=[0, 1, 2, 6, 7, 8],
index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
dtype=np.float64)
result = x.loc[iter(y)]
tm.assert_series_equal(result, expected)

# setitem:
expected = Series(
data=[9, 10, 11, 3, 4, 5, 12, 13, 14],
index=index,
dtype=np.float64)
x.loc[iter(y)] = range(9, 15)
tm.assert_series_equal(x, expected)

def test_iloc_getitem_multiindex(self):
mi_labels = DataFrame(np.random.randn(4, 3),
columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
Expand Down
39 changes: 39 additions & 0 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

from datetime import datetime, timedelta
import pytest

from numpy import nan
import numpy as np
Expand Down Expand Up @@ -290,6 +291,44 @@ def test_getitem_generator(self):
assert_series_equal(result, expected)
assert_series_equal(result2, expected)

def test_setitem_generator(self):
    """Assigning through an iterator of labels matches mask assignment.

    The reference result is built by assigning with the boolean mask
    ``self.series > 0``; the same values are then assigned using an
    iterator over the selected labels, via both ``[]`` and ``.loc``.
    """
    mask = self.series > 0
    labels = self.series[mask].index
    values = range(mask.sum())

    # reference: plain boolean-mask assignment
    expected = self.series.copy()
    expected[mask] = values

    # iterator of labels through __setitem__
    via_setitem = self.series.copy()
    via_setitem[iter(labels)] = values
    assert_series_equal(via_setitem, expected)

    # iterator of labels through .loc
    via_loc = self.series.copy()
    via_loc.loc[iter(labels)] = values
    assert_series_equal(via_loc, expected)

@pytest.mark.xfail(reason="Setitem with booleans generators unsupported")
def test_setitem_boolean_generator(self):
    """Assigning through an iterator of booleans (marked xfail).

    The reference result is built by assigning with the boolean mask
    ``self.series > 0``; the same assignment is then attempted with an
    iterator over that mask, via both ``[]`` and ``.loc``.
    """
    mask = self.series > 0
    values = range(mask.sum())

    # reference: plain boolean-mask assignment
    expected = self.series.copy()
    expected[mask] = values

    # iterator of booleans through __setitem__ (fails)
    via_setitem = self.series.copy()
    via_setitem[iter(mask)] = values
    assert_series_equal(via_setitem, expected)

    # iterator of booleans through .loc (fails)
    via_loc = self.series.copy()
    via_loc.loc[iter(mask)] = values
    assert_series_equal(via_loc, expected)

def test_type_promotion(self):
# GH12599
s = pd.Series()
Expand Down
12 changes: 8 additions & 4 deletions pandas/tests/sparse/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,13 +512,17 @@ def test_loc(self):
tm.assert_sp_series_equal(sparse.loc['B'],
orig.loc['B'].to_sparse())

result = sparse.loc[[1, 3, 4]]
exp = orig.loc[[1, 3, 4]].to_sparse()
with tm.assertRaises(KeyError):
# GH15452
sparse.loc[['D', 'E', 'F']]

result = sparse.loc[['A', 'B']]
exp = orig.loc[['A', 'B']].to_sparse()
tm.assert_sp_series_equal(result, exp)

# exceeds the bounds
result = sparse.loc[[1, 3, 4, 5]]
exp = orig.loc[[1, 3, 4, 5]].to_sparse()
result = sparse.loc[['A', 'B', 'C', 'D']]
exp = orig.loc[['A', 'B', 'C', 'D']].to_sparse()
tm.assert_sp_series_equal(result, exp)

# single element list (GH 15447)
Expand Down