Skip to content

BUG: fix .loc.__setitem__ not raising when using too many indexers #44656

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 30, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,7 @@ def _getitem_nested_tuple(self, tup: tuple):
# we are only getting non-hashable tuples, in particular ones
# that themselves contain a slice entry
# See test_loc_series_getitem_too_many_dimensions
raise ValueError("Too many indices")
raise IndexingError("Too many indexers")

# this is a series with a multi-index specified a tuple of
# selectors
Expand Down Expand Up @@ -1231,6 +1231,14 @@ def _convert_to_indexer(self, key, axis: int):
is_int_index = labels.is_integer()
is_int_positional = is_integer(key) and not is_int_index

if (
isinstance(key, tuple)
and not isinstance(labels, MultiIndex)
and self.ndim < 2
and len(key) > 1
):
raise IndexingError("Too many indexers")

if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)):
# Otherwise get_loc will raise InvalidIndexError

Expand Down Expand Up @@ -1262,7 +1270,7 @@ def _convert_to_indexer(self, key, axis: int):
if is_nested_tuple(key, labels):
if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
# GH#35349 Raise if tuple in tuple for series
raise ValueError("Too many indices")
raise IndexingError("Too many indexers")
return labels.get_locs(key)

elif is_list_like_indexer(key):
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1189,6 +1189,18 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
# GH#32257 we let numpy do validation, get their exception
float_frame.iloc[:, :, :] = 1

@pytest.mark.parametrize(
"ser, keys",
[(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))],
)
def test_iloc_setitem_indexer_length(self, ser, keys):
# GH#13831
with pytest.raises(IndexError, match="too many indices for array"):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note the difference in error between ser.iloc[keys] = 1000 and ser.iloc[keys]. IndexError is the result of the fact that we let numpy handle this, while pandas raises IndexingError. I was wondering if this should be made consistent? Us catching and re-raising from numpy seems ugly.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cc @jbrockmendel

but @DriesSchaumont I think we would want to catch that case & re-raise appropriately

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC a while back we removed some checks (for perf) and let numpy exceptions surface.

Not knowing anything about the specific use case, in cases where we have a choice between raising IndexError vs IndexingError (i.e. either would be a reasonable choice) I'd rather raise IndexError.

ser.iloc[keys] = 1000

with pytest.raises(IndexingError, match="Too many indexers"):
ser.iloc[keys]

# TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer
@td.skip_array_manager_not_yet_implemented
def test_iloc_frame_indexer(self):
Expand Down
32 changes: 29 additions & 3 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2685,6 +2685,18 @@ def test_loc_with_period_index_indexer():
tm.assert_frame_equal(df, df.loc[list(idx)])


def test_loc_setitem_multiindex_timestamp():
# GH#13831
vals = np.random.randn(8, 6)
idx = date_range("1/1/2000", periods=8)
cols = ["A", "B", "C", "D", "E", "F"]
df_mt = DataFrame(vals, index=idx, columns=cols)
df_mt.loc[df_mt.index[1], ("A", "B")] = np.nan
vals[1][0:2] = np.nan
res = DataFrame(vals, index=idx, columns=cols)
tm.assert_frame_equal(res, df_mt)


def test_loc_getitem_multiindex_tuple_level():
# GH#27591
lev1 = ["a", "b", "c"]
Expand Down Expand Up @@ -2715,6 +2727,20 @@ def test_loc_getitem_multiindex_tuple_level():
assert result2 == 6


@pytest.mark.parametrize(
"ser, keys",
[(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))],
)
def test_loc_setitem_indexer_length(ser, keys):
# GH#13831
msg = "Too many indexers"
with pytest.raises(IndexingError, match=msg):
ser.loc[keys] = 1000

with pytest.raises(IndexingError, match=msg):
ser.loc[keys]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can use indexer_sli fixture to share this test with the iloc one i think

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, also moved the tests.



class TestLocSeries:
@pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)])
def test_loc_uint64(self, val, expected):
Expand Down Expand Up @@ -2889,11 +2915,11 @@ def test_loc_series_getitem_too_many_dimensions(self, indexer):
index=MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")]),
data=[21, 22, 23],
)
msg = "Too many indices"
with pytest.raises(ValueError, match=msg):
msg = "Too many indexers"
with pytest.raises(IndexingError, match=msg):
ser.loc[indexer, :]

with pytest.raises(ValueError, match=msg):
with pytest.raises(IndexingError, match=msg):
ser.loc[indexer, :] = 1

def test_loc_setitem(self, string_series):
Expand Down