From ca37c9cb6cf7f342b2b5d4dcca2ffacef2822997 Mon Sep 17 00:00:00 2001 From: DriesSchaumont Date: Sun, 28 Nov 2021 21:55:07 +0100 Subject: [PATCH 1/8] BUG: fix .loc.__setitem__ not raising when using too many indexers --- pandas/core/indexing.py | 9 +++++++-- pandas/tests/indexing/test_iloc.py | 9 +++++++++ pandas/tests/indexing/test_loc.py | 17 ++++++++++++++--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fc2204724aceb..c04711dd901cb 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -911,7 +911,7 @@ def _getitem_nested_tuple(self, tup: tuple): # we are only getting non-hashable tuples, in particular ones # that themselves contain a slice entry # See test_loc_series_getitem_too_many_dimensions - raise ValueError("Too many indices") + raise IndexingError("Too many indexers") # this is a series with a multi-index specified a tuple of # selectors @@ -1231,6 +1231,11 @@ def _convert_to_indexer(self, key, axis: int): is_int_index = labels.is_integer() is_int_positional = is_integer(key) and not is_int_index + if isinstance(key, tuple) and not isinstance(labels, MultiIndex): + if len(key) > 1: + raise IndexingError("Too many indexers") + key = key[0] + if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)): # Otherwise get_loc will raise InvalidIndexError @@ -1262,7 +1267,7 @@ def _convert_to_indexer(self, key, axis: int): if is_nested_tuple(key, labels): if self.ndim == 1 and any(isinstance(k, tuple) for k in key): # GH#35349 Raise if tuple in tuple for series - raise ValueError("Too many indices") + raise IndexingError("Too many indexers") return labels.get_locs(key) elif is_list_like_indexer(key): diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index e088f1ce87a6a..85adead2d7b68 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1189,6 +1189,15 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): # GH#32257 we let numpy do validation, get their exception float_frame.iloc[:, :, :] = 1 + def test_iloc_setitem_indexer_length(self): + # GH#13831 + ser = Series([10]) + with pytest.raises(IndexError, match="too many indices for array"): + ser.iloc[0, 0] = 1000 + + with pytest.raises(IndexingError, match="Too many indexers"): + ser.iloc[0, 0] + # TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer @td.skip_array_manager_not_yet_implemented def test_iloc_frame_indexer(self): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2a9ee81b7a23a..8458d297320a1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2715,6 +2715,17 @@ def test_loc_getitem_multiindex_tuple_level(): assert result2 == 6 +def test_loc_setitem_indexer_length(): + # GH#13831 + ser = Series([10]) + msg = "Too many indexers" + with pytest.raises(IndexingError, match=msg): + ser.loc[0, 0] = 1000 + + with pytest.raises(IndexingError, match=msg): + ser.loc[0, 0] + + class TestLocSeries: @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) def test_loc_uint64(self, val, expected): @@ -2889,11 +2900,11 @@ def test_loc_series_getitem_too_many_dimensions(self, indexer): index=MultiIndex.from_tuples([("A", "0"), ("A", "1"), ("B", "0")]), data=[21, 22, 23], ) - msg = "Too many indices" - with pytest.raises(ValueError, match=msg): + msg = "Too many indexers" + with pytest.raises(IndexingError, match=msg): ser.loc[indexer, :] - with pytest.raises(ValueError, match=msg): + with pytest.raises(IndexingError, match=msg): ser.loc[indexer, :] = 1 def test_loc_setitem(self, string_series): From d886470c978a6bf22b51f203f8a6cc1e6d21d378 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Tue, 30 Nov 2021 14:55:26 +0100 Subject: [PATCH 2/8] Updates solution and added extra test --- pandas/core/indexing.py | 11 +++++++---- pandas/tests/indexing/test_iloc.py | 11 +++++++---- pandas/tests/indexing/test_loc.py | 23 +++++++++++++++++++---- 3 files changed, 33 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c04711dd901cb..150ba6b4b7b83 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1231,10 +1231,13 @@ def _convert_to_indexer(self, key, axis: int): is_int_index = labels.is_integer() is_int_positional = is_integer(key) and not is_int_index - if isinstance(key, tuple) and not isinstance(labels, MultiIndex): - if len(key) > 1: - raise IndexingError("Too many indexers") - key = key[0] + if ( + isinstance(key, tuple) + and not isinstance(labels, MultiIndex) + and self.ndim < 2 + and len(key) > 1 + ): + raise IndexingError("Too many indexers") if is_scalar(key) or (isinstance(labels, MultiIndex) and is_hashable(key)): # Otherwise get_loc will raise InvalidIndexError diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 85adead2d7b68..272fcb3a3e317 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1189,14 +1189,17 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): # GH#32257 we let numpy do validation, get their exception float_frame.iloc[:, :, :] = 1 - def test_iloc_setitem_indexer_length(self): + @pytest.mark.parametrize( + "ser, keys", + [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], + ) + def test_iloc_setitem_indexer_length(self, ser, keys): # GH#13831 - ser = Series([10]) with pytest.raises(IndexError, match="too many indices for array"): - ser.iloc[0, 0] = 1000 + ser.iloc[keys] = 1000 with pytest.raises(IndexingError, match="Too many indexers"): - ser.iloc[0, 0] + ser.iloc[keys] # TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer @td.skip_array_manager_not_yet_implemented diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8458d297320a1..258308a0b6fd9 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2685,6 +2685,18 @@ def test_loc_with_period_index_indexer(): tm.assert_frame_equal(df, df.loc[list(idx)]) +def test_loc_setitem_multiindex_timestamp(): + # GH#13831 + vals = np.random.randn(8, 6) + idx = date_range("1/1/2000", periods=8) + cols = ["A", "B", "C", "D", "E", "F"] + df_mt = DataFrame(vals, index=idx, columns=cols) + df_mt.loc[df_mt.index[1], ("A", "B")] = np.nan + vals[1][0:2] = np.nan + res = DataFrame(vals, index=idx, columns=cols) + tm.assert_frame_equal(res, df_mt) + + def test_loc_getitem_multiindex_tuple_level(): # GH#27591 lev1 = ["a", "b", "c"] @@ -2715,15 +2727,18 @@ def test_loc_getitem_multiindex_tuple_level(): assert result2 == 6 -def test_loc_setitem_indexer_length(): +@pytest.mark.parametrize( + "ser, keys", + [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], +) +def test_loc_setitem_indexer_length(ser, keys): # GH#13831 - ser = Series([10]) msg = "Too many indexers" with pytest.raises(IndexingError, match=msg): - ser.loc[0, 0] = 1000 + ser.loc[keys] = 1000 with pytest.raises(IndexingError, match=msg): - ser.loc[0, 0] + ser.loc[keys] class TestLocSeries: From 5bae428201212d068221f58a06f77c3740a03bc5 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Tue, 30 Nov 2021 16:00:43 +0100 Subject: [PATCH 3/8] Add whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index bcebe3ab024ba..796fa6c005d27 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -680,7 +680,7 @@ Indexing - Bug in indexing on columns with ``loc`` or ``iloc`` using a slice with a negative step with ``ExtensionDtype`` columns incorrectly raising (:issue:`44551`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) -- +- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys (:issue:`13831`) Missing ^^^^^^^ From 06eb7e453fcf511b7c9c3a56187aca2f08194209 Mon Sep 17 00:00:00 2001 From: DriesSchaumont Date: Wed, 8 Dec 2021 12:12:09 +0100 Subject: [PATCH 4/8] Updates for review. --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/tests/indexing/test_iloc.py | 12 ------------ pandas/tests/indexing/test_indexing.py | 22 ++++++++++++++++++++++ pandas/tests/indexing/test_loc.py | 20 +++----------------- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 796fa6c005d27..248d538ca98dc 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -680,7 +680,7 @@ Indexing - Bug in indexing on columns with ``loc`` or ``iloc`` using a slice with a negative step with ``ExtensionDtype`` columns incorrectly raising (:issue:`44551`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) -- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys (:issue:`13831`) +- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 272fcb3a3e317..e088f1ce87a6a 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -1189,18 +1189,6 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame): # GH#32257 we let numpy do validation, get their exception float_frame.iloc[:, :, :] = 1 - @pytest.mark.parametrize( - "ser, keys", - [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], - ) - def test_iloc_setitem_indexer_length(self, ser, keys): - # GH#13831 - with pytest.raises(IndexError, match="too many indices for array"): - ser.iloc[keys] = 1000 - - with pytest.raises(IndexingError, match="Too many indexers"): - ser.iloc[keys] - # TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer @td.skip_array_manager_not_yet_implemented def test_iloc_frame_indexer(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0f9612fa5c96c..0ccce2231d060 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -26,6 +26,7 @@ ) import pandas._testing as tm from pandas.core.api import Float64Index +from pandas.core.indexing import IndexingError from pandas.tests.indexing.common import _mklbl from pandas.tests.indexing.test_floats import gen_obj @@ -980,3 +981,24 @@ def test_extension_array_cross_section_converts(): result = df.iloc[0] tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "ser, keys", + [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], +) +def test_ser_indexer_exceeds_dimensions(ser, keys, indexer_sli): + # GH#13831 + if indexer_sli == tm.setitem: + return + + exp_err, exp_msg = IndexingError, "Too many indexers" + with pytest.raises(exp_err, match=exp_msg): + indexer_sli(ser)[keys] + + if indexer_sli == tm.iloc: + # For iloc.__setitem__ we let numpy handle the error reporting. + exp_err, exp_msg = IndexError, "too many indices for array" + + with pytest.raises(exp_err, match=exp_msg): + indexer_sli(ser)[keys] = 0 diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 258308a0b6fd9..dff5f7f2566c1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2690,11 +2690,11 @@ def test_loc_setitem_multiindex_timestamp(): vals = np.random.randn(8, 6) idx = date_range("1/1/2000", periods=8) cols = ["A", "B", "C", "D", "E", "F"] - df_mt = DataFrame(vals, index=idx, columns=cols) - df_mt.loc[df_mt.index[1], ("A", "B")] = np.nan + exp = DataFrame(vals, index=idx, columns=cols) + exp.loc[exp.index[1], ("A", "B")] = np.nan vals[1][0:2] = np.nan res = DataFrame(vals, index=idx, columns=cols) - tm.assert_frame_equal(res, df_mt) + tm.assert_frame_equal(res, exp) def test_loc_getitem_multiindex_tuple_level(): @@ -2727,20 +2727,6 @@ def test_loc_getitem_multiindex_tuple_level(): assert result2 == 6 -@pytest.mark.parametrize( - "ser, keys", - [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], -) -def test_loc_setitem_indexer_length(ser, keys): - # GH#13831 - msg = "Too many indexers" - with pytest.raises(IndexingError, match=msg): - ser.loc[keys] = 1000 - - with pytest.raises(IndexingError, match=msg): - ser.loc[keys] - - class TestLocSeries: @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) def test_loc_uint64(self, val, expected): From 8e444638c98f6d4471d9972c7e5c4db5f76e586e Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Thu, 16 Dec 2021 15:52:19 +0100 Subject: [PATCH 5/8] Add test for list indexer. --- pandas/tests/indexing/test_indexing.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index fc1f94636ce7e..f0c9dc766dc13 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -985,7 +985,7 @@ def test_extension_array_cross_section_converts(): "ser, keys", [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], ) -def test_ser_indexer_exceeds_dimensions(ser, keys, indexer_sli): +def test_ser_tup_indexer_exceeds_dimensions(ser, keys, indexer_sli): # GH#13831 if indexer_sli == tm.setitem: return @@ -1000,3 +1000,13 @@ def test_ser_indexer_exceeds_dimensions(ser, keys, indexer_sli): with pytest.raises(exp_err, match=exp_msg): indexer_sli(ser)[keys] = 0 + + +def test_ser_list_indexer_exceeds_dimensions(indexer_sli): + # GH#13831 + if indexer_sli == tm.setitem: + return + ser = Series([10]) + res = indexer_sli(ser)[[0, 0]] + exp = Series([10, 10], index=Index([0, 0])) + tm.assert_series_equal(res, exp) From 78800158efc9f8f9c4f8b4651191fc908dfe24ff Mon Sep 17 00:00:00 2001 From: DriesSchaumont Date: Sun, 19 Dec 2021 16:33:21 +0100 Subject: [PATCH 6/8] Add indexer_li fixture. --- pandas/conftest.py | 8 ++++++++ pandas/tests/indexing/test_indexing.py | 17 ++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 8c870bc98b5ff..4466df0dc5f73 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -1682,6 +1682,14 @@ def indexer_sli(request): return request.param +@pytest.fixture(params=[tm.loc, tm.iloc]) +def indexer_li(request): + """ + Parametrize over loc.__getitem__, iloc.__getitem__ + """ + return request.param + + @pytest.fixture(params=[tm.setitem, tm.iloc]) def indexer_si(request): """ diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b42dd0bca0821..d05770c53edfb 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -985,28 +985,23 @@ def test_extension_array_cross_section_converts(): "ser, keys", [(Series([10]), (0, 0)), (Series([1, 2, 3], index=list("abc")), (0, 1))], ) -def test_ser_tup_indexer_exceeds_dimensions(ser, keys, indexer_sli): +def test_ser_tup_indexer_exceeds_dimensions(ser, keys, indexer_li): # GH#13831 - if indexer_sli == tm.setitem: - return - exp_err, exp_msg = IndexingError, "Too many indexers" with pytest.raises(exp_err, match=exp_msg): - indexer_sli(ser)[keys] + indexer_li(ser)[keys] - if indexer_sli == tm.iloc: + if indexer_li == tm.iloc: # For iloc.__setitem__ we let numpy handle the error reporting. exp_err, exp_msg = IndexError, "too many indices for array" with pytest.raises(exp_err, match=exp_msg): - indexer_sli(ser)[keys] = 0 + indexer_li(ser)[keys] = 0 -def test_ser_list_indexer_exceeds_dimensions(indexer_sli): +def test_ser_list_indexer_exceeds_dimensions(indexer_li): # GH#13831 - if indexer_sli == tm.setitem: - return ser = Series([10]) - res = indexer_sli(ser)[[0, 0]] + res = indexer_li(ser)[[0, 0]] exp = Series([10, 10], index=Index([0, 0])) tm.assert_series_equal(res, exp) From b5c664d33802ddef7e76d3c5a8a1e4df262e3548 Mon Sep 17 00:00:00 2001 From: DriesSchaumont Date: Sun, 9 Jan 2022 12:12:10 +0100 Subject: [PATCH 7/8] Add comment to test. --- pandas/tests/indexing/test_indexing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 992b8d313cf63..34f583a32ee4e 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -1008,6 +1008,8 @@ def test_ser_tup_indexer_exceeds_dimensions(ser, keys, indexer_li): def test_ser_list_indexer_exceeds_dimensions(indexer_li): # GH#13831 + # Make sure an exception is raised when a tuple exceeds the dimension of the series, + # but not list when a list is used. ser = Series([10]) res = indexer_li(ser)[[0, 0]] exp = Series([10, 10], index=Index([0, 0])) From f7bc4a053225feb89f5731106af12a8755c06ec7 Mon Sep 17 00:00:00 2001 From: Dries Schaumont Date: Tue, 18 Jan 2022 09:04:22 +0100 Subject: [PATCH 8/8] Move docs to 1.5 --- doc/source/whatsnew/v1.4.0.rst | 1 - doc/source/whatsnew/v1.5.0.rst | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4e90a4c45f04b..5dd6fa0d4f72d 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -809,7 +809,6 @@ Indexing - Bug in :meth:`DataFrame.loc.__setitem__` changing dtype when indexer was completely ``False`` (:issue:`37550`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` returning boolean mask instead of array of integers for a non unique and non monotonic index (:issue:`44084`) - Bug in :meth:`IntervalIndex.get_indexer_non_unique` not handling targets of ``dtype`` 'object' with NaNs correctly (:issue:`44482`) -- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) - Fixed regression where a single column ``np.matrix`` was no longer coerced to a 1d ``np.ndarray`` when added to a :class:`DataFrame` (:issue:`42376`) - Bug in :meth:`Series.__getitem__` with a :class:`CategoricalIndex` of integers treating lists of integers as positional indexers, inconsistent with the behavior with a single scalar integer (:issue:`15470`, :issue:`14865`) - Bug in :meth:`Series.__setitem__` when setting floats or integers into integer-dtype series failing to upcast when necessary to retain precision (:issue:`45121`) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e723918ad8b4b..f88b4e2691df9 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -153,7 +153,8 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) -- +- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`) + Missing ^^^^^^^