From 725f762f52b0a49c31674fbcbe763a7bb6f4a1d0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 11:00:11 -0700 Subject: [PATCH 01/10] BUG: Series.__getitem__ with MultiIndex and leading integer level --- pandas/core/indexes/base.py | 11 ++++++---- pandas/core/indexes/multi.py | 20 +++++++++---------- .../tests/indexing/multiindex/test_getitem.py | 4 ++-- .../tests/indexing/multiindex/test_partial.py | 8 ++++++-- .../tests/indexing/multiindex/test_setitem.py | 7 ++++++- 5 files changed, 30 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df58593bc930c..73038bb44e236 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4568,10 +4568,7 @@ def get_value(self, series: "Series", key): ------- scalar or Series """ - if not is_scalar(key): - # if key is not a scalar, directly raise an error (the code below - # would convert to numpy arrays and raise later any way) - GH29926 - raise InvalidIndexError(key) + self._check_indexing_error(key) try: # GH 20882, 21257 @@ -4592,6 +4589,12 @@ def get_value(self, series: "Series", key): return self._get_values_for_loc(series, loc, key) + def _check_indexing_error(self, key): + if not is_scalar(key): + # if key is not a scalar, directly raise an error (the code below + # would convert to numpy arrays and raise later any way) - GH29926 + raise InvalidIndexError(key) + def _should_fallback_to_positional(self) -> bool: """ If an integer key is not found, should we fall back to positional indexing? diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 4e2d07ddf9225..b7a37a17f2784 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2333,23 +2333,21 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): # -------------------------------------------------------------------- # Indexing Methods - def get_value(self, series, key): - # Label-based + def _check_indexing_error(self, key): if not is_hashable(key) or is_iterator(key): # We allow tuples if they are hashable, whereas other Index # subclasses require scalar. # We have to explicitly exclude generators, as these are hashable. raise InvalidIndexError(key) - try: - loc = self.get_loc(key) - except KeyError: - if is_integer(key): - loc = key - else: - raise - - return self._get_values_for_loc(series, loc, key) + def _should_fallback_to_positional(self) -> bool: + """ + If an integer key is not found, should we fall back to positional indexing? + """ + if not self.nlevels: + return False + # GH#33355 + return self.levels[0]._should_fallback_to_positional() def _get_values_for_loc(self, series: "Series", loc, key): """ diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 7e75b5324445e..54b22dbc53466 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -87,8 +87,8 @@ def test_series_getitem_returns_scalar( (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), - (lambda s: s.__getitem__(len(s)), IndexError, "is out of bounds"), - (lambda s: s[len(s)], IndexError, "is out of bounds"), + (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) + (lambda s: s[len(s)], KeyError, ""), # match should include len(s) ( lambda s: s.iloc[len(s)], IndexError, diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 9d181bdcb9491..d5b3fd9e8430e 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -126,7 +126,11 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): # this works...for now df["A"].iloc[14] = 5 - assert df["A"][14] == 5 + assert df["A"].iloc[14] == 5 + with pytest.raises(KeyError, match="14"): + # TODO: really should be separate test + # GH#33355 dont fall-back to positional when leading level is int + df["A"][14] # --------------------------------------------------------------------- # AMBIGUOUS CASES! @@ -140,7 +144,7 @@ def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_da tm.assert_series_equal(result, expected) # need to put in some work here - + # FIXME: dont leave commented-out # self.ymd.loc[2000, 0] = 0 # assert (self.ymd.loc[2000]['A'] == 0).all() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 1f19244cf76d3..d60f6201b79fc 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -255,7 +255,12 @@ def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): assert notna(s.values[65:]).all() s[2000, 3, 10] = np.nan - assert isna(s[49]) + assert isna(s.iloc[49]) + + with pytest.raises(KeyError, match="49"): + # TODO: really should be separate test + # GH#33355 dont fall-back to positional when leading level is int + s[49] def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data From 3c2f9e759a8cc3fc47492f8970b228bdce42921b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 12:12:12 -0700 Subject: [PATCH 02/10] dedicated tests --- .../tests/indexing/multiindex/test_partial.py | 23 +++++++++++++++---- .../tests/indexing/multiindex/test_setitem.py | 2 +- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index d5b3fd9e8430e..3395aeb74a4b4 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, MultiIndex +from pandas import DataFrame, Int64Index, MultiIndex import pandas._testing as tm @@ -127,10 +127,25 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): # this works...for now df["A"].iloc[14] = 5 assert df["A"].iloc[14] == 5 + + def test_getitem_int_leading_level( + self, multiindex_year_month_day_dataframe_random_data + ): + # GH#33355 dont fall-back to positional when leading level is int + ymd = multiindex_year_month_day_dataframe_random_data + ser = ymd["A"] + mi = ser.index + assert isinstance(mi, MultiIndex) + assert isinstance(mi.levels[0], Int64Index) + + assert 14 not in mi.levels[0] + assert not mi.levels[0]._should_fallback_to_positional() + assert not mi._should_fallback_to_positional() + + with pytest.raises(KeyError, match="14"): + ser[14] with pytest.raises(KeyError, match="14"): - # TODO: really should be separate test - # GH#33355 dont fall-back to positional when leading level is int - df["A"][14] + mi.get_value(ser, 14) # --------------------------------------------------------------------- # AMBIGUOUS CASES! diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index d60f6201b79fc..853b92ea91274 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -236,6 +236,7 @@ def f(name, df2): f_index ) + # FIXME: dont leave commented-out # TODO(wesm): unused? # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T @@ -258,7 +259,6 @@ def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): assert isna(s.iloc[49]) with pytest.raises(KeyError, match="49"): - # TODO: really should be separate test # GH#33355 dont fall-back to positional when leading level is int s[49] From a75626d91fad97e7c872e1717c5850befb48f9bc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 12:14:02 -0700 Subject: [PATCH 03/10] whatnsew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 97a7f22df3985..e093e4896e635 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -385,6 +385,7 @@ Indexing - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) - Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`) +- Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) Missing ^^^^^^^ From f40886e0c1f42ef6cf370ab945876ac34e740a63 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 14:21:06 -0700 Subject: [PATCH 04/10] test --- pandas/tests/indexing/multiindex/test_partial.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 3395aeb74a4b4..ed11af8ef68ad 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Int64Index, MultiIndex +from pandas import DataFrame, Float64Index, Int64Index, MultiIndex import pandas._testing as tm @@ -128,15 +128,21 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): df["A"].iloc[14] = 5 assert df["A"].iloc[14] == 5 - def test_getitem_int_leading_level( - self, multiindex_year_month_day_dataframe_random_data + @pytest.mark.parametrize("dtype", [int, float]) + def test_getitem_intkey_leading_level( + self, multiindex_year_month_day_dataframe_random_data, dtype ): # GH#33355 dont fall-back to positional when leading level is int ymd = multiindex_year_month_day_dataframe_random_data + levels = ymd.index.levels + ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) ser = ymd["A"] mi = ser.index assert isinstance(mi, MultiIndex) - assert isinstance(mi.levels[0], Int64Index) + if dtype is int: + assert isinstance(mi.levels[0], Int64Index) + else: + assert isinstance(mi.levels[0], Float64Index) assert 14 not in mi.levels[0] assert not mi.levels[0]._should_fallback_to_positional() From ad4f16c973abd82a0e082f29c8bf65d92b19d7c1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 8 Apr 2020 16:22:31 -0700 Subject: [PATCH 05/10] REF: collect DataFrame.__setitem__ tests (#33408) --- .../tests/frame/indexing/test_categorical.py | 8 --- pandas/tests/frame/indexing/test_datetime.py | 9 ---- pandas/tests/frame/indexing/test_indexing.py | 16 ------ pandas/tests/frame/indexing/test_setitem.py | 50 +++++++++++++++++-- pandas/tests/frame/test_timeseries.py | 7 --- .../tests/indexing/multiindex/test_insert.py | 2 +- 6 files changed, 47 insertions(+), 45 deletions(-) diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index 0ae2f81108be9..d94dc8d2ffe00 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -391,11 +391,3 @@ def test_loc_indexing_preserves_index_category_dtype(self): result = df.loc[["a"]].index.levels[0] tm.assert_index_equal(result, expected) - - def test_wrong_length_cat_dtype_raises(self): - # GH29523 - cat = pd.Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"]) - df = pd.DataFrame({"bar": range(10)}) - err = "Length of values does not match length of index" - with pytest.raises(ValueError, match=err): - df["foo"] = cat diff --git a/pandas/tests/frame/indexing/test_datetime.py b/pandas/tests/frame/indexing/test_datetime.py index c1a7cb9f45a3a..1937a4c380dc9 100644 --- a/pandas/tests/frame/indexing/test_datetime.py +++ b/pandas/tests/frame/indexing/test_datetime.py @@ -44,12 +44,3 @@ def test_set_reset(self): df = result.set_index("foo") tm.assert_index_equal(df.index, idx) - - def test_scalar_assignment(self): - # issue #19843 - df = pd.DataFrame(index=(0, 1, 2)) - df["now"] = pd.Timestamp("20130101", tz="UTC") - expected = pd.DataFrame( - {"now": pd.Timestamp("20130101", tz="UTC")}, index=[0, 1, 2] - ) - tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index c3b9a7bf05c7b..ed3c4689c92d9 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1921,22 +1921,6 @@ def test_getitem_sparse_column(self): result = df.loc[:, "A"] tm.assert_series_equal(result, expected) - def test_setitem_with_sparse_value(self): - # GH8131 - df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) - sp_array = SparseArray([0, 0, 1]) - df["new_column"] = sp_array - tm.assert_series_equal( - df["new_column"], pd.Series(sp_array, name="new_column"), check_names=False - ) - - def test_setitem_with_unaligned_sparse_value(self): - df = pd.DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) - sp_series = pd.Series(SparseArray([0, 0, 1]), index=[2, 1, 0]) - df["new_column"] = sp_series - exp = pd.Series(SparseArray([1, 0, 0]), name="new_column") - tm.assert_series_equal(df["new_column"], exp) - def test_setitem_with_unaligned_tz_aware_datetime_column(self): # GH 12981 # Assignment of unaligned offset-aware datetime series. diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index c12643f413490..d53665539309c 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,13 +1,12 @@ import numpy as np import pytest -from pandas import DataFrame, Index, Series +from pandas import Categorical, DataFrame, Index, Series, Timestamp, date_range import pandas._testing as tm +from pandas.core.arrays import SparseArray -# Column add, remove, delete. - -class TestDataFrameMutateColumns: +class TestDataFrameSetItem: def test_setitem_error_msmgs(self): # GH 7432 @@ -84,3 +83,46 @@ def test_setitem_empty_columns(self): df["X"] = ["x", "y", "z"] exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"]) tm.assert_frame_equal(df, exp) + + def test_setitem_dt64_index_empty_columns(self): + rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") + df = DataFrame(index=np.arange(len(rng))) + + df["A"] = rng + assert df["A"].dtype == np.dtype("M8[ns]") + + def test_setitem_timestamp_empty_columns(self): + # GH#19843 + df = DataFrame(index=range(3)) + df["now"] = Timestamp("20130101", tz="UTC") + + expected = DataFrame( + [[Timestamp("20130101", tz="UTC")]] * 3, index=[0, 1, 2], columns=["now"], + ) + tm.assert_frame_equal(df, expected) + + def test_setitem_wrong_length_categorical_dtype_raises(self): + # GH#29523 + cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"]) + df = DataFrame(range(10), columns=["bar"]) + + msg = "Length of values does not match length of index" + with pytest.raises(ValueError, match=msg): + df["foo"] = cat + + def test_setitem_with_sparse_value(self): + # GH#8131 + df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_array = SparseArray([0, 0, 1]) + df["new_column"] = sp_array + + expected = Series(sp_array, name="new_column") + tm.assert_series_equal(df["new_column"], expected) + + def test_setitem_with_unaligned_sparse_value(self): + df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]}) + sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0]) + + df["new_column"] = sp_series + expected = Series(SparseArray([1, 0, 0]), name="new_column") + tm.assert_series_equal(df["new_column"], expected) diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index dea921a92ae37..63361789b8e50 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -13,13 +13,6 @@ def test_frame_ctor_datetime64_column(self): df = DataFrame({"A": np.random.randn(len(rng)), "B": dates}) assert np.issubdtype(df["B"].dtype, np.dtype("M8[ns]")) - def test_frame_append_datetime64_column(self): - rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s") - df = DataFrame(index=np.arange(len(rng))) - - df["A"] = rng - assert np.issubdtype(df["A"].dtype, np.dtype("M8[ns]")) - def test_frame_append_datetime64_col_other_units(self): n = 100 diff --git a/pandas/tests/indexing/multiindex/test_insert.py b/pandas/tests/indexing/multiindex/test_insert.py index 835e61da2fb3e..42922c3deeee4 100644 --- a/pandas/tests/indexing/multiindex/test_insert.py +++ b/pandas/tests/indexing/multiindex/test_insert.py @@ -5,7 +5,7 @@ class TestMultiIndexInsertion: - def test_mixed_depth_insert(self): + def test_setitem_mixed_depth(self): arrays = [ ["a", "top", "top", "routine1", "routine1", "routine2"], ["", "OD", "OD", "result1", "result2", "result1"], From becdcbf84ec2be405c44f1baf226e73c5b20d8e0 Mon Sep 17 00:00:00 2001 From: partev Date: Wed, 8 Apr 2020 22:22:06 -0400 Subject: [PATCH 06/10] use https link for Anaconda (#33413) --- doc/source/getting_started/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/getting_started/index.rst b/doc/source/getting_started/index.rst index 9ac8c58e1d8f2..3f15c91f83c6a 100644 --- a/doc/source/getting_started/index.rst +++ b/doc/source/getting_started/index.rst @@ -21,7 +21,7 @@ Installation

-pandas is part of the `Anaconda `__ distribution and can be +pandas is part of the `Anaconda `__ distribution and can be installed with Anaconda or Miniconda: .. raw:: html From 7bf22eb078d9d5bdcf0db880cb29373a23ccb0bd Mon Sep 17 00:00:00 2001 From: William Ayd Date: Thu, 9 Apr 2020 08:38:04 -0700 Subject: [PATCH 07/10] unpinned 37 (#33423) --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 935f8e97d4bf6..c874c5a8f68da 100644 --- a/environment.yml +++ b/environment.yml @@ -4,7 +4,7 @@ channels: dependencies: # required - numpy>=1.15 - - python=3.7 + - python=3 - python-dateutil>=2.6.1 - pytz From 0d5a0a7a2659501db97af84321ba92f934d2f2cf Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 9 Apr 2020 20:23:41 -0700 Subject: [PATCH 08/10] revert --- doc/source/whatsnew/v1.1.0.rst | 1 - pandas/core/indexes/base.py | 11 +++---- pandas/core/indexes/multi.py | 20 ++++++------ .../tests/indexing/multiindex/test_getitem.py | 4 +-- .../tests/indexing/multiindex/test_partial.py | 31 ++----------------- .../tests/indexing/multiindex/test_setitem.py | 7 +---- 6 files changed, 21 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0e68c3799efa7..5c39377899a20 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -447,7 +447,6 @@ Indexing - Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) - Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`) - Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`) -- Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 73038bb44e236..df58593bc930c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4568,7 +4568,10 @@ def get_value(self, series: "Series", key): ------- scalar or Series """ - self._check_indexing_error(key) + if not is_scalar(key): + # if key is not a scalar, directly raise an error (the code below + # would convert to numpy arrays and raise later any way) - GH29926 + raise InvalidIndexError(key) try: # GH 20882, 21257 @@ -4589,12 +4592,6 @@ def get_value(self, series: "Series", key): return self._get_values_for_loc(series, loc, key) - def _check_indexing_error(self, key): - if not is_scalar(key): - # if key is not a scalar, directly raise an error (the code below - # would convert to numpy arrays and raise later any way) - GH29926 - raise InvalidIndexError(key) - def _should_fallback_to_positional(self) -> bool: """ If an integer key is not found, should we fall back to positional indexing? diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6e36029441f1b..7aa1456846612 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2333,21 +2333,23 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None): # -------------------------------------------------------------------- # Indexing Methods - def _check_indexing_error(self, key): + def get_value(self, series, key): + # Label-based if not is_hashable(key) or is_iterator(key): # We allow tuples if they are hashable, whereas other Index # subclasses require scalar. # We have to explicitly exclude generators, as these are hashable. raise InvalidIndexError(key) - def _should_fallback_to_positional(self) -> bool: - """ - If an integer key is not found, should we fall back to positional indexing? - """ - if not self.nlevels: - return False - # GH#33355 - return self.levels[0]._should_fallback_to_positional() + try: + loc = self.get_loc(key) + except KeyError: + if is_integer(key): + loc = key + else: + raise + + return self._get_values_for_loc(series, loc, key) def _get_values_for_loc(self, series: "Series", loc, key): """ diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 54b22dbc53466..7e75b5324445e 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -87,8 +87,8 @@ def test_series_getitem_returns_scalar( (lambda s: s[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4)], KeyError, r"^\(2000, 3, 4\)$"), (lambda s: s.loc[(2000, 3, 4, 5)], IndexingError, "Too many indexers"), - (lambda s: s.__getitem__(len(s)), KeyError, ""), # match should include len(s) - (lambda s: s[len(s)], KeyError, ""), # match should include len(s) + (lambda s: s.__getitem__(len(s)), IndexError, "is out of bounds"), + (lambda s: s[len(s)], IndexError, "is out of bounds"), ( lambda s: s.iloc[len(s)], IndexError, diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index ed11af8ef68ad..9d181bdcb9491 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Float64Index, Int64Index, MultiIndex +from pandas import DataFrame, MultiIndex import pandas._testing as tm @@ -126,32 +126,7 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): # this works...for now df["A"].iloc[14] = 5 - assert df["A"].iloc[14] == 5 - - @pytest.mark.parametrize("dtype", [int, float]) - def test_getitem_intkey_leading_level( - self, multiindex_year_month_day_dataframe_random_data, dtype - ): - # GH#33355 dont fall-back to positional when leading level is int - ymd = multiindex_year_month_day_dataframe_random_data - levels = ymd.index.levels - ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:]) - ser = ymd["A"] - mi = ser.index - assert isinstance(mi, MultiIndex) - if dtype is int: - assert isinstance(mi.levels[0], Int64Index) - else: - assert isinstance(mi.levels[0], Float64Index) - - assert 14 not in mi.levels[0] - assert not mi.levels[0]._should_fallback_to_positional() - assert not mi._should_fallback_to_positional() - - with pytest.raises(KeyError, match="14"): - ser[14] - with pytest.raises(KeyError, match="14"): - mi.get_value(ser, 14) + assert df["A"][14] == 5 # --------------------------------------------------------------------- # AMBIGUOUS CASES! @@ -165,7 +140,7 @@ def test_partial_loc_missing(self, multiindex_year_month_day_dataframe_random_da tm.assert_series_equal(result, expected) # need to put in some work here - # FIXME: dont leave commented-out + # self.ymd.loc[2000, 0] = 0 # assert (self.ymd.loc[2000]['A'] == 0).all() diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 853b92ea91274..1f19244cf76d3 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -236,7 +236,6 @@ def f(name, df2): f_index ) - # FIXME: dont leave commented-out # TODO(wesm): unused? # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T @@ -256,11 +255,7 @@ def test_series_setitem(self, multiindex_year_month_day_dataframe_random_data): assert notna(s.values[65:]).all() s[2000, 3, 10] = np.nan - assert isna(s.iloc[49]) - - with pytest.raises(KeyError, match="49"): - # GH#33355 dont fall-back to positional when leading level is int - s[49] + assert isna(s[49]) def test_frame_getitem_setitem_boolean(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data From af0cdb8ff56f49c8e62c4b1b2881a0438da4bba7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 18:51:50 -0700 Subject: [PATCH 09/10] BUG: Series.__setitem__[listlike_of_integers] with IntervalIndex] --- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 4 +--- pandas/core/indexes/numeric.py | 2 +- pandas/core/series.py | 7 ++----- pandas/tests/series/indexing/test_getitem.py | 12 ++++++++++++ pandas/tests/series/indexing/test_indexing.py | 4 ++-- 7 files changed, 21 insertions(+), 14 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1c5cdd1617cbd..076eb8644d35d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4597,9 +4597,9 @@ def _check_indexing_error(self, key): def _should_fallback_to_positional(self) -> bool: """ - If an integer key is not found, should we fall back to positional indexing? + Should an integer key be treated as positional? """ - if len(self) > 0 and (self.holds_integer() or self.is_boolean()): + if self.holds_integer() or self.is_boolean(): return False return True diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 18e995ce4efd7..6ae16db2e82db 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -514,7 +514,7 @@ def is_overlapping(self) -> bool: # GH 23309 return self._engine.is_overlapping - def _should_fallback_to_positional(self): + def _should_fallback_to_positional(self) -> bool: # integer lookups in Series.__getitem__ are unambiguously # positional in this case return self.dtype.subtype.kind in ["m", "M"] diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 42e0d228dab09..d411867af2ef8 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2342,10 +2342,8 @@ def _check_indexing_error(self, key): def _should_fallback_to_positional(self) -> bool: """ - If an integer key is not found, should we fall back to positional indexing? + Should integer key(s) be treated as positional? """ - if not self.nlevels: - return False # GH#33355 return self.levels[0]._should_fallback_to_positional() diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index e2be58a56018d..06040166d0f9e 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -376,7 +376,7 @@ def astype(self, dtype, copy=True): # Indexing Methods @doc(Index._should_fallback_to_positional) - def _should_fallback_to_positional(self): + def _should_fallback_to_positional(self) -> bool: return False @doc(Index._convert_slice_indexer) diff --git a/pandas/core/series.py b/pandas/core/series.py index a73ef08b606e3..7eecdf59cf9d7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -79,7 +79,6 @@ from pandas.core.indexes.api import ( Float64Index, Index, - IntervalIndex, InvalidIndexError, MultiIndex, ensure_index, @@ -942,9 +941,7 @@ def _get_with(self, key): if key_type == "integer": # We need to decide whether to treat this as a positional indexer # (i.e. self.iloc) or label-based (i.e. self.loc) - if self.index.is_integer() or self.index.is_floating(): - return self.loc[key] - elif isinstance(self.index, IntervalIndex): + if not self.index._should_fallback_to_positional(): return self.loc[key] else: return self.iloc[key] @@ -1067,7 +1064,7 @@ def _set_with(self, key, value): # Note: key_type == "boolean" should not occur because that # should be caught by the is_bool_indexer check in __setitem__ if key_type == "integer": - if self.index.inferred_type == "integer": + if not self.index._should_fallback_to_positional(): self._set_labels(key, value) else: self._set_values(key, value) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index a49bd6d59d01b..2922f3c741320 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -90,6 +90,18 @@ def test_getitem_intlist_intindex_periodvalues(self): tm.assert_series_equal(result, exp) assert result.dtype == "Period[D]" + @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + def test_getitem_intlist_intervalindex_non_int(self, box): + # GH#33404 fall back to positional since ints are unambiguous + dti = date_range("2000-01-03", periods=3) + ii = pd.IntervalIndex.from_breaks(dti) + ser = Series(range(len(ii)), index=ii) + + expected = ser.iloc[:1] + key = box([0]) + result = ser[key] + tm.assert_series_equal(result, expected) + def test_getitem_generator(string_series): gen = (x > 0 for x in string_series) diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 522ed4df96ad2..4e7f97048275a 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -159,9 +159,9 @@ def test_getitem_out_of_bounds(datetime_series): datetime_series[len(datetime_series)] # GH #917 - msg = r"index -\d+ is out of bounds for axis 0 with size \d+" + # With a RangeIndex, an int key gives a KeyError s = Series([], dtype=object) - with pytest.raises(IndexError, match=msg): + with pytest.raises(KeyError, match="-1"): s[-1] From 9b14800ec21bb586f0f3039a4a6ab6278e59e558 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 12 Apr 2020 13:34:46 -0700 Subject: [PATCH 10/10] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 718de09a0c3e4..8236d34addbb7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -474,6 +474,7 @@ Indexing - Bug in :meth:`DataFrame.copy` _item_cache not invalidated after copy causes post-copy value updates to not be reflected (:issue:`31784`) - Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`) - Bug in :meth:`DataFrame.iloc` when slicing a single column-:class:`DataFrame`` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`) +- Bug in :meth:`Series.__setitem__` with an :class:`IntervalIndex` and a list-like key of integers (:issue:`33473`) Missing ^^^^^^^