From a4586d1311d086d487a0dded3a91e05032cf40a0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 10 Apr 2020 16:15:56 -0700 Subject: [PATCH 1/5] REF: Simplify __getitem__ by doing positional-int check first --- pandas/core/series.py | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index a73ef08b606e3..3f5927828e541 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -881,32 +881,35 @@ def __getitem__(self, key): if isinstance(key, (list, tuple)): key = unpack_1tuple(key) - if key_is_scalar or isinstance(self.index, MultiIndex): + if is_integer(key) and self.index._should_fallback_to_positional(): + return self._values[key] + + elif key_is_scalar: + return self._get_value(key) + + if ( + isinstance(key, tuple) + and is_hashable(key) + and isinstance(self.index, MultiIndex) + ): # Otherwise index.get_value will raise InvalidIndexError try: - result = self.index.get_value(self, key) + result = self._get_value(key) return result - except InvalidIndexError: - if not isinstance(self.index, MultiIndex): - raise - except (KeyError, ValueError): - if isinstance(key, tuple) and isinstance(self.index, MultiIndex): - # kludge - pass - else: - raise + except KeyError: + # We still have the corner case where this tuple is a key + # in the first level of our MultiIndex + return self._get_values_tuple(key) - if not key_is_scalar: - # avoid expensive checks if we know we have a scalar - if is_iterator(key): - key = list(key) + if is_iterator(key): + key = list(key) - if com.is_bool_indexer(key): - key = check_bool_indexer(self.index, key) - key = np.asarray(key, dtype=bool) - return self._get_values(key) + if com.is_bool_indexer(key): + key = check_bool_indexer(self.index, key) + key = np.asarray(key, dtype=bool) + return self._get_values(key) return self._get_with(key) From 5bd61416de378ee26f1f67d79b60d8f7070ca422 Mon Sep 17 00:00:00 2001 From: cleconte987 
Date: Sat, 11 Apr 2020 17:18:05 +0100 Subject: [PATCH 2/5] DOC: Fix capitalisation in doc/source/whatsnew - part1 (issue #32550) (#33382) --- doc/source/whatsnew/v0.4.x.rst | 4 ++-- doc/source/whatsnew/v0.5.0.rst | 4 ++-- doc/source/whatsnew/v0.6.0.rst | 4 ++-- scripts/validate_rst_title_capitalization.py | 12 ++++++++++++ 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.4.x.rst b/doc/source/whatsnew/v0.4.x.rst index 8e41e528f5b75..0ed7bb396674e 100644 --- a/doc/source/whatsnew/v0.4.x.rst +++ b/doc/source/whatsnew/v0.4.x.rst @@ -1,7 +1,7 @@ .. _whatsnew_04x: -v.0.4.1 through v0.4.3 (September 25 - October 9, 2011) -------------------------------------------------------- +Versions 0.4.1 through 0.4.3 (September 25 - October 9, 2011) +------------------------------------------------------------- {{ header }} diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst index 37c52ac7bb34e..7ccb141260f18 100644 --- a/doc/source/whatsnew/v0.5.0.rst +++ b/doc/source/whatsnew/v0.5.0.rst @@ -1,8 +1,8 @@ .. _whatsnew_050: -v.0.5.0 (October 24, 2011) --------------------------- +Version 0.5.0 (October 24, 2011) +-------------------------------- {{ header }} diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst index 973ba897b3234..f984b9ad71b63 100644 --- a/doc/source/whatsnew/v0.6.0.rst +++ b/doc/source/whatsnew/v0.6.0.rst @@ -1,7 +1,7 @@ .. 
_whatsnew_060: -v.0.6.0 (November 25, 2011) ---------------------------- +Version 0.6.0 (November 25, 2011) +--------------------------------- {{ header }} diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index edc9730db58e5..907db4ab4c7ce 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -99,6 +99,18 @@ "BusinessHour", "BusinessDay", "DateOffset", + "January", + "February", + "March", + "April", + "May", + "June", + "July", + "August", + "September", + "October", + "November", + "December", "Float64Index", } From b560d9580a1311c177fdffecb60905cb4626f8f4 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Sat, 11 Apr 2020 21:41:32 +0300 Subject: [PATCH 3/5] CLN: Nitpicks (#33464) * CLN: Nitpicks * Restarting CI as it failed to fetch a URL Co-authored-by: MomIsBestFriend <> --- pandas/_libs/reshape.pyx | 2 +- pandas/core/indexing.py | 7 +++---- pandas/core/internals/managers.py | 4 +--- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index e74b5919a4590..aed5e1d612088 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -36,7 +36,7 @@ ctypedef fused reshape_t: @cython.wraparound(False) @cython.boundscheck(False) -def unstack(reshape_t[:, :] values, uint8_t[:] mask, +def unstack(reshape_t[:, :] values, const uint8_t[:] mask, Py_ssize_t stride, Py_ssize_t length, Py_ssize_t width, reshape_t[:, :] new_values, uint8_t[:, :] new_mask): """ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 22a44d65a947a..b74399ed86fbd 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -827,7 +827,7 @@ def _getitem_nested_tuple(self, tup: Tuple): # this is iterative obj = self.obj axis = 0 - for i, key in enumerate(tup): + for key in tup: if com.is_null_slice(key): axis += 1 @@ -1420,7 +1420,7 @@ def 
_is_scalar_access(self, key: Tuple) -> bool: if len(key) != self.ndim: return False - for i, k in enumerate(key): + for k in key: if not is_integer(k): return False @@ -2234,8 +2234,7 @@ def is_nested_tuple(tup, labels) -> bool: if not isinstance(tup, tuple): return False - for i, k in enumerate(tup): - + for k in tup: if is_list_like(k) or isinstance(k, slice): return isinstance(labels, ABCMultiIndex) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f3b4ebad9cec1..d1293974b776a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -48,8 +48,6 @@ make_block, ) -from pandas.io.formats.printing import pprint_thing - # TODO: flexible with index=None and/or items=None T = TypeVar("T", bound="BlockManager") @@ -325,7 +323,7 @@ def __repr__(self) -> str: output += f"\nAxis {i}: {ax}" for block in self.blocks: - output += f"\n{pprint_thing(block)}" + output += f"\n{block}" return output def _verify_integrity(self) -> None: From 96c3093daea22f2c8cc2e691bc6482a8b48906d6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 13 Apr 2020 19:48:40 -0700 Subject: [PATCH 4/5] BUG: Series.__getitem__ with MultiIndex incorrectly doing positional --- pandas/core/indexing.py | 31 ------------------- pandas/core/series.py | 5 +-- pandas/tests/indexing/multiindex/test_loc.py | 20 +++++++----- .../tests/indexing/multiindex/test_slice.py | 10 +++--- pandas/tests/series/indexing/test_getitem.py | 13 ++++++++ 5 files changed, 31 insertions(+), 48 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b74399ed86fbd..9537d5ce7ffb5 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1080,37 +1080,6 @@ def _getitem_axis(self, key, axis: int): return self._getbool_axis(key, axis=axis) elif is_list_like_indexer(key): - # convert various list-like indexers - # to a list of keys - # we will use the *values* of the object - # and NOT the index if its a PandasObject - if 
isinstance(labels, ABCMultiIndex): - - if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: - # Series, or 0,1 ndim ndarray - # GH 14730 - key = list(key) - elif isinstance(key, ABCDataFrame): - # GH 15438 - raise NotImplementedError( - "Indexing a MultiIndex with a " - "DataFrame key is not " - "implemented" - ) - elif hasattr(key, "ndim") and key.ndim > 1: - raise NotImplementedError( - "Indexing a MultiIndex with a " - "multidimensional key is not " - "implemented" - ) - - if ( - not isinstance(key, tuple) - and len(key) - and not isinstance(key[0], tuple) - ): - key = tuple([key]) - # an iterable multi-selection if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)): diff --git a/pandas/core/series.py b/pandas/core/series.py index 3f5927828e541..49ce902a1e900 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -79,7 +79,6 @@ from pandas.core.indexes.api import ( Float64Index, Index, - IntervalIndex, InvalidIndexError, MultiIndex, ensure_index, @@ -945,9 +944,7 @@ def _get_with(self, key): if key_type == "integer": # We need to decide whether to treat this as a positional indexer # (i.e. self.iloc) or label-based (i.e. 
self.loc) - if self.index.is_integer() or self.index.is_floating(): - return self.loc[key] - elif isinstance(self.index, IntervalIndex): + if not self.index._should_fallback_to_positional(): return self.loc[key] else: return self.iloc[key] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index b7802d9b8fe0c..f0cbdbe8d0564 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -134,16 +134,15 @@ def test_loc_multiindex_missing_label_raises(self): @pytest.mark.parametrize("key, pos", [([2, 4], [0, 1]), ([2], []), ([2, 3], [])]) def test_loc_multiindex_list_missing_label(self, key, pos): - # GH 27148 - lists with missing labels do not raise: + # GH 27148 - lists with missing labels _do_ raise df = DataFrame( np.random.randn(3, 3), columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]], ) - expected = df.iloc[pos] - result = df.loc[key] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="not in index"): + df.loc[key] def test_loc_multiindex_too_many_dims_raises(self): # GH 14885 @@ -295,8 +294,8 @@ def convert_nested_indexer(indexer_type, keys): [ ([], []), # empty ok (["A"], slice(3)), - (["A", "D"], slice(3)), - (["D", "E"], []), # no values found - fine + (["A", "D"], []), # "D" isnt present -> raise + (["D", "E"], []), # no values found -> raise (["D"], []), # same, with single item list: GH 27148 (pd.IndexSlice[:, ["foo"]], slice(2, None, 3)), (pd.IndexSlice[:, ["foo", "bah"]], slice(2, None, 3)), @@ -310,8 +309,13 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos): ) s = Series(np.arange(9, dtype="int64"), index=idx).sort_index() expected = s.iloc[pos] - result = s.loc[indexer] - tm.assert_series_equal(result, expected) + + if expected.size == 0 and indexer != []: + with pytest.raises(KeyError, match=str(indexer)): + s.loc[indexer] + else: + result = s.loc[indexer] + 
tm.assert_series_equal(result, expected) def test_series_loc_getitem_fancy(multiindex_year_month_day_dataframe_random_data): diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index f367a92d0b006..532bb4f2e6dac 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -118,11 +118,11 @@ def test_per_axis_per_level_getitem(self): with pytest.raises(ValueError, match=msg): df.loc[(slice(None), np.array([True, False])), :] - # ambiguous notation - # this is interpreted as slicing on both axes (GH #16396) - result = df.loc[slice(None), [1]] - expected = df.iloc[:, []] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match=r"\[1\] not in index"): + # slice(None) is on the index, [1] is on the columns, but 1 is + # not in the columns, so we raise + # This used to treat [1] as positional GH#16396 + df.loc[slice(None), [1]] result = df.loc[(slice(None), [1]), :] expected = df.iloc[[0, 3]] diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index a49bd6d59d01b..23e80cf37b7ac 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -90,6 +90,19 @@ def test_getitem_intlist_intindex_periodvalues(self): tm.assert_series_equal(result, exp) assert result.dtype == "Period[D]" + @pytest.mark.parametrize("box", [list, np.array, pd.Index]) + @pytest.mark.parametrize("dtype", [np.int64, np.float64, np.uint64]) + def test_getitem_intlist_multiindex_numeric_level(self, dtype, box): + # GH#33404 do _not_ fall back to positional since ints are ambiguous + idx = pd.Index(range(4)).astype(dtype) + dti = date_range("2000-01-03", periods=3) + mi = pd.MultiIndex.from_product([idx, dti]) + ser = Series(range(len(mi))[::-1], index=mi) + + key = box([5]) + with pytest.raises(KeyError, match="5"): + ser[key] + def test_getitem_generator(string_series): 
gen = (x > 0 for x in string_series) From f4a32d7aab3a3d34f81847b365d0555f6eef457d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 17 Apr 2020 15:23:36 -0700 Subject: [PATCH 5/5] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a797090a83444..f6c9083c540a7 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -321,6 +321,36 @@ key and type of :class:`Index`. These now consistently raise ``KeyError`` (:iss ... KeyError: Timestamp('1970-01-01 00:00:00') +.. _whatsnew_110.api_breaking.indexing_int_multiindex_raises_key_errors: + +Failed Integer Lookups on MultiIndex Raise KeyError +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Indexing with integers with a :class:`MultiIndex` that has an integer-dtype +first level incorrectly failed to raise ``KeyError`` when one or more of +those integer keys is not present in the first level of the index (:issue:`33539`). + +.. ipython:: python + + idx = pd.Index(range(4)) + dti = pd.date_range("2000-01-03", periods=3) + mi = pd.MultiIndex.from_product([idx, dti]) + ser = pd.Series(range(len(mi)), index=mi) + +*Previous behavior*: + +.. code-block:: ipython + + In [5]: ser[[5]] + Out[5]: Series([], dtype: int64) + +*New behavior*: + +.. code-block:: ipython + + In [5]: ser[[5]] + ... + KeyError: '[5] not in index' + :meth:`DataFrame.merge` preserves right frame's row order ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :meth:`DataFrame.merge` now preserves right frame's row order when executing a right merge (:issue:`27453`)