From c8df1c82e77a7e59d8596780ec22c883d57dd2b5 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 02:03:40 +0100 Subject: [PATCH 01/23] Bug in loc raised Error when non integer interval was given for MultiIndex --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexes/multi.py | 5 +++- pandas/tests/indexing/multiindex/test_loc.py | 25 ++++++++++++++++++++ 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index d07db18ee5df0..a3e840dc22077 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -466,6 +466,7 @@ Indexing - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`MultiIndex` with a level named "0" (:issue:`37194`) - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) +- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non integer interval was given to select values from :class:`MultiIndex` (:issue:`25165`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 65e71a6109a5a..6007ede2f5338 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2702,7 +2702,10 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: if is_scalar(key) and isna(key): return -1 else: - return level_index.get_loc(key) + loc = level_index.get_loc(key) + # if isinstance(loc, slice): + # return loc.start + return loc def get_loc(self, key, method=None): """ diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index d79af1ea6b804..bc6519ffca2c4 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -598,3 +598,28 @@ def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_da result = ser[2000, 5] expected = df.loc[2000, 5]["A"] tm.assert_series_equal(result, expected) + + +def test_getitem_loc_datetime(): + # GH: 25165 + date_idx = pd.date_range("2019", periods=2, freq="MS") + df = pd.DataFrame( + list(range(4)), + index=pd.MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), + ) + expected = pd.DataFrame( + [2, 3], + index=pd.MultiIndex.from_product( + [[pd.to_datetime("2019-02-01")], [0, 1]], names=["x", "y"] + ), + ) + result = df["2019-2":] + tm.assert_frame_equal(result, expected) + result = df.loc["2019-2":] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis=0)["2019-2":] + tm.assert_frame_equal(result, expected) + + result = df.loc["2019-2":, :] + tm.assert_frame_equal(result, expected) From 9e62c50a2bcb5611bbb311b6b8a3e52b4f89570c Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 02:05:26 +0100 Subject: [PATCH 02/23] Adjust tests --- pandas/tests/indexing/multiindex/test_loc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index bc6519ffca2c4..cf5a8faf7bf73 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -603,13 +603,13 @@ def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_da def test_getitem_loc_datetime(): # GH: 25165 date_idx = pd.date_range("2019", periods=2, freq="MS") - df = pd.DataFrame( + df = DataFrame( list(range(4)), - index=pd.MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), + index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), ) - expected = pd.DataFrame( + expected = DataFrame( [2, 3], - index=pd.MultiIndex.from_product( + index=MultiIndex.from_product( [[pd.to_datetime("2019-02-01")], [0, 1]], names=["x", "y"] ), ) From 8377a799c254fb72401e17ff91c3a124d21324af Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 02:51:54 +0100 Subject: [PATCH 03/23] Comment it back in --- pandas/core/indexes/multi.py | 4 +-- pandas/tests/indexing/multiindex/test_loc.py | 25 ------------------ .../tests/indexing/multiindex/test_partial.py | 26 ++++++++++++++++++- 3 files changed, 27 insertions(+), 28 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6007ede2f5338..52da1dc173713 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2703,8 +2703,8 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: return -1 else: loc = level_index.get_loc(key) - # if isinstance(loc, slice): - # return loc.start + if isinstance(loc, slice): + return loc.start return loc def get_loc(self, key, method=None): diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index cf5a8faf7bf73..d79af1ea6b804 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -598,28 +598,3 @@ def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_da result = ser[2000, 5] expected = df.loc[2000, 5]["A"] tm.assert_series_equal(result, expected) - - -def test_getitem_loc_datetime(): - # GH: 25165 - date_idx = pd.date_range("2019", periods=2, freq="MS") - df = DataFrame( - list(range(4)), - index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), - ) - expected = DataFrame( - [2, 3], - index=MultiIndex.from_product( - [[pd.to_datetime("2019-02-01")], [0, 1]], names=["x", "y"] - ), - ) - result = df["2019-2":] - tm.assert_frame_equal(result, expected) - result = df.loc["2019-2":] - tm.assert_frame_equal(result, expected) - - result = df.loc(axis=0)["2019-2":] - tm.assert_frame_equal(result, expected) - - result = df.loc["2019-2":, :] - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 538aa1d3a1164..4a6b9482c8d7e 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas import DataFrame, Float64Index, Int64Index, MultiIndex +from pandas import DataFrame, Float64Index, Int64Index, MultiIndex, date_range, to_datetime import pandas._testing as tm @@ -208,6 +208,30 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): expected.loc["bar"] = 0 tm.assert_series_equal(result, expected) + def test_getitem_loc_datetime(self): + # GH: 25165 + date_idx = date_range("2019", periods=2, freq="MS") + df = DataFrame( + list(range(4)), + index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), + ) + expected = DataFrame( + [2, 3], + index=MultiIndex.from_product( + [[to_datetime("2019-02-01")], [0, 1]], names=["x", "y"] + ), + ) + result = df["2019-2":] + tm.assert_frame_equal(result, expected) + result = df.loc["2019-2":] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis=0)["2019-2":] + tm.assert_frame_equal(result, expected) + + result = df.loc["2019-2":, :] + tm.assert_frame_equal(result, expected) + def test_loc_getitem_partial_both_axis(): # gh-12660 From e6acaaca42b30cf437d82adf754a731207ca7544 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 02:53:18 +0100 Subject: [PATCH 04/23] Run black --- pandas/tests/indexing/multiindex/test_partial.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 4a6b9482c8d7e..962a50b347edf 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -1,7 +1,14 @@ import numpy as np import pytest -from pandas import DataFrame, Float64Index, Int64Index, MultiIndex, date_range, to_datetime +from pandas import ( + DataFrame, + Float64Index, + Int64Index, + MultiIndex, + date_range, + to_datetime, +) import pandas._testing as tm From 37fe677b75388c992df6189677c24d0fbe4a289e Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 02:53:51 +0100 Subject: [PATCH 05/23] Rename test --- pandas/tests/indexing/multiindex/test_partial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 962a50b347edf..f7052df87bdc6 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -215,7 +215,7 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): expected.loc["bar"] = 0 tm.assert_series_equal(result, expected) - def test_getitem_loc_datetime(self): + def test_partial_getitem_loc_datetime(self): # GH: 25165 date_idx = date_range("2019", periods=2, freq="MS") df = DataFrame( From d38c9e74f4b09cc8245b85f50f1736891c6cbb88 Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 22:06:08 +0100 Subject: [PATCH 06/23] Change new implementation --- pandas/core/indexes/multi.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 52da1dc173713..c7f726c995a64 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2673,14 +2673,13 @@ def _partial_tup_index(self, tup, side="left"): loc -= 1 return start + section.searchsorted(loc, side=side) - idx = self._get_loc_single_level_index(lev, lab) - if k < n - 1: - end = start + section.searchsorted(idx, side="right") - start = start + section.searchsorted(idx, side="left") - else: - return start + section.searchsorted(idx, side=side) + start, end = self._get_loc_single_level_index(lev, lab) + if end is None: + end = section.searchsorted(start, side="right") + if k >= n - 1: + return start + section.searchsorted(start, side=side) - def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: + def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> Tuple[int, Optional[int]]: """ If key is NA value, location of index unify as -1. @@ -2700,12 +2699,12 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: Index.get_loc : The get_loc method for (single-level) index. """ if is_scalar(key) and isna(key): - return -1 + return -1, None else: loc = level_index.get_loc(key) if isinstance(loc, slice): - return loc.start - return loc + return loc.start, loc.stop + return loc, None def get_loc(self, key, method=None): """ @@ -2811,7 +2810,7 @@ def _maybe_to_slice(loc): for i, k in enumerate(follow_key, len(lead_key)): mask = self.codes[i][loc] == self._get_loc_single_level_index( self.levels[i], k - ) + )[0] if not mask.all(): loc = loc[mask] if not len(loc): @@ -3049,22 +3048,21 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): else: - code = self._get_loc_single_level_index(level_index, key) + start, end = self._get_loc_single_level_index(level_index, key) if level > 0 or self.lexsort_depth == 0: # Desired level is not sorted - locs = np.array(level_codes == code, dtype=bool, copy=False) + locs = np.array(level_codes == start, dtype=bool, copy=False) if not locs.any(): # The label is present in self.levels[level] but unused: raise KeyError(key) return locs - - i = level_codes.searchsorted(code, side="left") - j = level_codes.searchsorted(code, side="right") - if i == j: + if end is None: + end = level_codes.searchsorted(start, side="right") + if start == end: # The label is present in self.levels[level] but unused: raise KeyError(key) - return slice(i, j) + return slice(start, end) def get_locs(self, seq): """ From 72345a4a11b0f246e1d34a326671b502f2774bbd Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 22:25:45 +0100 Subject: [PATCH 07/23] Do slicing outside --- pandas/core/indexes/multi.py | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c7f726c995a64..c9ce657144c63 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2673,13 +2673,19 @@ def _partial_tup_index(self, tup, side="left"): loc -= 1 return start + section.searchsorted(loc, side=side) - start, end = self._get_loc_single_level_index(lev, lab) - if end is None: - end = section.searchsorted(start, side="right") - if k >= n - 1: - return start + section.searchsorted(start, side=side) + idx = self._get_loc_single_level_index(lev, lab) + if isinstance(idx, slice) and k < n - 1: + start = idx.start + end = idx.stop + elif k < n - 1: + end = start + section.searchsorted(idx, side="right") + start = start + section.searchsorted(idx, side="left") + else: + if isinstance(idx, slice): + idx = idx.start + return start + section.searchsorted(idx, side=side) - def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> Tuple[int, Optional[int]]: + def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: """ If key is NA value, location of index unify as -1. @@ -2701,10 +2707,7 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> Tupl if is_scalar(key) and isna(key): return -1, None else: - loc = level_index.get_loc(key) - if isinstance(loc, slice): - return loc.start, loc.stop - return loc, None + return level_index.get_loc(key) def get_loc(self, key, method=None): """ @@ -2810,7 +2813,7 @@ def _maybe_to_slice(loc): for i, k in enumerate(follow_key, len(lead_key)): mask = self.codes[i][loc] == self._get_loc_single_level_index( self.levels[i], k - )[0] + ) if not mask.all(): loc = loc[mask] if not len(loc): @@ -3048,17 +3051,21 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): else: - start, end = self._get_loc_single_level_index(level_index, key) + idx = self._get_loc_single_level_index(level_index, key) if level > 0 or self.lexsort_depth == 0: # Desired level is not sorted - locs = np.array(level_codes == start, dtype=bool, copy=False) + locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): # The label is present in self.levels[level] but unused: raise KeyError(key) return locs - if end is None: - end = level_codes.searchsorted(start, side="right") + if isinstance(idx, slice): + start = idx.start + end = idx.stop + else: + start = level_codes.searchsorted(idx, side="left") + end = level_codes.searchsorted(idx, side="right") if start == end: # The label is present in self.levels[level] but unused: raise KeyError(key) From 04056aed45a8b08ee62531132bc7263a928fcdfc Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 22:34:41 +0100 Subject: [PATCH 08/23] Add test --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4053e5100c7d3..3db94a8ab290a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -467,7 +467,7 @@ Indexing - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) -- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non integer interval was given to select values from :class:`MultiIndex` (:issue:`25165`) +- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non integer interval was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`18185`) Missing ^^^^^^^ diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index d79af1ea6b804..9131802adb9eb 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -598,3 +598,10 @@ def test_getitem_loc_commutability(multiindex_year_month_day_dataframe_random_da result = ser[2000, 5] expected = df.loc[2000, 5]["A"] tm.assert_series_equal(result, expected) + + +def test_get_loc_datetime_index(): + # GH: 18185 + index = pd.date_range("2001-01-01", periods=100) + mi = MultiIndex.from_arrays([index]) + assert mi.get_loc("2001-01") == slice(0, 31, None) From 41dbbcc0cdd457ccabf9142d71b72f28989c5d1e Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 22:35:53 +0100 Subject: [PATCH 09/23] Change issue number --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 3db94a8ab290a..6a3a91ab545b5 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -467,7 +467,7 @@ Indexing - Bug in :meth:`Series.__getitem__` when using an unsigned integer array as an indexer giving incorrect results or segfaulting instead of raising ``KeyError`` (:issue:`37218`) - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) -- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non integer interval was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`18185`) +- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non integer interval was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) Missing ^^^^^^^ From adaa27b2e1689ecb32dc977fd66253462d46693a Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 9 Nov 2020 23:14:50 +0100 Subject: [PATCH 10/23] Delete None --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 7f3dc8d9fdd0d..fd0937e28db46 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2707,7 +2707,7 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: Index.get_loc : The get_loc method for (single-level) index. """ if is_scalar(key) and isna(key): - return -1, None + return -1 else: return level_index.get_loc(key) From d59e7f721dabfb3a9b809cae8c6420ea284dddb2 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 01:59:06 +0100 Subject: [PATCH 11/23] Change whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 5f23c58b06ddd..6406283c28268 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -469,7 +469,7 @@ Indexing - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) -- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non integer interval was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) +- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non-integer :class:`Interval` was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) Missing ^^^^^^^ From d9562c4160567dd74187f9a4448ce3ed026c6d20 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 02:00:18 +0100 Subject: [PATCH 12/23] Add comment --- pandas/core/indexes/multi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fd0937e28db46..38eaa7b73e499 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2677,6 +2677,8 @@ def _partial_tup_index(self, tup, side="left"): idx = self._get_loc_single_level_index(lev, lab) if isinstance(idx, slice) and k < n - 1: + # Get start and end value from slice, necessary when a non-integer + # interval is given as input start = idx.start end = idx.stop elif k < n - 1: From c382af05298f1c0b3864e78c5a2ccf825e64c90b Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 02:01:44 +0100 Subject: [PATCH 13/23] Improve test --- pandas/tests/indexing/multiindex/test_loc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 8e0f89a94ffd3..8c495a1e12fed 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -630,3 +630,4 @@ def test_get_loc_datetime_index(): index = pd.date_range("2001-01-01", periods=100) mi = MultiIndex.from_arrays([index]) assert mi.get_loc("2001-01") == slice(0, 31, None) + assert index.get_loc("2001-01") == slice(0, 31, None) From 694e46952adb0171c1f92f7f03149d6048b9f348 Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 12 Nov 2020 02:04:02 +0100 Subject: [PATCH 14/23] Change test description --- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 8c495a1e12fed..21ea16d9aba58 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -626,7 +626,7 @@ def test_getitem_non_found_tuple(): def test_get_loc_datetime_index(): - # GH: 18185 + # GH#18185 index = pd.date_range("2001-01-01", periods=100) mi = MultiIndex.from_arrays([index]) assert mi.get_loc("2001-01") == slice(0, 31, None) From d78d8ce388b9ff6497c994cb3949425aaff9b6e9 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 12:21:01 +0100 Subject: [PATCH 15/23] Change issue number in test --- pandas/tests/indexing/multiindex/test_loc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 21ea16d9aba58..d860d2f52e530 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -626,7 +626,7 @@ def test_getitem_non_found_tuple(): def test_get_loc_datetime_index(): - # GH#18185 + # GH#24263 index = pd.date_range("2001-01-01", periods=100) mi = MultiIndex.from_arrays([index]) assert mi.get_loc("2001-01") == slice(0, 31, None) From c13da448eaa5e1d14fbff972dd833e0d5b9d0bbc Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 12:21:16 +0100 Subject: [PATCH 16/23] Fix comments --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexing/multiindex/test_loc.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 81732c12346c5..b64e5971ce25f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -475,7 +475,7 @@ Indexing - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) -- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non-integer :class:`Interval` was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) +- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) Missing ^^^^^^^ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0b904ed3c6909..cbaa7adf2f30b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2678,7 +2678,7 @@ def _partial_tup_index(self, tup, side="left"): idx = self._get_loc_single_level_index(lev, lab) if isinstance(idx, slice) and k < n - 1: # Get start and end value from slice, necessary when a non-integer - # interval is given as input + # interval is given as input GH#37707 start = idx.start end = idx.stop elif k < n - 1: diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index d860d2f52e530..b03d21ad5b365 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -629,5 +629,6 @@ def test_get_loc_datetime_index(): # GH#24263 index = pd.date_range("2001-01-01", periods=100) mi = MultiIndex.from_arrays([index]) + # Check if get_loc matches for Index and MultiIndex assert mi.get_loc("2001-01") == slice(0, 31, None) assert index.get_loc("2001-01") == slice(0, 31, None) From 0a6cab72dcca376eeb21be68676a3e896fbba8c6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 12:50:16 +0100 Subject: [PATCH 17/23] Add testcases --- pandas/core/indexes/multi.py | 2 ++ .../tests/indexing/multiindex/test_partial.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cbaa7adf2f30b..c232ceabdf77f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3021,6 +3021,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): start = 0 if key.stop is not None: stop = level_index.get_loc(key.stop) + elif isinstance(start, slice): + stop = start.stop else: stop = len(level_index) - 1 step = key.step diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index f7052df87bdc6..1d2e9c5c05605 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -239,6 +239,25 @@ def test_partial_getitem_loc_datetime(self): result = df.loc["2019-2":, :] tm.assert_frame_equal(result, expected) + df2 = df.swaplevel(0, 1).sort_index() + expected = expected.swaplevel(0, 1) + + result = df2.loc[:, "2019-02":, :] + tm.assert_frame_equal(result, expected) + + expected = df.copy() + result = df[:"2019-2"] + tm.assert_frame_equal(result, expected) + + result = df.loc[:"2019-2"] + tm.assert_frame_equal(result, expected) + + result = df.loc(axis=0)[:"2019-2"] + tm.assert_frame_equal(result, expected) + + result = df.loc[:"2019-2", :] + tm.assert_frame_equal(result, expected) + def test_loc_getitem_partial_both_axis(): # gh-12660 From 3e9baa6b76521e364e0d740912af9d6e1ccb0a6e Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 22:23:58 +0100 Subject: [PATCH 18/23] Fix bugs --- pandas/core/indexes/multi.py | 2 +- pandas/tests/indexes/multi/test_indexing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index c232ceabdf77f..d877dd7d31b2f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3022,7 +3022,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): if key.stop is not None: stop = level_index.get_loc(key.stop) elif isinstance(start, slice): - stop = start.stop + stop = len(level_index) else: stop = len(level_index) - 1 step = key.step diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py index e8e31aa0cef80..a967953330ffa 100644 --- a/pandas/tests/indexes/multi/test_indexing.py +++ b/pandas/tests/indexes/multi/test_indexing.py @@ -768,7 +768,7 @@ def test_timestamp_multiindex_indexer(): [ pd.date_range( start="2019-01-02T00:15:33", - end="2019-01-05T02:15:33", + end="2019-01-05T03:15:33", freq="H", name="date", ), From c579c886162bb8cac04d731534465ca421b08abd Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 19:00:51 +0100 Subject: [PATCH 19/23] Adress review comments --- pandas/core/indexes/multi.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d877dd7d31b2f..305fdc46c6b80 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2687,7 +2687,9 @@ def _partial_tup_index(self, tup, side="left"): else: if isinstance(idx, slice): idx = idx.start - return start + section.searchsorted(idx, side=side) + return start + section.searchsorted(idx, side=side) + else: + return start + section.searchsorted(idx, side=side) def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: """ @@ -3066,6 +3068,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): # The label is present in self.levels[level] but unused: raise KeyError(key) return locs + if isinstance(idx, slice): start = idx.start end = idx.stop From 923610e2e2b520aad134cf74cb703912c73ba44b Mon Sep 17 00:00:00 2001 From: phofl Date: Mon, 16 Nov 2020 21:36:37 +0100 Subject: [PATCH 20/23] Fix whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2a7c77d271f2a..993917bd1e101 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -584,7 +584,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) -- Bug in :meth:`DataFrame.loc` raised ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) +- Bug in :meth:`DataFrame.loc` raising ``TypeError`` when non-integer slice was given to select values from :class:`MultiIndex` (:issue:`25165`, :issue:`24263`) Missing ^^^^^^^ From fdd170e9908055a4cb4aa7599d0c3420ab10c0ef Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:21:48 +0100 Subject: [PATCH 21/23] Fix blank line and if else --- pandas/core/indexes/multi.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 305fdc46c6b80..fc8faf15f05f0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2684,12 +2684,11 @@ def _partial_tup_index(self, tup, side="left"): elif k < n - 1: end = start + section.searchsorted(idx, side="right") start = start + section.searchsorted(idx, side="left") + elif isinstance(idx, slice): + idx = idx.start + return start + section.searchsorted(idx, side=side) else: - if isinstance(idx, slice): - idx = idx.start - return start + section.searchsorted(idx, side=side) - else: - return start + section.searchsorted(idx, side=side) + return start + section.searchsorted(idx, side=side) def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: """ @@ -3075,6 +3074,7 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): else: start = level_codes.searchsorted(idx, side="left") end = level_codes.searchsorted(idx, side="right") + if start == end: # The label is present in self.levels[level] but unused: raise KeyError(key) From 4c17aeb9a1cefc5821e1b113c153fe9fa9145c38 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:59:14 +0100 Subject: [PATCH 22/23] Parametrize test --- .../tests/indexing/multiindex/test_partial.py | 37 ++++++++----------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 1d2e9c5c05605..3dcb291808306 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -215,7 +215,13 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): expected.loc["bar"] = 0 tm.assert_series_equal(result, expected) - def test_partial_getitem_loc_datetime(self): + @pytest.mark.parametrize( + "indexer, exp_idx, exp_values", + [ + (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]), + (slice(None, "2019-2"), date_range("2019", periods=2, freq="MS"), [0, 1, 2, 3]), + ]) + def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): # GH: 25165 date_idx = date_range("2019", periods=2, freq="MS") df = DataFrame( @@ -223,39 +229,26 @@ def test_partial_getitem_loc_datetime(self): index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]), ) expected = DataFrame( - [2, 3], + exp_values, index=MultiIndex.from_product( - [[to_datetime("2019-02-01")], [0, 1]], names=["x", "y"] + [exp_idx, [0, 1]], names=["x", "y"] ), ) - result = df["2019-2":] + result = df[indexer] tm.assert_frame_equal(result, expected) - result = df.loc["2019-2":] + result = df.loc[indexer] tm.assert_frame_equal(result, expected) - result = df.loc(axis=0)["2019-2":] + result = df.loc(axis=0)[indexer] tm.assert_frame_equal(result, expected) - result = df.loc["2019-2":, :] + result = df.loc[indexer, :] tm.assert_frame_equal(result, expected) df2 = df.swaplevel(0, 1).sort_index() - expected = expected.swaplevel(0, 1) + expected = expected.swaplevel(0, 1).sort_index() - result = df2.loc[:, "2019-02":, :] - tm.assert_frame_equal(result, expected) - - expected = df.copy() - result = df[:"2019-2"] - tm.assert_frame_equal(result, expected) - - result = df.loc[:"2019-2"] - tm.assert_frame_equal(result, expected) - - result = df.loc(axis=0)[:"2019-2"] - tm.assert_frame_equal(result, expected) - - result = df.loc[:"2019-2", :] + result = df2.loc[:, indexer, :] tm.assert_frame_equal(result, expected) From a766f985c92f9a999fc1238717b5808471cb581b Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 18 Nov 2020 00:13:06 +0100 Subject: [PATCH 23/23] Run black --- pandas/tests/indexing/multiindex/test_partial.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 3dcb291808306..9c356b81b85db 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -219,8 +219,13 @@ def test_setitem_multiple_partial(self, multiindex_dataframe_random_data): "indexer, exp_idx, exp_values", [ (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]), - (slice(None, "2019-2"), date_range("2019", periods=2, freq="MS"), [0, 1, 2, 3]), - ]) + ( + slice(None, "2019-2"), + date_range("2019", periods=2, freq="MS"), + [0, 1, 2, 3], + ), + ], + ) def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): # GH: 25165 date_idx = date_range("2019", periods=2, freq="MS") @@ -230,9 +235,7 @@ def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values): ) expected = DataFrame( exp_values, - index=MultiIndex.from_product( - [exp_idx, [0, 1]], names=["x", "y"] - ), + index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]), ) result = df[indexer] tm.assert_frame_equal(result, expected)