diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index fe6c7c7b7d213..ddb35e858124a 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -217,6 +217,8 @@ Indexing ^^^^^^^^ - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`) - Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` when the object's Index has a length greater than one but only one unique value (:issue:`42365`) +- Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`) +- Missing ^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d23cb4de3f2a0..3b30af4bb6b1e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -44,7 +44,6 @@ DtypeObj, F, Shape, - T, npt, ) from pandas.compat.numpy import function as nv @@ -3719,16 +3718,6 @@ def _filter_indexer_tolerance( # -------------------------------------------------------------------- # Indexer Conversion Methods - def _get_partial_string_timestamp_match_key(self, key: T) -> T: - """ - Translate any partial string timestamp matches in key, returning the - new key. - - Only relevant for MultiIndex. - """ - # GH#10331 - return key - @final def _validate_positional_slice(self, key: slice) -> None: """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8c47388002b0d..c9144f311c25d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2581,35 +2581,6 @@ def _get_indexer_level_0(self, target) -> np.ndarray: ci = Index(cat) return ci.get_indexer_for(target) - def _get_partial_string_timestamp_match_key(self, key): - """ - Translate any partial string timestamp matches in key, returning the - new key. - - Only relevant for MultiIndex. 
- """ - # GH#10331 - if isinstance(key, str) and self.levels[0]._supports_partial_string_indexing: - # Convert key '2016-01-01' to - # ('2016-01-01'[, slice(None, None, None)]+) - key = (key,) + (slice(None),) * (len(self.levels) - 1) - - if isinstance(key, tuple): - # Convert (..., '2016-01-01', ...) in tuple to - # (..., slice('2016-01-01', '2016-01-01', None), ...) - new_key = [] - for i, component in enumerate(key): - if ( - isinstance(component, str) - and self.levels[i]._supports_partial_string_indexing - ): - new_key.append(slice(component, component, None)) - else: - new_key.append(component) - key = tuple(new_key) - - return key - def get_slice_bound( self, label: Hashable | Sequence[Hashable], side: str, kind: str | None = None ) -> int: @@ -2858,7 +2829,12 @@ def _maybe_to_slice(loc): ) if keylen == self.nlevels and self.is_unique: - return self._engine.get_loc(key) + try: + return self._engine.get_loc(key) + except TypeError: + # e.g. partial string slicing + loc, _ = self.get_loc_level(key, list(range(self.nlevels))) + return loc # -- partial selection or non-unique index # break the key into 2 parts based on the lexsort_depth of the index; @@ -3008,6 +2984,10 @@ def maybe_mi_droplevels(indexer, levels): return (self._engine.get_loc(key), None) except KeyError as err: raise KeyError(key) from err + except TypeError: + # e.g. partial string indexing + # test_partial_string_timestamp_multiindex + pass # partial selection indexer = self.get_loc(key) @@ -3019,7 +2999,19 @@ def maybe_mi_droplevels(indexer, levels): # TODO: in some cases we still need to drop some levels, # e.g. test_multiindex_perf_warn - ilevels = [] + # test_partial_string_timestamp_multiindex + ilevels = [ + i + for i in range(len(key)) + if ( + not isinstance(key[i], str) + or not self.levels[i]._supports_partial_string_indexing + ) + and key[i] != slice(None, None) + ] + if len(ilevels) == self.nlevels: + # TODO: why? 
+ ilevels = [] return indexer, maybe_mi_droplevels(indexer, ilevels) else: @@ -3060,6 +3052,16 @@ def maybe_mi_droplevels(indexer, levels): return indexer, maybe_mi_droplevels(indexer, ilevels) else: indexer = self._get_level_indexer(key, level=level) + if ( + isinstance(key, str) + and self.levels[level]._supports_partial_string_indexing + ): + # check to see if we did an exact lookup vs sliced + check = self.levels[level].get_loc(key) + if not is_integer(check): + # e.g. test_partial_string_timestamp_multiindex + return indexer, self[indexer] + return indexer, maybe_mi_droplevels(indexer, [level]) def _get_level_indexer( @@ -3157,6 +3159,10 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): if level > 0 or self._lexsort_depth == 0: # Desired level is not sorted + if isinstance(idx, slice): + locs = (level_codes >= idx.start) & (level_codes < idx.stop) + return locs + locs = np.array(level_codes == idx, dtype=bool, copy=False) if not locs.any(): # The label is present in self.levels[level] but unused: @@ -3164,8 +3170,10 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes): return locs if isinstance(idx, slice): - start = idx.start - end = idx.stop + # e.g. 
test_partial_string_timestamp_multiindex + start = level_codes.searchsorted(idx.start, side="left") + # NB: "left" here because the slice stop is exclusive + end = level_codes.searchsorted(idx.stop, side="left") else: start = level_codes.searchsorted(idx, side="left") end = level_codes.searchsorted(idx, side="right") diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 387dcca6897b7..ec9a233c3ab0d 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1129,12 +1129,6 @@ def _handle_lowerdim_multi_index_axis0(self, tup: tuple): try: # fast path for series or for tup devoid of slices return self._get_label(tup, axis=axis) - except TypeError as err: - # slices are unhashable - # FIXME: this raises when we have a DatetimeIndex first level and a - # string for the first tup entry - # see test_partial_slicing_with_multiindex - raise IndexingError("No label returned") from err except KeyError as ek: # raise KeyError if number of indexers match @@ -1149,7 +1143,6 @@ def _getitem_axis(self, key, axis: int): key = list(key) labels = self.obj._get_axis(axis) - key = labels._get_partial_string_timestamp_match_key(key) if isinstance(key, slice): self._validate_key(key, axis) diff --git a/pandas/tests/indexes/datetimes/test_partial_slicing.py b/pandas/tests/indexes/datetimes/test_partial_slicing.py index 87c56ea588a5d..c5b47053471eb 100644 --- a/pandas/tests/indexes/datetimes/test_partial_slicing.py +++ b/pandas/tests/indexes/datetimes/test_partial_slicing.py @@ -16,7 +16,6 @@ date_range, ) import pandas._testing as tm -from pandas.core.indexing import IndexingError class TestSlicing: @@ -337,11 +336,10 @@ def test_partial_slicing_with_multiindex(self): result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")] tm.assert_series_equal(result, expected) - # this is an IndexingError as we don't do partial string selection on - # multi-levels.
- msg = "Too many indexers" - with pytest.raises(IndexingError, match=msg): - df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + # partial string indexing on first level, scalar indexing on the other two + result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")] + expected = df_multi.iloc[:1].droplevel([1, 2]) + tm.assert_frame_equal(result, expected) def test_partial_slicing_with_multiindex_series(self): # GH 4294 diff --git a/pandas/tests/indexes/multi/test_partial_indexing.py b/pandas/tests/indexes/multi/test_partial_indexing.py index 286522f6b946d..fb34fc7f570ba 100644 --- a/pandas/tests/indexes/multi/test_partial_indexing.py +++ b/pandas/tests/indexes/multi/test_partial_indexing.py @@ -72,7 +72,9 @@ def test_partial_string_timestamp_multiindex(df): # partial string match on date and hour, from middle result = df.loc["2016-01-02 12"] - expected = df.iloc[9:12] + # hourly resolution, same as index.levels[0], so we are _not_ slicing on + # that level, so that level gets dropped + expected = df.iloc[9:12].droplevel(0) tm.assert_frame_equal(result, expected) # partial string match on secondary index @@ -81,11 +83,14 @@ def test_partial_string_timestamp_multiindex(df): tm.assert_frame_equal(result, expected) # tuple selector with partial string match on date + # "2016-01-01" has daily resolution, so _is_ a slice on the first level. 
result = df.loc[("2016-01-01", "a"), :] expected = df.iloc[[0, 3]] + expected = df.iloc[[0, 3]].droplevel(1) tm.assert_frame_equal(result, expected) - # Slicing date on first level should break (of course) + # Slicing date on first level should break (of course) because the + # DatetimeIndex is the second level on df_swap with pytest.raises(KeyError, match="'2016-01-01'"): df_swap.loc["2016-01-01"] diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index b9a4e658cc753..13ddf6f7d71db 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -554,15 +554,17 @@ def test_loc_period_string_indexing(): ), ) result = df.loc[("2013Q1", 1111), "OMS"] - expected = Series( - [np.nan], - dtype=object, - name="OMS", - index=MultiIndex.from_tuples( - [(pd.Period("2013Q1"), 1111)], names=["Period", "CVR"] - ), - ) - tm.assert_series_equal(result, expected) + + alt = df.loc[(a[0], 1111), "OMS"] + assert np.isnan(alt) + + # Because the resolution of the string matches, it is an exact lookup, + # not a slice + assert np.isnan(result) + + # TODO: should loc figure out the flat (un-nested) key below? + # alt = df.loc["2013Q1", 1111, "OMS"] + # assert np.isnan(alt) def test_loc_datetime_mask_slicing():