diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 13d1caedbb50a..5f12a918c0520 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -672,6 +672,11 @@ def _format_duplicate_message(self) -> DataFrame: assert len(duplicates) out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates] + if self._is_multi: + # test_format_duplicate_labels_message_multi + # error: "Type[Index]" has no attribute "from_tuples" [attr-defined] + out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined] + if self.nlevels == 1: out = out.rename_axis("label") return out.to_frame(name="positions") @@ -5400,22 +5405,15 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray] self._raise_if_missing(keyarr, indexer, axis_name) - if ( - needs_i8_conversion(self.dtype) - or is_categorical_dtype(self.dtype) - or is_interval_dtype(self.dtype) - ): - # For CategoricalIndex take instead of reindex to preserve dtype. - # For IntervalIndex this is to map integers to the Intervals they match to. - keyarr = self.take(indexer) - if keyarr.dtype.kind in ["m", "M"]: - # DTI/TDI.take can infer a freq in some cases when we dont want one - if isinstance(key, list) or ( - isinstance(key, type(self)) - # "Index" has no attribute "freq" - and key.freq is None # type: ignore[attr-defined] - ): - keyarr = keyarr._with_freq(None) + keyarr = self.take(indexer) + if keyarr.dtype.kind in ["m", "M"]: + # DTI/TDI.take can infer a freq in some cases when we dont want one + if isinstance(key, list) or ( + isinstance(key, type(self)) + # "Index" has no attribute "freq" + and key.freq is None # type: ignore[attr-defined] + ): + keyarr = keyarr._with_freq(None) return keyarr, indexer diff --git a/pandas/tests/indexing/interval/test_interval.py b/pandas/tests/indexing/interval/test_interval.py index ccb16c5d97ecc..db3a569d3925b 100644 --- a/pandas/tests/indexing/interval/test_interval.py +++ b/pandas/tests/indexing/interval/test_interval.py @@ -133,21 +133,23 @@ def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl): class TestIntervalIndexInsideMultiIndex: def test_mi_intervalindex_slicing_with_scalar(self): # GH#27456 + ii = IntervalIndex.from_arrays( + [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP" + ) idx = pd.MultiIndex.from_arrays( [ pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]), pd.Index( ["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"] ), - IntervalIndex.from_arrays( - [0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12] - ), + ii, ] ) idx.names = ["Item", "RID", "MP"] df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]}) df.index = idx + query_df = DataFrame( { "Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"], @@ -161,5 +163,13 @@ def test_mi_intervalindex_slicing_with_scalar(self): idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP]) query_df.index = idx result = df.value.loc[query_df.index] - expected = Series([1, 6, 2, 8, 7], index=idx, name="value") + + # the IntervalIndex level is indexed with floats, which map to + # the intervals containing them. Matching the behavior we would get + # with _only_ an IntervalIndex, we get an IntervalIndex level back. + sliced_level = ii.take([0, 1, 1, 3, 2]) + expected_index = pd.MultiIndex.from_arrays( + [idx.get_level_values(0), idx.get_level_values(1), sliced_level] + ) + expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index 6116c34f238e2..3a8957cda7b3f 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -512,8 +512,8 @@ def test_floating_misc(self, indexer_sl): for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) - expected = Series([2, 0], index=Index([5, 0], dtype="int64")) - for fancy_idx in [[5, 0], np.array([5, 0])]: # int + expected = Series([2, 0], index=Index([5, 0], dtype="float64")) + for fancy_idx in [[5, 0], np.array([5, 0])]: tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected) # all should return the same as we are slicing 'the same' diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 6de83e34122c2..7acdc2f8607d0 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -682,7 +682,9 @@ def test_loc_setitem_frame_mixed_labels(self): df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]}) result = df.loc[0, [1, 2]] - expected = Series([1, 3], index=[1, 2], dtype=object, name=0) + expected = Series( + [1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0 + ) tm.assert_series_equal(result, expected) expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]}) @@ -2414,9 +2416,6 @@ def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_col expected = df.iloc[:, expected_columns] result = df.loc[["A", "B", "C"], column_key] - if df.columns.is_object() and all(isinstance(x, int) for x in column_key): - expected.columns = expected.columns.astype(int) - tm.assert_frame_equal(result, expected, check_column_type=True) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e100fef3490ba..8e80463a9c600 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -91,14 +91,14 @@ def test_reindex_preserve_levels( assert chunk.index is new_index chunk = ymd.loc[new_index] - assert chunk.index is new_index + assert chunk.index.equals(new_index) ymdT = ymd.T chunk = ymdT.reindex(columns=new_index) assert chunk.columns is new_index chunk = ymdT.loc[:, new_index] - assert chunk.columns is new_index + assert chunk.columns.equals(new_index) def test_groupby_transform(self, multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data