Skip to content

Commit f02c204

Browse files
jbrockmendel authored and CGe0516 committed
BUG: partial-indexing on MultiIndex with IntervalIndex level (pandas-dev#42569)
1 parent 86c45f1 commit f02c204

File tree

5 files changed

+35
-28
lines changed

5 files changed

+35
-28
lines changed

pandas/core/indexes/base.py

+14-16
Original file line number | Diff line number | Diff line change
@@ -672,6 +672,11 @@ def _format_duplicate_message(self) -> DataFrame:
672672
assert len(duplicates)
673673

674674
out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
675+
if self._is_multi:
676+
# test_format_duplicate_labels_message_multi
677+
# error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
678+
out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined]
679+
675680
if self.nlevels == 1:
676681
out = out.rename_axis("label")
677682
return out.to_frame(name="positions")
@@ -5400,22 +5405,15 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]
54005405

54015406
self._raise_if_missing(keyarr, indexer, axis_name)
54025407

5403-
if (
5404-
needs_i8_conversion(self.dtype)
5405-
or is_categorical_dtype(self.dtype)
5406-
or is_interval_dtype(self.dtype)
5407-
):
5408-
# For CategoricalIndex take instead of reindex to preserve dtype.
5409-
# For IntervalIndex this is to map integers to the Intervals they match to.
5410-
keyarr = self.take(indexer)
5411-
if keyarr.dtype.kind in ["m", "M"]:
5412-
# DTI/TDI.take can infer a freq in some cases when we dont want one
5413-
if isinstance(key, list) or (
5414-
isinstance(key, type(self))
5415-
# "Index" has no attribute "freq"
5416-
and key.freq is None # type: ignore[attr-defined]
5417-
):
5418-
keyarr = keyarr._with_freq(None)
5408+
keyarr = self.take(indexer)
5409+
if keyarr.dtype.kind in ["m", "M"]:
5410+
# DTI/TDI.take can infer a freq in some cases when we dont want one
5411+
if isinstance(key, list) or (
5412+
isinstance(key, type(self))
5413+
# "Index" has no attribute "freq"
5414+
and key.freq is None # type: ignore[attr-defined]
5415+
):
5416+
keyarr = keyarr._with_freq(None)
54195417

54205418
return keyarr, indexer
54215419

pandas/tests/indexing/interval/test_interval.py

+14-4
Original file line number | Diff line number | Diff line change
@@ -133,21 +133,23 @@ def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
133133
class TestIntervalIndexInsideMultiIndex:
134134
def test_mi_intervalindex_slicing_with_scalar(self):
135135
# GH#27456
136+
ii = IntervalIndex.from_arrays(
137+
[0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP"
138+
)
136139
idx = pd.MultiIndex.from_arrays(
137140
[
138141
pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]),
139142
pd.Index(
140143
["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"]
141144
),
142-
IntervalIndex.from_arrays(
143-
[0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12]
144-
),
145+
ii,
145146
]
146147
)
147148

148149
idx.names = ["Item", "RID", "MP"]
149150
df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]})
150151
df.index = idx
152+
151153
query_df = DataFrame(
152154
{
153155
"Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
@@ -161,5 +163,13 @@ def test_mi_intervalindex_slicing_with_scalar(self):
161163
idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
162164
query_df.index = idx
163165
result = df.value.loc[query_df.index]
164-
expected = Series([1, 6, 2, 8, 7], index=idx, name="value")
166+
167+
# the IntervalIndex level is indexed with floats, which map to
168+
# the intervals containing them. Matching the behavior we would get
169+
# with _only_ an IntervalIndex, we get an IntervalIndex level back.
170+
sliced_level = ii.take([0, 1, 1, 3, 2])
171+
expected_index = pd.MultiIndex.from_arrays(
172+
[idx.get_level_values(0), idx.get_level_values(1), sliced_level]
173+
)
174+
expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
165175
tm.assert_series_equal(result, expected)

pandas/tests/indexing/test_floats.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -512,8 +512,8 @@ def test_floating_misc(self, indexer_sl):
512512
for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
513513
tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)
514514

515-
expected = Series([2, 0], index=Index([5, 0], dtype="int64"))
516-
for fancy_idx in [[5, 0], np.array([5, 0])]: # int
515+
expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
516+
for fancy_idx in [[5, 0], np.array([5, 0])]:
517517
tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)
518518

519519
# all should return the same as we are slicing 'the same'

pandas/tests/indexing/test_loc.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -713,7 +713,9 @@ def test_loc_setitem_frame_mixed_labels(self):
713713
df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})
714714

715715
result = df.loc[0, [1, 2]]
716-
expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
716+
expected = Series(
717+
[1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0
718+
)
717719
tm.assert_series_equal(result, expected)
718720

719721
expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]})
@@ -2445,9 +2447,6 @@ def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_col
24452447
expected = df.iloc[:, expected_columns]
24462448
result = df.loc[["A", "B", "C"], column_key]
24472449

2448-
if df.columns.is_object() and all(isinstance(x, int) for x in column_key):
2449-
expected.columns = expected.columns.astype(int)
2450-
24512450
tm.assert_frame_equal(result, expected, check_column_type=True)
24522451

24532452

pandas/tests/test_multilevel.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -91,14 +91,14 @@ def test_reindex_preserve_levels(
9191
assert chunk.index is new_index
9292

9393
chunk = ymd.loc[new_index]
94-
assert chunk.index is new_index
94+
assert chunk.index.equals(new_index)
9595

9696
ymdT = ymd.T
9797
chunk = ymdT.reindex(columns=new_index)
9898
assert chunk.columns is new_index
9999

100100
chunk = ymdT.loc[:, new_index]
101-
assert chunk.columns is new_index
101+
assert chunk.columns.equals(new_index)
102102

103103
def test_groupby_transform(self, multiindex_dataframe_random_data):
104104
frame = multiindex_dataframe_random_data

0 commit comments

Comments
 (0)