Skip to content

BUG: partial-indexing on MultiIndex with IntervalIndex level #42569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 14 additions & 16 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -672,6 +672,11 @@ def _format_duplicate_message(self) -> DataFrame:
assert len(duplicates)

out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
if self._is_multi:
# test_format_duplicate_labels_message_multi
# error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined]

if self.nlevels == 1:
out = out.rename_axis("label")
return out.to_frame(name="positions")
Expand Down Expand Up @@ -5400,22 +5405,15 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]

self._raise_if_missing(keyarr, indexer, axis_name)

if (
needs_i8_conversion(self.dtype)
or is_categorical_dtype(self.dtype)
or is_interval_dtype(self.dtype)
):
# For CategoricalIndex take instead of reindex to preserve dtype.
# For IntervalIndex this is to map integers to the Intervals they match to.
keyarr = self.take(indexer)
if keyarr.dtype.kind in ["m", "M"]:
# DTI/TDI.take can infer a freq in some cases when we don't want one
if isinstance(key, list) or (
isinstance(key, type(self))
# "Index" has no attribute "freq"
and key.freq is None # type: ignore[attr-defined]
):
keyarr = keyarr._with_freq(None)
keyarr = self.take(indexer)
if keyarr.dtype.kind in ["m", "M"]:
# DTI/TDI.take can infer a freq in some cases when we don't want one
if isinstance(key, list) or (
isinstance(key, type(self))
# "Index" has no attribute "freq"
and key.freq is None # type: ignore[attr-defined]
):
keyarr = keyarr._with_freq(None)

return keyarr, indexer

Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/indexing/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,21 +133,23 @@ def test_getitem_interval_with_nans(self, frame_or_series, indexer_sl):
class TestIntervalIndexInsideMultiIndex:
def test_mi_intervalindex_slicing_with_scalar(self):
# GH#27456
ii = IntervalIndex.from_arrays(
[0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12], name="MP"
)
idx = pd.MultiIndex.from_arrays(
[
pd.Index(["FC", "FC", "FC", "FC", "OWNER", "OWNER", "OWNER", "OWNER"]),
pd.Index(
["RID1", "RID1", "RID2", "RID2", "RID1", "RID1", "RID2", "RID2"]
),
IntervalIndex.from_arrays(
[0, 1, 10, 11, 0, 1, 10, 11], [1, 2, 11, 12, 1, 2, 11, 12]
),
ii,
]
)

idx.names = ["Item", "RID", "MP"]
df = DataFrame({"value": [1, 2, 3, 4, 5, 6, 7, 8]})
df.index = idx

query_df = DataFrame(
{
"Item": ["FC", "OWNER", "FC", "OWNER", "OWNER"],
Expand All @@ -161,5 +163,13 @@ def test_mi_intervalindex_slicing_with_scalar(self):
idx = pd.MultiIndex.from_arrays([query_df.Item, query_df.RID, query_df.MP])
query_df.index = idx
result = df.value.loc[query_df.index]
expected = Series([1, 6, 2, 8, 7], index=idx, name="value")

# the IntervalIndex level is indexed with floats, which map to
# the intervals containing them. Matching the behavior we would get
# with _only_ an IntervalIndex, we get an IntervalIndex level back.
sliced_level = ii.take([0, 1, 1, 3, 2])
expected_index = pd.MultiIndex.from_arrays(
[idx.get_level_values(0), idx.get_level_values(1), sliced_level]
)
expected = Series([1, 6, 2, 8, 7], index=expected_index, name="value")
tm.assert_series_equal(result, expected)
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_floats.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,8 +512,8 @@ def test_floating_misc(self, indexer_sl):
for fancy_idx in [[5.0, 0.0], np.array([5.0, 0.0])]: # float
tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

expected = Series([2, 0], index=Index([5, 0], dtype="int64"))
for fancy_idx in [[5, 0], np.array([5, 0])]: # int
expected = Series([2, 0], index=Index([5, 0], dtype="float64"))
for fancy_idx in [[5, 0], np.array([5, 0])]:
tm.assert_series_equal(indexer_sl(s)[fancy_idx], expected)

# all should return the same as we are slicing 'the same'
Expand Down
7 changes: 3 additions & 4 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,9 @@ def test_loc_setitem_frame_mixed_labels(self):
df = DataFrame({1: [1, 2], 2: [3, 4], "a": ["a", "b"]})

result = df.loc[0, [1, 2]]
expected = Series([1, 3], index=[1, 2], dtype=object, name=0)
expected = Series(
[1, 3], index=Index([1, 2], dtype=object), dtype=object, name=0
)
tm.assert_series_equal(result, expected)

expected = DataFrame({1: [5, 2], 2: [6, 4], "a": ["a", "b"]})
Expand Down Expand Up @@ -2414,9 +2416,6 @@ def test_loc_getitem_label_list_integer_labels(columns, column_key, expected_col
expected = df.iloc[:, expected_columns]
result = df.loc[["A", "B", "C"], column_key]

if df.columns.is_object() and all(isinstance(x, int) for x in column_key):
expected.columns = expected.columns.astype(int)

tm.assert_frame_equal(result, expected, check_column_type=True)


Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,14 @@ def test_reindex_preserve_levels(
assert chunk.index is new_index

chunk = ymd.loc[new_index]
assert chunk.index is new_index
assert chunk.index.equals(new_index)

ymdT = ymd.T
chunk = ymdT.reindex(columns=new_index)
assert chunk.columns is new_index

chunk = ymdT.loc[:, new_index]
assert chunk.columns is new_index
assert chunk.columns.equals(new_index)

def test_groupby_transform(self, multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
Expand Down