Skip to content

TST: collect indexing tests by method #39876

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Feb 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,15 @@ def test_setitem_intervals(self):
tm.assert_series_equal(df["C"], df["C"])
tm.assert_series_equal(df["C"], df["E"], check_names=False)

def test_setitem_categorical(self):
# GH#35369
# Assigning a categorical Series whose categories were reordered back onto
# the frame must keep the new category order: the frame should compare
# equal to one built directly from a Categorical with that same order.
# Start from a single categorical column "h" holding the values "m", "n".
df = DataFrame({"h": Series(list("mn")).astype("category")})
# Note the assignment goes through attribute access (df.h = ...), with the
# categories reordered to ["n", "m"].
df.h = df.h.cat.reorder_categories(["n", "m"])
# Expected frame: same values ["m", "n"], categories ordered ["n", "m"].
expected = DataFrame(
{"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])}
)
tm.assert_frame_equal(df, expected)


class TestSetitemTZAwareValues:
@pytest.fixture
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/interval/test_interval_new.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def test_non_unique_moar(self, indexer_sl):
result = indexer_sl(ser)[[Interval(1, 3)]]
tm.assert_series_equal(expected, result)

def test_missing_key_error_message(
def test_loc_getitem_missing_key_error_message(
self, frame_or_series, series_with_interval_index
):
# GH#27365
Expand Down
12 changes: 4 additions & 8 deletions pandas/tests/indexing/multiindex/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,26 +62,22 @@ def test_series_getitem_duplicates_multiindex(level0_value):
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("indexer", [lambda s: s[2000, 3], lambda s: s.loc[2000, 3]])
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer):
def test_series_getitem(multiindex_year_month_day_dataframe_random_data, indexer_sl):
s = multiindex_year_month_day_dataframe_random_data["A"]
expected = s.reindex(s.index[42:65])
expected.index = expected.index.droplevel(0).droplevel(0)

result = indexer(s)
result = indexer_sl(s)[2000, 3]
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
"indexer", [lambda s: s[2000, 3, 10], lambda s: s.loc[2000, 3, 10]]
)
def test_series_getitem_returns_scalar(
multiindex_year_month_day_dataframe_random_data, indexer
multiindex_year_month_day_dataframe_random_data, indexer_sl
):
s = multiindex_year_month_day_dataframe_random_data["A"]
expected = s.iloc[49]

result = indexer(s)
result = indexer_sl(s)[2000, 3, 10]
assert result == expected


Expand Down
22 changes: 9 additions & 13 deletions pandas/tests/indexing/multiindex/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,10 @@ def simple_multiindex_dataframe():
random data by default.
"""

def _simple_multiindex_dataframe(data=None):
if data is None:
data = np.random.randn(3, 3)
return DataFrame(
data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
)

return _simple_multiindex_dataframe
data = np.random.randn(3, 3)
return DataFrame(
data, columns=[[2, 2, 4], [6, 8, 10]], index=[[4, 4, 8], [8, 10, 12]]
)


@pytest.mark.parametrize(
Expand All @@ -45,23 +41,23 @@ def _simple_multiindex_dataframe(data=None):
],
)
def test_iloc_returns_series(indexer, expected, simple_multiindex_dataframe):
arr = np.random.randn(3, 3)
df = simple_multiindex_dataframe(arr)
df = simple_multiindex_dataframe
arr = df.values
result = indexer(df)
expected = expected(arr)
tm.assert_series_equal(result, expected)


def test_iloc_returns_dataframe(simple_multiindex_dataframe):
df = simple_multiindex_dataframe()
df = simple_multiindex_dataframe
result = df.iloc[[0, 1]]
expected = df.xs(4, drop_level=False)
tm.assert_frame_equal(result, expected)


def test_iloc_returns_scalar(simple_multiindex_dataframe):
arr = np.random.randn(3, 3)
df = simple_multiindex_dataframe(arr)
df = simple_multiindex_dataframe
arr = df.values
result = df.iloc[2, 2]
expected = arr[2, 2]
assert result == expected
Expand Down
68 changes: 34 additions & 34 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,49 +40,49 @@
b = df.drop_duplicates(subset=cols[:-1])


@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("key", keys)
@pytest.mark.parametrize("frame", [a, b])
def test_multiindex_get_loc(lexsort_depth, key, frame):
# GH7724, GH2646
def validate(mi, df, key):
# check indexing into a multi-index before & past the lexsort depth

with warnings.catch_warnings(record=True):
mask = np.ones(len(df)).astype("bool")

# test indexing into a multi-index before & past the lexsort depth
# test for all partials of this key
for i, k in enumerate(key):
mask &= df.iloc[:, i] == k

def validate(mi, df, key):
mask = np.ones(len(df)).astype("bool")
if not mask.any():
assert key[: i + 1] not in mi.index
continue

# test for all partials of this key
for i, k in enumerate(key):
mask &= df.iloc[:, i] == k
assert key[: i + 1] in mi.index
right = df[mask].copy()

if not mask.any():
assert key[: i + 1] not in mi.index
continue
if i + 1 != len(key): # partial key
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
assert return_value is None
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
assert return_value is None
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

assert key[: i + 1] in mi.index
right = df[mask].copy()
else: # full key
return_value = right.set_index(cols[:-1], inplace=True)
assert return_value is None
if len(right) == 1: # single hit
right = Series(
right["jolia"].values, name=right.index[0], index=["jolia"]
)
tm.assert_series_equal(mi.loc[key[: i + 1]], right)
else: # multi hit
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

if i + 1 != len(key): # partial key
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
assert return_value is None
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
assert return_value is None
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

else: # full key
return_value = right.set_index(cols[:-1], inplace=True)
assert return_value is None
if len(right) == 1: # single hit
right = Series(
right["jolia"].values, name=right.index[0], index=["jolia"]
)
tm.assert_series_equal(mi.loc[key[: i + 1]], right)
else: # multi hit
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("lexsort_depth", list(range(5)))
@pytest.mark.parametrize("key", keys)
@pytest.mark.parametrize("frame", [a, b])
def test_multiindex_get_loc(lexsort_depth, key, frame):
# GH7724, GH2646

with warnings.catch_warnings(record=True):
if lexsort_depth == 0:
df = frame.copy()
else:
Expand Down
35 changes: 0 additions & 35 deletions pandas/tests/indexing/multiindex/test_insert.py

This file was deleted.

67 changes: 0 additions & 67 deletions pandas/tests/indexing/multiindex/test_ix.py

This file was deleted.

57 changes: 57 additions & 0 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -29,6 +31,61 @@ def frame_random_data_integer_multi_index():


class TestMultiIndexLoc:
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
# Setting one cell through .loc with a tuple row key and a column label
# must be readable back through the same .loc key.
# NOTE(review): the fixture is defined outside this view; the test assumes
# its row index contains ("bar", "two") and that it has a column "B" --
# confirm against the fixture definition.
frame = multiindex_dataframe_random_data
frame.loc[("bar", "two"), "B"] = 5
assert frame.loc[("bar", "two"), "B"] == 5

# with integer labels
# Work on a copy and relabel the columns 0, 1, 2 so the column key is an
# integer label rather than a string.
df = frame.copy()
df.columns = list(range(3))
df.loc[("bar", "two"), 1] = 7
assert df.loc[("bar", "two"), 1] == 7

def test_loc_getitem_general(self):
# .loc with a full (col, year) tuple key on a two-level row MultiIndex,
# checked once on the index as built and once after sorting it.

# GH#2817
data = {
"amount": {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
"col": {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
"year": {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012},
}
# "col" and "year" become the two index levels; "amount" is the only
# remaining column.
df = DataFrame(data).set_index(keys=["col", "year"])
# Rows 2, 3 and 4 of the data above are the ones with col == 4.0 and
# year == 2012.
key = 4.0, 2012

# emits a PerformanceWarning, ok
# The lookup is made before sort_index is called; it is expected to warn
# and to return the last three rows.
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(df.loc[key], df.iloc[2:])

# this is ok
# sort_index(inplace=True) mutates df and returns None, which is asserted
# before repeating the lookup on the sorted index.
return_value = df.sort_index(inplace=True)
assert return_value is None
res = df.loc[key]

# col has float dtype, result should be Float64Index
# Expected result: the three matching "amount" values under a MultiIndex
# that repeats the (4.0, 2012) key and keeps the level names.
index = MultiIndex.from_arrays([[4.0] * 3, [2012] * 3], names=["col", "year"])
expected = DataFrame({"amount": [222, 333, 444]}, index=index)
tm.assert_frame_equal(res, expected)

def test_loc_getitem_multiindex_missing_label_raises(self):
# GH#21593
# .loc with a label that is absent from the row MultiIndex must raise
# KeyError whose message is exactly the missing label.
# The label 2 appears only in the column labels below; the row index has
# first-level values 4 and 8 and second-level values 8, 10 and 12.
df = DataFrame(
np.random.randn(3, 3),
columns=[[2, 2, 4], [6, 8, 10]],
index=[[4, 4, 8], [8, 10, 12]],
)

# The anchored pattern "^2$" requires the whole message to be "2".
with pytest.raises(KeyError, match=r"^2$"):
df.loc[2]

def test_loc_getitem_list_of_tuples_with_multiindex(
self, multiindex_year_month_day_dataframe_random_data
):
# .loc on a Series with a list of full-depth tuple keys should return the
# same Series as reindexing on the matching slice of the index.
# Column "A" of the fixture frame is the Series under test.
ser = multiindex_year_month_day_dataframe_random_data["A"]
# NOTE(review): the fixture is defined outside this view; this assumes
# positions 49 and 50 of its index are (2000, 3, 10) and (2000, 3, 13) --
# confirm against the fixture definition.
expected = ser.reindex(ser.index[49:51])
result = ser.loc[[(2000, 3, 10), (2000, 3, 13)]]
tm.assert_series_equal(result, expected)

def test_loc_getitem_series(self):
# GH14730
# passing a series as a key with a MultiIndex
Expand Down
Loading