Skip to content

TST/REF: collect indexing tests by method #37638

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 6, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 13 additions & 96 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,21 +66,6 @@ def test_getitem_dupe_cols(self):
with pytest.raises(KeyError, match=re.escape(msg)):
df[["baf"]]

@pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
def test_loc_iterable(self, float_frame, key_type):
    """Selecting columns with ``.loc`` gives the same frame whether the labels
    are wrapped by ``key_type`` or passed as a plain list."""
    labels = ["A", "B", "C"]
    wrapped = key_type(labels)

    result = float_frame.loc[:, wrapped]
    expected = float_frame.loc[:, labels]
    tm.assert_frame_equal(result, expected)

def test_loc_timedelta_0seconds(self):
# GH#10583
df = DataFrame(np.random.normal(size=(10, 4)))
df.index = pd.timedelta_range(start="0s", periods=10, freq="s")
expected = df.loc[pd.Timedelta("0s") :, :]
result = df.loc["0s":, :]
tm.assert_frame_equal(expected, result)

@pytest.mark.parametrize(
"idx_type",
[
Expand Down Expand Up @@ -125,28 +110,20 @@ def test_getitem_listlike(self, idx_type, levels, float_frame):
with pytest.raises(KeyError, match="not in index"):
frame[idx]

@pytest.mark.parametrize(
    "val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))]
)
def test_loc_uint64(self, val, expected):
    """Looking up a single label with ``.loc`` on an index holding ``2**63 - 1``
    and ``2**63`` returns the matching row, named after the label."""
    # see gh-19399
    df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63])
    result = df.loc[val]

    # The Series in the parametrize list is handed to the test as-is, so name
    # a copy rather than mutating the shared parameter object.
    expected = expected.copy()
    expected.name = val
    tm.assert_series_equal(result, expected)

def test_getitem_callable(self, float_frame):
# GH 12533
result = float_frame[lambda x: "A"]
tm.assert_series_equal(result, float_frame.loc[:, "A"])
expected = float_frame.loc[:, "A"]
tm.assert_series_equal(result, expected)

result = float_frame[lambda x: ["A", "B"]]
expected = float_frame.loc[:, ["A", "B"]]
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])

df = float_frame[:3]
result = df[lambda x: [True, False, True]]
tm.assert_frame_equal(result, float_frame.iloc[[0, 2], :])
expected = float_frame.iloc[[0, 2], :]
tm.assert_frame_equal(result, expected)

def test_setitem_list(self, float_frame):

Expand Down Expand Up @@ -181,11 +158,6 @@ def test_setitem_list(self, float_frame):
expected = Series(["1", "2"], df.columns, name=1)
tm.assert_series_equal(result, expected)

def test_setitem_list_not_dataframe(self, float_frame):
data = np.random.randn(len(float_frame), 2)
float_frame[["A", "B"]] = data
tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

def test_setitem_list_of_tuples(self, float_frame):
tuples = list(zip(float_frame["A"], float_frame["B"]))
float_frame["tuples"] = tuples
Expand Down Expand Up @@ -273,14 +245,6 @@ def test_setitem_multi_index(self):
df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

def test_setitem_callable(self):
# GH 12533
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]})
df[lambda x: "A"] = [11, 12, 13, 14]

exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]})
tm.assert_frame_equal(df, exp)

def test_setitem_other_callable(self):
# GH 13299
def inc(x):
Expand Down Expand Up @@ -518,18 +482,13 @@ def test_setitem(self, float_frame):
df.loc[0] = np.nan
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
def test_setitem_dtype(self, dtype, float_frame):
    """A column assigned from an ndarray of ``dtype`` reports that dtype."""
    n_rows = len(float_frame)
    typed_values = np.array(np.random.randn(n_rows), dtype=dtype)

    float_frame[dtype] = typed_values

    stored_dtype = float_frame[dtype].dtype
    assert stored_dtype.name == dtype

def test_setitem_tuple(self, float_frame):
float_frame["A", "B"] = float_frame["A"]
tm.assert_series_equal(
float_frame["A", "B"], float_frame["A"], check_names=False
)
assert ("A", "B") in float_frame.columns

result = float_frame["A", "B"]
expected = float_frame["A"]
tm.assert_series_equal(result, expected, check_names=False)

def test_setitem_always_copy(self, float_frame):
s = float_frame["A"].copy()
Expand Down Expand Up @@ -588,25 +547,6 @@ def test_setitem_boolean(self, float_frame):
np.putmask(expected.values, mask.values, df.values * 2)
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize(
    "mask_type",
    [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
    ids=["dataframe", "array"],
)
def test_setitem_boolean_mask(self, mask_type, float_frame):
    """Setting NaN through a boolean mask blanks exactly the masked cells.

    ``mask_type`` builds the mask either as a DataFrame or as its ndarray.
    """
    # Test for issue #18582
    df = float_frame.copy()
    mask = mask_type(df)

    # index with boolean mask
    result = df.copy()
    result[mask] = np.nan

    # Apply the mask to an explicit copy of the values and rebuild the frame.
    # Writing through ``expected.values`` only changes the frame when that
    # attribute happens to return a writable view of its data.
    expected_values = df.values.copy()
    expected_values[np.array(mask)] = np.nan
    expected = DataFrame(expected_values, index=df.index, columns=df.columns)
    tm.assert_frame_equal(result, expected)

def test_setitem_cast(self, float_frame):
float_frame["D"] = float_frame["D"].astype("i8")
assert float_frame["D"].dtype == np.int64
Expand Down Expand Up @@ -821,19 +761,6 @@ def test_getitem_empty_frame_with_boolean(self):
df2 = df[df > 0]
tm.assert_frame_equal(df, df2)

def test_slice_floats(self):
index = [52195.504153, 52196.303147, 52198.369883]
df = DataFrame(np.random.rand(3, 2), index=index)

s1 = df.loc[52195.1:52196.5]
assert len(s1) == 2

s1 = df.loc[52195.1:52196.6]
assert len(s1) == 2

s1 = df.loc[52195.1:52198.9]
assert len(s1) == 3

def test_getitem_fancy_slice_integers_step(self):
df = DataFrame(np.random.randn(10, 5))

Expand Down Expand Up @@ -883,15 +810,6 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):

assert (float_frame["C"] == 4).all()

def test_setitem_slice_position(self):
# GH#31469
df = DataFrame(np.zeros((100, 1)))
df[-4:] = 1
arr = np.zeros((100, 1))
arr[-4:] = 1
expected = DataFrame(arr)
tm.assert_frame_equal(df, expected)

def test_getitem_setitem_non_ix_labels(self):
df = tm.makeTimeDataFrame()

Expand Down Expand Up @@ -1000,14 +918,13 @@ def test_getitem_fancy_ints(self, float_frame):
expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]]
tm.assert_frame_equal(result, expected)

def test_getitem_setitem_fancy_exceptions(self, float_frame):
ix = float_frame.iloc
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
with pytest.raises(IndexingError, match="Too many indexers"):
ix[:, :, :]
float_frame.iloc[:, :, :]

with pytest.raises(IndexError, match="too many indices for array"):
# GH#32257 we let numpy do validation, get their exception
ix[:, :, :] = 1
float_frame.iloc[:, :, :] = 1

def test_getitem_setitem_boolean_misaligned(self, float_frame):
# boolean index misaligned labels
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,18 @@


class TestDataFrameSetItem:
@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
def test_setitem_dtype(self, dtype, float_frame):
    """A column assigned from an ndarray of ``dtype`` reports that dtype."""
    n_rows = len(float_frame)
    typed_values = np.array(np.random.randn(n_rows), dtype=dtype)

    float_frame[dtype] = typed_values

    stored_dtype = float_frame[dtype].dtype
    assert stored_dtype.name == dtype

def test_setitem_list_not_dataframe(self, float_frame):
data = np.random.randn(len(float_frame), 2)
float_frame[["A", "B"]] = data
tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

def test_setitem_error_msmgs(self):

# GH 7432
Expand Down Expand Up @@ -285,3 +297,45 @@ def test_iloc_setitem_bool_indexer(self, klass):
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
tm.assert_frame_equal(df, expected)


class TestDataFrameSetItemSlicing:
    """Tests for ``DataFrame.__setitem__`` with slice keys."""

    def test_setitem_slice_position(self):
        """``df[-4:] = 1`` sets the same rows that the identical slice
        assignment sets on a NumPy array of the same shape."""
        # GH#31469
        shape = (100, 1)

        df = DataFrame(np.zeros(shape))
        df[-4:] = 1

        values = np.zeros(shape)
        values[-4:] = 1
        tm.assert_frame_equal(df, DataFrame(values))


class TestDataFrameSetItemCallable:
    """Tests for ``DataFrame.__setitem__`` with callable keys."""

    def test_setitem_callable(self):
        """Assigning through a callable key that returns ``"A"`` replaces
        column ``"A"`` and leaves ``"B"`` alone."""
        # GH#12533
        def select_a(frame):
            return "A"

        df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]})
        df[select_a] = [11, 12, 13, 14]

        expected = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]})
        tm.assert_frame_equal(df, expected)


class TestDataFrameSetItemBooleanMask:
    """Tests for ``DataFrame.__setitem__`` with boolean-mask keys."""

    @pytest.mark.parametrize(
        "mask_type",
        [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
        ids=["dataframe", "array"],
    )
    def test_setitem_boolean_mask(self, mask_type, float_frame):
        """Setting NaN through a boolean mask blanks exactly the masked cells.

        ``mask_type`` builds the mask either as a DataFrame or as its ndarray.
        """
        # Test for issue #18582
        df = float_frame.copy()
        mask = mask_type(df)

        # index with boolean mask
        result = df.copy()
        result[mask] = np.nan

        # Apply the mask to an explicit copy of the values and rebuild the
        # frame. Writing through ``expected.values`` only changes the frame
        # when that attribute happens to return a writable view of its data.
        expected_values = df.values.copy()
        expected_values[np.array(mask)] = np.nan
        expected = DataFrame(expected_values, index=df.index, columns=df.columns)
        tm.assert_frame_equal(result, expected)
37 changes: 0 additions & 37 deletions pandas/tests/frame/indexing/test_sparse.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
import pandas._testing as tm
from pandas.arrays import SparseArray
from pandas.core.arrays.sparse import SparseDtype


class TestSparseDataFrameIndexing:
Expand All @@ -23,34 +17,3 @@ def test_getitem_sparse_column(self):

result = df.loc[:, "A"]
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
@td.skip_if_no_scipy
def test_loc_getitem_from_spmatrix(self, spmatrix_t, dtype):
    """Row selection with ``.loc`` on a frame built from a scipy sparse matrix
    keeps both the values and the per-column sparse dtypes."""
    import scipy.sparse

    # Resolve the matrix class from its name in the parametrize list.
    matrix_cls = getattr(scipy.sparse, spmatrix_t)

    # The bug is triggered by a sparse matrix with purely sparse columns. So the
    # recipe below generates a rectangular matrix of dimension (5, 7) where all the
    # diagonal cells are ones, meaning the last two columns are purely sparse.
    n_rows, n_cols = 5, 7
    dense = np.eye(n_rows, n_cols, dtype=dtype)
    spmatrix = matrix_cls(dense, dtype=dtype)
    df = pd.DataFrame.sparse.from_spmatrix(spmatrix)

    row_labels = range(2, n_rows)

    # regression test for #34526
    selected_values = df.loc[row_labels].values
    expected_values = spmatrix.toarray()[row_labels]
    tm.assert_numpy_array_equal(selected_values, expected_values)

    # regression test for #34540
    selected_dtypes = df.loc[row_labels].dtypes.values
    expected_dtypes = np.full(n_cols, SparseDtype(dtype, fill_value=0))
    tm.assert_numpy_array_equal(selected_dtypes, expected_dtypes)

def test_all_sparse(self):
df = pd.DataFrame({"A": pd.array([0, 0], dtype=pd.SparseDtype("int64"))})
result = df.loc[[0, 1]]
tm.assert_frame_equal(result, df)
33 changes: 30 additions & 3 deletions pandas/tests/indexing/test_at.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,41 @@
from datetime import datetime, timezone

import pandas as pd
import numpy as np
import pytest

from pandas import DataFrame
import pandas._testing as tm


def test_at_timezone():
# https://github.com/pandas-dev/pandas/issues/33544
result = pd.DataFrame({"foo": [datetime(2000, 1, 1)]})
result = DataFrame({"foo": [datetime(2000, 1, 1)]})
result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
expected = pd.DataFrame(
expected = DataFrame(
{"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
)
tm.assert_frame_equal(result, expected)


class TestAtWithDuplicates:
    """Tests for ``.at`` on frames with duplicate axis labels."""

    def test_at_with_duplicate_axes_requires_scalar_lookup(self):
        """Non-scalar keys passed to ``.at`` raise ``ValueError`` for both
        reads and writes when the column labels are duplicated."""
        # GH#33041 check that falling back to loc doesn't allow non-scalar
        #  args to slip in

        values = np.random.randn(6).reshape(3, 2)
        df = DataFrame(values, columns=["A", "A"])

        msg = "Invalid call for scalar access"

        # Same keys as df.at[[1, 2]], df.at[1, ["A"]] and df.at[:, "A"].
        non_scalar_keys = [[1, 2], (1, ["A"]), (slice(None), "A")]

        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                df.at[key]

        for key in non_scalar_keys:
            with pytest.raises(ValueError, match=msg):
                df.at[key] = 1
10 changes: 0 additions & 10 deletions pandas/tests/indexing/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,6 @@ def test_loc_scalar(self):
with pytest.raises(KeyError, match="^1$"):
df.loc[1]

def test_getitem_scalar(self):

cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])

s = Series([1, 2], index=cats)

expected = s.iloc[0]
result = s[cats[0]]
assert result == expected

def test_slicing(self):
cat = Series(Categorical([1, 2, 3, 4]))
reversed = cat[::-1]
Expand Down
Loading