Skip to content

Commit adeed7a

Browse files
authored
TST/REF: collect indexing tests by method (#37638)
1 parent c86b778 commit adeed7a

14 files changed

+455
-433
lines changed

pandas/tests/frame/indexing/test_indexing.py

+13-96
Original file line numberDiff line numberDiff line change
@@ -66,21 +66,6 @@ def test_getitem_dupe_cols(self):
6666
with pytest.raises(KeyError, match=re.escape(msg)):
6767
df[["baf"]]
6868

69-
@pytest.mark.parametrize("key_type", [iter, np.array, Series, Index])
70-
def test_loc_iterable(self, float_frame, key_type):
71-
idx = key_type(["A", "B", "C"])
72-
result = float_frame.loc[:, idx]
73-
expected = float_frame.loc[:, ["A", "B", "C"]]
74-
tm.assert_frame_equal(result, expected)
75-
76-
def test_loc_timedelta_0seconds(self):
77-
# GH#10583
78-
df = DataFrame(np.random.normal(size=(10, 4)))
79-
df.index = pd.timedelta_range(start="0s", periods=10, freq="s")
80-
expected = df.loc[pd.Timedelta("0s") :, :]
81-
result = df.loc["0s":, :]
82-
tm.assert_frame_equal(expected, result)
83-
8469
@pytest.mark.parametrize(
8570
"idx_type",
8671
[
@@ -125,28 +110,20 @@ def test_getitem_listlike(self, idx_type, levels, float_frame):
125110
with pytest.raises(KeyError, match="not in index"):
126111
frame[idx]
127112

128-
@pytest.mark.parametrize(
129-
"val,expected", [(2 ** 63 - 1, Series([1])), (2 ** 63, Series([2]))]
130-
)
131-
def test_loc_uint64(self, val, expected):
132-
# see gh-19399
133-
df = DataFrame([1, 2], index=[2 ** 63 - 1, 2 ** 63])
134-
result = df.loc[val]
135-
136-
expected.name = val
137-
tm.assert_series_equal(result, expected)
138-
139113
def test_getitem_callable(self, float_frame):
140114
# GH 12533
141115
result = float_frame[lambda x: "A"]
142-
tm.assert_series_equal(result, float_frame.loc[:, "A"])
116+
expected = float_frame.loc[:, "A"]
117+
tm.assert_series_equal(result, expected)
143118

144119
result = float_frame[lambda x: ["A", "B"]]
120+
expected = float_frame.loc[:, ["A", "B"]]
145121
tm.assert_frame_equal(result, float_frame.loc[:, ["A", "B"]])
146122

147123
df = float_frame[:3]
148124
result = df[lambda x: [True, False, True]]
149-
tm.assert_frame_equal(result, float_frame.iloc[[0, 2], :])
125+
expected = float_frame.iloc[[0, 2], :]
126+
tm.assert_frame_equal(result, expected)
150127

151128
def test_setitem_list(self, float_frame):
152129

@@ -181,11 +158,6 @@ def test_setitem_list(self, float_frame):
181158
expected = Series(["1", "2"], df.columns, name=1)
182159
tm.assert_series_equal(result, expected)
183160

184-
def test_setitem_list_not_dataframe(self, float_frame):
185-
data = np.random.randn(len(float_frame), 2)
186-
float_frame[["A", "B"]] = data
187-
tm.assert_almost_equal(float_frame[["A", "B"]].values, data)
188-
189161
def test_setitem_list_of_tuples(self, float_frame):
190162
tuples = list(zip(float_frame["A"], float_frame["B"]))
191163
float_frame["tuples"] = tuples
@@ -273,14 +245,6 @@ def test_setitem_multi_index(self):
273245
df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
274246
tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])
275247

276-
def test_setitem_callable(self):
277-
# GH 12533
278-
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]})
279-
df[lambda x: "A"] = [11, 12, 13, 14]
280-
281-
exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]})
282-
tm.assert_frame_equal(df, exp)
283-
284248
def test_setitem_other_callable(self):
285249
# GH 13299
286250
def inc(x):
@@ -518,18 +482,13 @@ def test_setitem(self, float_frame):
518482
df.loc[0] = np.nan
519483
tm.assert_frame_equal(df, expected)
520484

521-
@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
522-
def test_setitem_dtype(self, dtype, float_frame):
523-
arr = np.random.randn(len(float_frame))
524-
525-
float_frame[dtype] = np.array(arr, dtype=dtype)
526-
assert float_frame[dtype].dtype.name == dtype
527-
528485
def test_setitem_tuple(self, float_frame):
529486
float_frame["A", "B"] = float_frame["A"]
530-
tm.assert_series_equal(
531-
float_frame["A", "B"], float_frame["A"], check_names=False
532-
)
487+
assert ("A", "B") in float_frame.columns
488+
489+
result = float_frame["A", "B"]
490+
expected = float_frame["A"]
491+
tm.assert_series_equal(result, expected, check_names=False)
533492

534493
def test_setitem_always_copy(self, float_frame):
535494
s = float_frame["A"].copy()
@@ -588,25 +547,6 @@ def test_setitem_boolean(self, float_frame):
588547
np.putmask(expected.values, mask.values, df.values * 2)
589548
tm.assert_frame_equal(df, expected)
590549

591-
@pytest.mark.parametrize(
592-
"mask_type",
593-
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
594-
ids=["dataframe", "array"],
595-
)
596-
def test_setitem_boolean_mask(self, mask_type, float_frame):
597-
598-
# Test for issue #18582
599-
df = float_frame.copy()
600-
mask = mask_type(df)
601-
602-
# index with boolean mask
603-
result = df.copy()
604-
result[mask] = np.nan
605-
606-
expected = df.copy()
607-
expected.values[np.array(mask)] = np.nan
608-
tm.assert_frame_equal(result, expected)
609-
610550
def test_setitem_cast(self, float_frame):
611551
float_frame["D"] = float_frame["D"].astype("i8")
612552
assert float_frame["D"].dtype == np.int64
@@ -821,19 +761,6 @@ def test_getitem_empty_frame_with_boolean(self):
821761
df2 = df[df > 0]
822762
tm.assert_frame_equal(df, df2)
823763

824-
def test_slice_floats(self):
825-
index = [52195.504153, 52196.303147, 52198.369883]
826-
df = DataFrame(np.random.rand(3, 2), index=index)
827-
828-
s1 = df.loc[52195.1:52196.5]
829-
assert len(s1) == 2
830-
831-
s1 = df.loc[52195.1:52196.6]
832-
assert len(s1) == 2
833-
834-
s1 = df.loc[52195.1:52198.9]
835-
assert len(s1) == 3
836-
837764
def test_getitem_fancy_slice_integers_step(self):
838765
df = DataFrame(np.random.randn(10, 5))
839766

@@ -883,15 +810,6 @@ def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):
883810

884811
assert (float_frame["C"] == 4).all()
885812

886-
def test_setitem_slice_position(self):
887-
# GH#31469
888-
df = DataFrame(np.zeros((100, 1)))
889-
df[-4:] = 1
890-
arr = np.zeros((100, 1))
891-
arr[-4:] = 1
892-
expected = DataFrame(arr)
893-
tm.assert_frame_equal(df, expected)
894-
895813
def test_getitem_setitem_non_ix_labels(self):
896814
df = tm.makeTimeDataFrame()
897815

@@ -1000,14 +918,13 @@ def test_getitem_fancy_ints(self, float_frame):
1000918
expected = float_frame.loc[:, float_frame.columns[[2, 0, 1]]]
1001919
tm.assert_frame_equal(result, expected)
1002920

1003-
def test_getitem_setitem_fancy_exceptions(self, float_frame):
1004-
ix = float_frame.iloc
921+
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
1005922
with pytest.raises(IndexingError, match="Too many indexers"):
1006-
ix[:, :, :]
923+
float_frame.iloc[:, :, :]
1007924

1008925
with pytest.raises(IndexError, match="too many indices for array"):
1009926
# GH#32257 we let numpy do validation, get their exception
1010-
ix[:, :, :] = 1
927+
float_frame.iloc[:, :, :] = 1
1011928

1012929
def test_getitem_setitem_boolean_misaligned(self, float_frame):
1013930
# boolean index misaligned labels

pandas/tests/frame/indexing/test_setitem.py

+54
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,18 @@
2222

2323

2424
class TestDataFrameSetItem:
25+
@pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"])
26+
def test_setitem_dtype(self, dtype, float_frame):
27+
arr = np.random.randn(len(float_frame))
28+
29+
float_frame[dtype] = np.array(arr, dtype=dtype)
30+
assert float_frame[dtype].dtype.name == dtype
31+
32+
def test_setitem_list_not_dataframe(self, float_frame):
33+
data = np.random.randn(len(float_frame), 2)
34+
float_frame[["A", "B"]] = data
35+
tm.assert_almost_equal(float_frame[["A", "B"]].values, data)
36+
2537
def test_setitem_error_msmgs(self):
2638

2739
# GH 7432
@@ -285,3 +297,45 @@ def test_iloc_setitem_bool_indexer(self, klass):
285297
df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2
286298
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
287299
tm.assert_frame_equal(df, expected)
300+
301+
302+
class TestDataFrameSetItemSlicing:
303+
def test_setitem_slice_position(self):
304+
# GH#31469
305+
df = DataFrame(np.zeros((100, 1)))
306+
df[-4:] = 1
307+
arr = np.zeros((100, 1))
308+
arr[-4:] = 1
309+
expected = DataFrame(arr)
310+
tm.assert_frame_equal(df, expected)
311+
312+
313+
class TestDataFrameSetItemCallable:
314+
def test_setitem_callable(self):
315+
# GH#12533
316+
df = DataFrame({"A": [1, 2, 3, 4], "B": [5, 6, 7, 8]})
317+
df[lambda x: "A"] = [11, 12, 13, 14]
318+
319+
exp = DataFrame({"A": [11, 12, 13, 14], "B": [5, 6, 7, 8]})
320+
tm.assert_frame_equal(df, exp)
321+
322+
323+
class TestDataFrameSetItemBooleanMask:
324+
@pytest.mark.parametrize(
325+
"mask_type",
326+
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],
327+
ids=["dataframe", "array"],
328+
)
329+
def test_setitem_boolean_mask(self, mask_type, float_frame):
330+
331+
# Test for issue #18582
332+
df = float_frame.copy()
333+
mask = mask_type(df)
334+
335+
# index with boolean mask
336+
result = df.copy()
337+
result[mask] = np.nan
338+
339+
expected = df.copy()
340+
expected.values[np.array(mask)] = np.nan
341+
tm.assert_frame_equal(result, expected)

pandas/tests/frame/indexing/test_sparse.py

-37
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
1-
import numpy as np
2-
import pytest
3-
4-
import pandas.util._test_decorators as td
5-
61
import pandas as pd
72
import pandas._testing as tm
83
from pandas.arrays import SparseArray
9-
from pandas.core.arrays.sparse import SparseDtype
104

115

126
class TestSparseDataFrameIndexing:
@@ -23,34 +17,3 @@ def test_getitem_sparse_column(self):
2317

2418
result = df.loc[:, "A"]
2519
tm.assert_series_equal(result, expected)
26-
27-
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
28-
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
29-
@td.skip_if_no_scipy
30-
def test_loc_getitem_from_spmatrix(self, spmatrix_t, dtype):
31-
import scipy.sparse
32-
33-
spmatrix_t = getattr(scipy.sparse, spmatrix_t)
34-
35-
# The bug is triggered by a sparse matrix with purely sparse columns. So the
36-
# recipe below generates a rectangular matrix of dimension (5, 7) where all the
37-
# diagonal cells are ones, meaning the last two columns are purely sparse.
38-
rows, cols = 5, 7
39-
spmatrix = spmatrix_t(np.eye(rows, cols, dtype=dtype), dtype=dtype)
40-
df = pd.DataFrame.sparse.from_spmatrix(spmatrix)
41-
42-
# regression test for #34526
43-
itr_idx = range(2, rows)
44-
result = df.loc[itr_idx].values
45-
expected = spmatrix.toarray()[itr_idx]
46-
tm.assert_numpy_array_equal(result, expected)
47-
48-
# regression test for #34540
49-
result = df.loc[itr_idx].dtypes.values
50-
expected = np.full(cols, SparseDtype(dtype, fill_value=0))
51-
tm.assert_numpy_array_equal(result, expected)
52-
53-
def test_all_sparse(self):
54-
df = pd.DataFrame({"A": pd.array([0, 0], dtype=pd.SparseDtype("int64"))})
55-
result = df.loc[[0, 1]]
56-
tm.assert_frame_equal(result, df)

pandas/tests/indexing/test_at.py

+30-3
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,41 @@
11
from datetime import datetime, timezone
22

3-
import pandas as pd
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame
47
import pandas._testing as tm
58

69

710
def test_at_timezone():
811
# https://github.com/pandas-dev/pandas/issues/33544
9-
result = pd.DataFrame({"foo": [datetime(2000, 1, 1)]})
12+
result = DataFrame({"foo": [datetime(2000, 1, 1)]})
1013
result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc)
11-
expected = pd.DataFrame(
14+
expected = DataFrame(
1215
{"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object
1316
)
1417
tm.assert_frame_equal(result, expected)
18+
19+
20+
class TestAtWithDuplicates:
21+
def test_at_with_duplicate_axes_requires_scalar_lookup(self):
22+
# GH#33041 check that falling back to loc doesn't allow non-scalar
23+
# args to slip in
24+
25+
arr = np.random.randn(6).reshape(3, 2)
26+
df = DataFrame(arr, columns=["A", "A"])
27+
28+
msg = "Invalid call for scalar access"
29+
with pytest.raises(ValueError, match=msg):
30+
df.at[[1, 2]]
31+
with pytest.raises(ValueError, match=msg):
32+
df.at[1, ["A"]]
33+
with pytest.raises(ValueError, match=msg):
34+
df.at[:, "A"]
35+
36+
with pytest.raises(ValueError, match=msg):
37+
df.at[[1, 2]] = 1
38+
with pytest.raises(ValueError, match=msg):
39+
df.at[1, ["A"]] = 1
40+
with pytest.raises(ValueError, match=msg):
41+
df.at[:, "A"] = 1

pandas/tests/indexing/test_categorical.py

-10
Original file line numberDiff line numberDiff line change
@@ -73,16 +73,6 @@ def test_loc_scalar(self):
7373
with pytest.raises(KeyError, match="^1$"):
7474
df.loc[1]
7575

76-
def test_getitem_scalar(self):
77-
78-
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
79-
80-
s = Series([1, 2], index=cats)
81-
82-
expected = s.iloc[0]
83-
result = s[cats[0]]
84-
assert result == expected
85-
8676
def test_slicing(self):
8777
cat = Series(Categorical([1, 2, 3, 4]))
8878
reversed = cat[::-1]

0 commit comments

Comments
 (0)