Skip to content

Commit 18b4864

Browse files
authored
TST/REF: collect tests by method (#37372)
* TST: cln/parametrize * TST: collect tests by method * TST/REF: collect tests by method * lint fixup
1 parent 35056ab commit 18b4864

File tree

10 files changed

+187
-191
lines changed

10 files changed

+187
-191
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
import pytest
2+
3+
from pandas import DataFrame, MultiIndex
4+
5+
6+
class TestGetitem:
    """Tests for ``DataFrame.__getitem__`` with MultiIndex columns."""

    def test_getitem_unused_level_raises(self):
        """Selecting a level value that no code refers to raises ``KeyError``."""
        # GH#20410
        mi = MultiIndex(
            levels=[["a_lot", "onlyone", "notevenone"], [1970, ""]],
            codes=[[1, 0], [1, 0]],
        )
        df = DataFrame(-1, index=range(3), columns=mi)

        # "notevenone" is present in the first level, but the codes only
        # reference positions 1 and 0, so no column actually carries it.
        with pytest.raises(KeyError, match="notevenone"):
            df["notevenone"]

pandas/tests/frame/test_join.py

+25
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,31 @@ def test_suppress_future_warning_with_sort_kw(sort_kw):
222222

223223

224224
class TestDataFrameJoin:
225+
def test_join(self, multiindex_dataframe_random_data):
226+
frame = multiindex_dataframe_random_data
227+
228+
a = frame.loc[frame.index[:5], ["A"]]
229+
b = frame.loc[frame.index[2:], ["B", "C"]]
230+
231+
joined = a.join(b, how="outer").reindex(frame.index)
232+
expected = frame.copy()
233+
expected.values[np.isnan(joined.values)] = np.nan
234+
235+
assert not np.isnan(joined.values).all()
236+
237+
# TODO what should join do with names ?
238+
tm.assert_frame_equal(joined, expected, check_names=False)
239+
240+
def test_join_segfault(self):
241+
# GH#1532
242+
df1 = DataFrame({"a": [1, 1], "b": [1, 2], "x": [1, 2]})
243+
df2 = DataFrame({"a": [2, 2], "b": [1, 2], "y": [1, 2]})
244+
df1 = df1.set_index(["a", "b"])
245+
df2 = df2.set_index(["a", "b"])
246+
# it works!
247+
for how in ["left", "right", "outer"]:
248+
df1.join(df2, how=how)
249+
225250
def test_join_str_datetime(self):
226251
str_dates = ["20120209", "20120222"]
227252
dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

pandas/tests/indexes/test_base.py

-6
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,6 @@
1414
from pandas.compat.numpy import np_datetime64_compat
1515
from pandas.util._test_decorators import async_mark
1616

17-
from pandas.core.dtypes.generic import ABCIndex
18-
1917
import pandas as pd
2018
from pandas import (
2119
CategoricalIndex,
@@ -2518,10 +2516,6 @@ def test_ensure_index_mixed_closed_intervals(self):
25182516
],
25192517
)
25202518
def test_generated_op_names(opname, index):
2521-
if isinstance(index, ABCIndex) and opname == "rsub":
2522-
# Index.__rsub__ does not exist; though the method does exist
2523-
# for subclasses. see GH#19723
2524-
return
25252519
opname = f"__{opname}__"
25262520
method = getattr(index, opname)
25272521
assert method.__name__ == opname

pandas/tests/indexing/test_loc.py

+42-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from pandas.compat.numpy import is_numpy_dev
99

1010
import pandas as pd
11-
from pandas import DataFrame, Series, Timestamp, date_range
11+
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
1212
import pandas._testing as tm
1313
from pandas.api.types import is_scalar
1414
from pandas.tests.indexing.common import Base
@@ -979,6 +979,47 @@ def test_loc_reverse_assignment(self):
979979
tm.assert_series_equal(result, expected)
980980

981981

982+
class TestLocWithMultiIndex:
983+
@pytest.mark.parametrize(
984+
"keys, expected",
985+
[
986+
(["b", "a"], [["b", "b", "a", "a"], [1, 2, 1, 2]]),
987+
(["a", "b"], [["a", "a", "b", "b"], [1, 2, 1, 2]]),
988+
((["a", "b"], [1, 2]), [["a", "a", "b", "b"], [1, 2, 1, 2]]),
989+
((["a", "b"], [2, 1]), [["a", "a", "b", "b"], [2, 1, 2, 1]]),
990+
((["b", "a"], [2, 1]), [["b", "b", "a", "a"], [2, 1, 2, 1]]),
991+
((["b", "a"], [1, 2]), [["b", "b", "a", "a"], [1, 2, 1, 2]]),
992+
((["c", "a"], [2, 1]), [["c", "a", "a"], [1, 2, 1]]),
993+
],
994+
)
995+
@pytest.mark.parametrize("dim", ["index", "columns"])
996+
def test_loc_getitem_multilevel_index_order(self, dim, keys, expected):
997+
# GH#22797
998+
# Try to respect order of keys given for MultiIndex.loc
999+
kwargs = {dim: [["c", "a", "a", "b", "b"], [1, 1, 2, 1, 2]]}
1000+
df = DataFrame(np.arange(25).reshape(5, 5), **kwargs)
1001+
exp_index = MultiIndex.from_arrays(expected)
1002+
if dim == "index":
1003+
res = df.loc[keys, :]
1004+
tm.assert_index_equal(res.index, exp_index)
1005+
elif dim == "columns":
1006+
res = df.loc[:, keys]
1007+
tm.assert_index_equal(res.columns, exp_index)
1008+
1009+
def test_loc_preserve_names(self, multiindex_year_month_day_dataframe_random_data):
1010+
ymd = multiindex_year_month_day_dataframe_random_data
1011+
1012+
result = ymd.loc[2000]
1013+
result2 = ymd["A"].loc[2000]
1014+
assert result.index.names == ymd.index.names[1:]
1015+
assert result2.index.names == ymd.index.names[1:]
1016+
1017+
result = ymd.loc[2000, 2]
1018+
result2 = ymd["A"].loc[2000, 2]
1019+
assert result.index.name == ymd.index.names[2]
1020+
assert result2.index.name == ymd.index.names[2]
1021+
1022+
9821023
def test_series_loc_getitem_label_list_missing_values():
9831024
# gh-11428
9841025
key = np.array(

pandas/tests/io/formats/test_to_html.py

+8
Original file line number | Diff line number | Diff line change
@@ -822,6 +822,14 @@ def test_html_repr_min_rows(datapath, max_rows, min_rows, expected):
822822
assert result == expected
823823

824824

825+
def test_to_html_multilevel(multiindex_year_month_day_dataframe_random_data):
826+
ymd = multiindex_year_month_day_dataframe_random_data
827+
828+
ymd.columns.name = "foo"
829+
ymd.to_html()
830+
ymd.T.to_html()
831+
832+
825833
@pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
826834
def test_to_html_na_rep_and_float_format(na_rep):
827835
# https://github.com/pandas-dev/pandas/issues/13828

pandas/tests/io/test_pickle.py

+17
Original file line number | Diff line number | Diff line change
@@ -531,3 +531,20 @@ def test_pickle_binary_object_compression(compression):
531531
read_df = pd.read_pickle(buffer, compression=compression)
532532
buffer.seek(0)
533533
tm.assert_frame_equal(df, read_df)
534+
535+
536+
def test_pickle_dataframe_with_multilevel_index(
537+
multiindex_year_month_day_dataframe_random_data,
538+
multiindex_dataframe_random_data,
539+
):
540+
ymd = multiindex_year_month_day_dataframe_random_data
541+
frame = multiindex_dataframe_random_data
542+
543+
def _test_roundtrip(frame):
544+
unpickled = tm.round_trip_pickle(frame)
545+
tm.assert_frame_equal(frame, unpickled)
546+
547+
_test_roundtrip(frame)
548+
_test_roundtrip(frame.T)
549+
_test_roundtrip(ymd)
550+
_test_roundtrip(ymd.T)

pandas/tests/series/methods/test_count.py

+20
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,26 @@
77

88

99
class TestSeriesCount:
10+
def test_count_level_series(self):
11+
index = MultiIndex(
12+
levels=[["foo", "bar", "baz"], ["one", "two", "three", "four"]],
13+
codes=[[0, 0, 0, 2, 2], [2, 0, 1, 1, 2]],
14+
)
15+
16+
ser = Series(np.random.randn(len(index)), index=index)
17+
18+
result = ser.count(level=0)
19+
expected = ser.groupby(level=0).count()
20+
tm.assert_series_equal(
21+
result.astype("f8"), expected.reindex(result.index).fillna(0)
22+
)
23+
24+
result = ser.count(level=1)
25+
expected = ser.groupby(level=1).count()
26+
tm.assert_series_equal(
27+
result.astype("f8"), expected.reindex(result.index).fillna(0)
28+
)
29+
1030
def test_count_multiindex(self, series_with_multilevel_index):
1131
ser = series_with_multilevel_index
1232

pandas/tests/series/test_reductions.py

+10-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,8 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import Series
5+
from pandas import MultiIndex, Series
6+
import pandas._testing as tm
67

78

89
def test_reductions_td64_with_nat():
@@ -46,6 +47,14 @@ def test_prod_numpy16_bug():
4647
assert not isinstance(result, Series)
4748

4849

50+
def test_sum_with_level():
51+
obj = Series([10.0], index=MultiIndex.from_tuples([(2, 3)]))
52+
53+
result = obj.sum(level=0)
54+
expected = Series([10.0], index=[2])
55+
tm.assert_series_equal(result, expected)
56+
57+
4958
@pytest.mark.parametrize("func", [np.any, np.all])
5059
@pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())])
5160
def test_validate_any_all_out_keepdims_raises(kwargs, func):

pandas/tests/test_expressions.py

+47-55
Original file line number | Diff line number | Diff line change
@@ -48,30 +48,37 @@ def setup_method(self, method):
4848
def teardown_method(self, method):
4949
expr._MIN_ELEMENTS = self._MIN_ELEMENTS
5050

51-
def run_arithmetic(self, df, other):
51+
@staticmethod
52+
def call_op(df, other, flex: bool, opname: str):
53+
if flex:
54+
op = lambda x, y: getattr(x, opname)(y)
55+
op.__name__ = opname
56+
else:
57+
op = getattr(operator, opname)
58+
59+
expr.set_use_numexpr(False)
60+
expected = op(df, other)
61+
expr.set_use_numexpr(True)
62+
63+
expr.get_test_result()
64+
65+
result = op(df, other)
66+
return result, expected
67+
68+
def run_arithmetic(self, df, other, flex: bool):
5269
expr._MIN_ELEMENTS = 0
5370
operations = ["add", "sub", "mul", "mod", "truediv", "floordiv"]
54-
for test_flex in [True, False]:
55-
for arith in operations:
56-
# TODO: share with run_binary
57-
if test_flex:
58-
op = lambda x, y: getattr(x, arith)(y)
59-
op.__name__ = arith
71+
for arith in operations:
72+
result, expected = self.call_op(df, other, flex, arith)
73+
74+
if arith == "truediv":
75+
if expected.ndim == 1:
76+
assert expected.dtype.kind == "f"
6077
else:
61-
op = getattr(operator, arith)
62-
expr.set_use_numexpr(False)
63-
expected = op(df, other)
64-
expr.set_use_numexpr(True)
65-
66-
result = op(df, other)
67-
if arith == "truediv":
68-
if expected.ndim == 1:
69-
assert expected.dtype.kind == "f"
70-
else:
71-
assert all(x.kind == "f" for x in expected.dtypes.values)
72-
tm.assert_equal(expected, result)
73-
74-
def run_binary(self, df, other):
78+
assert all(x.kind == "f" for x in expected.dtypes.values)
79+
tm.assert_equal(expected, result)
80+
81+
def run_binary(self, df, other, flex: bool):
7582
"""
7683
tests solely that the result is the same whether or not numexpr is
7784
enabled. Need to test whether the function does the correct thing
@@ -81,37 +88,27 @@ def run_binary(self, df, other):
8188
expr.set_test_mode(True)
8289
operations = ["gt", "lt", "ge", "le", "eq", "ne"]
8390

84-
for test_flex in [True, False]:
85-
for arith in operations:
86-
if test_flex:
87-
op = lambda x, y: getattr(x, arith)(y)
88-
op.__name__ = arith
89-
else:
90-
op = getattr(operator, arith)
91-
expr.set_use_numexpr(False)
92-
expected = op(df, other)
93-
expr.set_use_numexpr(True)
94-
95-
expr.get_test_result()
96-
result = op(df, other)
97-
used_numexpr = expr.get_test_result()
98-
assert used_numexpr, "Did not use numexpr as expected."
99-
tm.assert_equal(expected, result)
100-
101-
def run_frame(self, df, other, run_binary=True):
102-
self.run_arithmetic(df, other)
103-
if run_binary:
104-
expr.set_use_numexpr(False)
105-
binary_comp = other + 1
106-
expr.set_use_numexpr(True)
107-
self.run_binary(df, binary_comp)
91+
for arith in operations:
92+
result, expected = self.call_op(df, other, flex, arith)
93+
94+
used_numexpr = expr.get_test_result()
95+
assert used_numexpr, "Did not use numexpr as expected."
96+
tm.assert_equal(expected, result)
97+
98+
def run_frame(self, df, other, flex: bool):
99+
self.run_arithmetic(df, other, flex)
100+
101+
expr.set_use_numexpr(False)
102+
binary_comp = other + 1
103+
expr.set_use_numexpr(True)
104+
self.run_binary(df, binary_comp, flex)
108105

109106
for i in range(len(df.columns)):
110-
self.run_arithmetic(df.iloc[:, i], other.iloc[:, i])
107+
self.run_arithmetic(df.iloc[:, i], other.iloc[:, i], flex)
111108
# FIXME: don't leave commented-out
112109
# Series comparisons use vec_compare instead of numexpr...
113110
# binary_comp = other.iloc[:, i] + 1
114-
# self.run_binary(df.iloc[:, i], binary_comp)
111+
# self.run_binary(df.iloc[:, i], binary_comp, flex)
115112

116113
@pytest.mark.parametrize(
117114
"df",
@@ -126,14 +123,9 @@ def run_frame(self, df, other, run_binary=True):
126123
_mixed2,
127124
],
128125
)
129-
def test_arithmetic(self, df):
130-
# TODO: FIGURE OUT HOW TO GET RUN_BINARY TO WORK WITH MIXED=...
131-
# can't do arithmetic because comparison methods try to do *entire*
132-
# frame instead of by-column
133-
kinds = {x.kind for x in df.dtypes.values}
134-
should = len(kinds) == 1
135-
136-
self.run_frame(df, df, run_binary=should)
126+
@pytest.mark.parametrize("flex", [True, False])
127+
def test_arithmetic(self, df, flex):
128+
self.run_frame(df, df, flex)
137129

138130
def test_invalid(self):
139131

0 commit comments

Comments
 (0)