Skip to content

Commit ca93eed

Browse files
SaturnFromTitan authored and SeeminSyed committed
CLN: Split pandas/tests/base/test_ops.py (pandas-dev#32744)
1 parent eb40124 commit ca93eed

9 files changed

+751
-687
lines changed

pandas/tests/base/common.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from typing import Any
2+
3+
from pandas import Index
4+
5+
6+
def allow_na_ops(obj: Any) -> bool:
    """
    Report whether ``obj`` may take part in test cases that involve NaN.

    A boolean ``Index`` always yields False; for every other object the
    answer is whatever ``obj._can_hold_na`` reports.
    """
    if isinstance(obj, Index) and obj.is_boolean():
        return False
    return obj._can_hold_na
+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from datetime import datetime
2+
3+
import numpy as np
4+
5+
import pandas as pd
6+
import pandas._testing as tm
7+
8+
9+
def test_drop_duplicates_series_vs_dataframe():
    """
    A single-column frame and the matching Series must drop the same rows.

    Every column of a mixed-dtype frame is checked against each ``keep``
    option by comparing ``DataFrame.drop_duplicates`` with
    ``Series.drop_duplicates().to_frame()``.
    """
    # GH 14192
    timestamps = [
        datetime(2015, 1, 1),
        datetime(2015, 1, 1),
        datetime(2015, 2, 1),
        pd.NaT,
        pd.NaT,
    ]
    data = {
        "a": [1, 1, 1, "one", "one"],
        "b": [2, 2, np.nan, np.nan, np.nan],
        "c": [3, 3, np.nan, np.nan, "three"],
        "d": [1, 2, 3, 4, 4],
        "e": timestamps,
    }
    df = pd.DataFrame(data)
    keep_options = ["first", "last", False]

    for column in df.columns:
        as_frame = df[[column]]
        as_series = df[column]
        for keep in keep_options:
            from_frame = as_frame.drop_duplicates(keep=keep)
            from_series = as_series.drop_duplicates(keep=keep)
            tm.assert_frame_equal(from_frame, from_series.to_frame())

pandas/tests/base/test_factorize.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
import pandas._testing as tm
6+
7+
8+
@pytest.mark.parametrize("sort", [True, False])
def test_factorize(index_or_series_obj, sort):
    """
    Compare ``obj.factorize(sort=sort)`` with codes/uniques built by hand.

    The expected uniques come from ``obj.unique()`` (sorted when ``sort`` is
    set) and the expected codes are the positions of each value of ``obj``
    within those uniques.
    """
    obj = index_or_series_obj
    codes, uniques = obj.factorize(sort=sort)

    # MultiIndex uniques have to be rebuilt from tuples
    if isinstance(obj, pd.MultiIndex):
        expected_uniques = pd.MultiIndex.from_tuples(obj.unique())
    else:
        expected_uniques = pd.Index(obj.unique())

    if sort:
        expected_uniques = expected_uniques.sort_values()

    # build an integer ndarray such that
    # `expected_uniques.take(expected_codes)` equals `obj`
    unique_values = list(expected_uniques)
    expected_codes = np.asarray(
        [unique_values.index(value) for value in obj], dtype=np.intp
    )

    tm.assert_numpy_array_equal(codes, expected_codes)
    tm.assert_index_equal(uniques, expected_uniques)

pandas/tests/base/test_fillna.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""
2+
Though Index.fillna and Series.fillna have separate implementations,
3+
test here to confirm that they behave the same
4+
"""
5+
6+
import numpy as np
7+
import pytest
8+
9+
from pandas._libs.tslib import iNaT
10+
11+
from pandas.core.dtypes.common import needs_i8_conversion
12+
from pandas.core.dtypes.generic import ABCMultiIndex
13+
14+
from pandas import Index
15+
import pandas._testing as tm
16+
from pandas.tests.base.common import allow_na_ops
17+
18+
19+
def test_fillna(index_or_series_obj):
    """
    Fill ``obj`` with its own first value and expect an equal, distinct object.

    MultiIndex inputs are skipped; an empty ``obj`` is filled with ``0``.
    """
    # GH 11343
    obj = index_or_series_obj
    if isinstance(obj, ABCMultiIndex):
        pytest.skip("MultiIndex doesn't support isna")

    # filling with a value taken from obj itself leaves the values unchanged
    if len(obj) > 0:
        fill_value = obj.values[0]
    else:
        fill_value = 0
    result = obj.fillna(fill_value)

    assert_equal = (
        tm.assert_index_equal if isinstance(obj, Index) else tm.assert_series_equal
    )
    assert_equal(obj, result)

    # the result must be a new object, not obj itself
    assert obj is not result
35+
36+
37+
@pytest.mark.parametrize("null_obj", [np.nan, None])
def test_fillna_null(null_obj, index_or_series_obj):
    """
    Null out the first two values of ``obj`` and fill them back in.

    The first two positions are overwritten with ``iNaT`` (when
    ``needs_i8_conversion(obj)`` is true) or with ``null_obj``; ``fillna``
    with the original first value must then match an array whose first two
    positions hold that value.
    """
    # GH 11343
    obj = index_or_series_obj
    klass = type(obj)

    # pytest.skip raises, so these guards are checked strictly in order
    if not allow_na_ops(obj):
        pytest.skip(f"{klass} doesn't allow for NA operations")
    if len(obj) < 1:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(obj, ABCMultiIndex):
        pytest.skip(f"MultiIndex can't hold '{null_obj}'")

    values = obj.values
    fill_value = values[0]

    # copy first so the expected data is unaffected by the nulls written below
    expected = values.copy()
    expected[0:2] = fill_value
    values[0:2] = iNaT if needs_i8_conversion(obj) else null_obj

    expected = klass(expected)
    obj = klass(values)

    result = obj.fillna(fill_value)
    assert_equal = (
        tm.assert_index_equal if isinstance(obj, Index) else tm.assert_series_equal
    )
    assert_equal(result, expected)

    # the result must be a new object, not obj itself
    assert obj is not result

pandas/tests/base/test_misc.py

+204
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
import sys
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas.compat import PYPY
7+
8+
from pandas.core.dtypes.common import (
9+
is_categorical_dtype,
10+
is_datetime64_dtype,
11+
is_datetime64tz_dtype,
12+
is_object_dtype,
13+
)
14+
15+
import pandas as pd
16+
from pandas import DataFrame, Index, IntervalIndex, Series
17+
import pandas._testing as tm
18+
19+
20+
@pytest.mark.parametrize(
    "op_name, op",
    [
        ("add", "+"),
        ("sub", "-"),
        ("mul", "*"),
        ("mod", "%"),
        ("pow", "**"),
        ("truediv", "/"),
        ("floordiv", "//"),
    ],
)
@pytest.mark.parametrize("klass", [Series, DataFrame])
def test_binary_ops_docstring(klass, op_name, op):
    """
    The docstring of each binary op must spell out the operation it performs.

    For ``klass.<op_name>`` the text ``"<klass> <op> other"`` is expected and
    for the reversed method ``klass.r<op_name>`` the text
    ``"other <op> <klass>"``.
    """
    # not using the all_arithmetic_functions fixture with _get_opstr
    # as _get_opstr is used internally in the dynamic implementation of the docstring
    own_name = klass.__name__.lower()
    other_name = "other"

    forward_doc = getattr(klass, op_name).__doc__
    assert f"{own_name} {op} {other_name}" in forward_doc

    # reverse version of the binary ops
    reverse_doc = getattr(klass, "r" + op_name).__doc__
    assert f"{other_name} {op} {own_name}" in reverse_doc
44+
45+
46+
def test_none_comparison(series_with_simple_index):
    """
    Compare a Series with ``None`` from both sides.

    ``==`` must be falsy and ``!=`` truthy at the first two positions.
    Ordering comparisons raise ``TypeError`` for datetime64 (naive or tz-aware)
    data and are falsy at the first two positions otherwise.
    """

    def check_first_two(result, expected):
        # only positions 0 and 1 of the comparison result are inspected
        assert bool(result.iat[0]) is expected
        assert bool(result.iat[1]) is expected

    series = series_with_simple_index
    if isinstance(series.index, IntervalIndex):
        # IntervalIndex breaks on "series[0] = np.nan" below
        pytest.skip("IntervalIndex doesn't support assignment")
    if len(series) < 1:
        pytest.skip("Test doesn't make sense on empty data")

    # bug brought up by #1079
    # changed from TypeError in 0.17.0
    series[0] = np.nan

    # noinspection PyComparisonWithNone
    check_first_two(series == None, False)  # noqa

    # noinspection PyComparisonWithNone
    check_first_two(series != None, True)  # noqa

    check_first_two(None == series, False)  # noqa

    check_first_two(None != series, True)  # noqa

    if is_datetime64_dtype(series) or is_datetime64tz_dtype(series):
        # Following DatetimeIndex (and Timestamp) convention,
        # inequality comparisons with Series[datetime64] raise
        msg = "Invalid comparison"
        with pytest.raises(TypeError, match=msg):
            None > series
        with pytest.raises(TypeError, match=msg):
            series > None
    else:
        check_first_two(None > series, False)

        check_first_two(series < None, False)
92+
93+
94+
def test_ndarray_compat_properties(index_or_series_obj):
    """
    Check which ndarray-style attributes are exposed by an Index or Series.

    ``shape``, ``dtype``, ``T`` and ``nbytes`` must be available, the
    deprecated ones must be absent, ``item()`` must raise ``ValueError`` on
    the fixture object and ``ndim``/``size`` must describe a 1-d container.
    """
    obj = index_or_series_obj

    # Check that we work.
    supported = ("shape", "dtype", "T", "nbytes")
    for attr in supported:
        assert getattr(obj, attr, None) is not None

    # deprecated properties
    removed = ("flags", "strides", "itemsize", "base", "data")
    for attr in removed:
        assert not hasattr(obj, attr)

    msg = "can only convert an array of size 1 to a Python scalar"
    with pytest.raises(ValueError, match=msg):
        obj.item()  # len > 1

    assert obj.ndim == 1
    assert obj.size == len(obj)

    # single-element containers convert to a scalar
    for single in (Index([1]), Series([1])):
        assert single.item() == 1
114+
115+
116+
@pytest.mark.skipif(PYPY, reason="not relevant for PyPy")
def test_memory_usage(index_or_series_obj):
    """
    Compare shallow and deep ``memory_usage`` and ``sys.getsizeof``.

    Empty objects report zero for both; object or categorical data (in the
    values, or in the index of a Series) must report more when ``deep=True``;
    everything else must report the same number either way.
    """
    obj = index_or_series_obj
    shallow = obj.memory_usage()
    deep = obj.memory_usage(deep=True)

    is_series = isinstance(obj, Series)
    has_object = is_object_dtype(obj) or (is_series and is_object_dtype(obj.index))
    has_categorical = is_categorical_dtype(obj) or (
        is_series and is_categorical_dtype(obj.index)
    )

    if len(obj) == 0:
        assert deep == shallow
        assert shallow == 0
    elif has_object or has_categorical:
        # only deep will pick them up
        assert deep > shallow
    else:
        assert shallow == deep

    # sys.getsizeof will call the .memory_usage with
    # deep=True, and add on some GC overhead
    overhead = deep - sys.getsizeof(obj)
    assert abs(overhead) < 100
141+
142+
143+
def test_memory_usage_components_series(series_with_simple_index):
    """Total memory usage must equal the values' usage plus the index's usage."""
    series = series_with_simple_index
    values_usage = series.memory_usage(index=False)
    index_usage = series.index.memory_usage()
    assert series.memory_usage(index=True) == values_usage + index_usage
149+
150+
151+
def test_memory_usage_components_narrow_series(narrow_series):
    """Total memory usage must equal the values' usage plus the index's usage."""
    series = narrow_series
    values_usage = series.memory_usage(index=False)
    index_usage = series.index.memory_usage()
    assert series.memory_usage(index=True) == values_usage + index_usage
157+
158+
159+
def test_searchsorted(index_or_series_obj):
    """
    ``np.searchsorted`` on an Index or Series must return an in-range position.

    The largest element of ``obj`` (``0`` when ``obj`` is empty) is looked up
    once without and once with an explicit ``sorter``.
    """
    # numpy.searchsorted calls obj.searchsorted under the hood.
    # See gh-12238
    obj = index_or_series_obj

    if isinstance(obj, pd.MultiIndex):
        # See gh-14833
        pytest.skip("np.searchsorted doesn't work on pd.MultiIndex")

    max_obj = max(obj, default=0)

    # first call uses numpy's defaults, second passes an identity sorter
    for extra_kwargs in ({}, {"sorter": range(len(obj))}):
        position = np.searchsorted(obj, max_obj, **extra_kwargs)
        assert 0 <= position <= len(obj)
174+
175+
176+
def test_access_by_position(indices):
    """
    Positional access on an Index must agree with ``Series.iloc``.

    Positions 0, 5 and -1 are compared, and one-past-the-end access must raise
    ``IndexError`` on both the Index and the Series built from it.
    """
    index = indices

    if len(index) == 0:
        pytest.skip("Test doesn't make sense on empty data")
    if isinstance(index, pd.MultiIndex):
        pytest.skip("Can't instantiate Series from MultiIndex")

    series = pd.Series(index)
    for position in (0, 5, -1):
        assert index[position] == series.iloc[position]

    size = len(index)
    assert index[-1] == index[size - 1]

    # one past the end is out of bounds for both containers
    msg = f"index {size} is out of bounds for axis 0 with size {size}"
    with pytest.raises(IndexError, match=msg):
        index[size]
    msg = "single positional indexer is out-of-bounds"
    with pytest.raises(IndexError, match=msg):
        series.iloc[size]
198+
199+
200+
def test_get_indexer_non_unique_dtype_mismatch():
    """
    Look up an integer target in a string Index via ``get_indexer_non_unique``.

    The indexer must be ``[-1]`` (intp) and the missing positions ``[0]``
    (int64).
    """
    # GH 25459
    string_index = pd.Index(["A", "B"])
    integer_target = pd.Index([0])
    indexes, missing = string_index.get_indexer_non_unique(integer_target)

    expected_indexes = np.array([-1], dtype=np.intp)
    expected_missing = np.array([0], dtype=np.int64)
    tm.assert_numpy_array_equal(expected_indexes, indexes)
    tm.assert_numpy_array_equal(expected_missing, missing)

0 commit comments

Comments
 (0)