Change check() function to test actual types in Series and Index #546

Merged: 7 commits, Feb 21, 2023

36 changes: 12 additions & 24 deletions pandas-stubs/core/series.pyi
@@ -1289,12 +1289,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __and__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __and__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __and__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
     # def __array__(self, dtype: Optional[_bool] = ...) -> _np_ndarray
     def __div__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
     def __eq__(self, other: object) -> Series[_bool]: ...  # type: ignore[override]
@@ -1323,22 +1321,18 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __or__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __or__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __or__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
     def __radd__(self, other: num | _str | _ListLike | Series[S1]) -> Series[S1]: ...
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __rand__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __rand__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __rand__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...  # type: ignore[misc]
     def __rdiv__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
     def __rdivmod__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...  # type: ignore[override]
     def __rfloordiv__(self, other: num | _ListLike | Series[S1]) -> Series[S1]: ...
@@ -1349,12 +1343,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __ror__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __ror__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __ror__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...  # type: ignore[misc]
     def __rsub__(self, other: num | _ListLike | Series[S1]) -> Series: ...
     @overload
     def __rtruediv__(self, other: TimedeltaSeries) -> Series[float]: ...
@@ -1363,12 +1355,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __rxor__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __rxor__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __rxor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...  # type: ignore[misc]
     @overload
     def __sub__(
         self, other: Timestamp | datetime | TimestampSeries
@@ -1389,12 +1379,10 @@ class Series(IndexOpsMixin, NDFrame, Generic[S1]):
     # ignore needed for mypy as we want different results based on the arguments
     @overload  # type: ignore[override]
     def __xor__(  # type: ignore[misc]
-        self, other: bool | list[bool] | np_ndarray_bool | Series[bool]
+        self, other: bool | list[bool] | list[int] | np_ndarray_bool | Series[bool]
     ) -> Series[bool]: ...
     @overload
-    def __xor__(
-        self, other: int | list[int] | np_ndarray_anyint | Series[int]
-    ) -> Series[int]: ...
+    def __xor__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ...
     def __invert__(self) -> Series[bool]: ...
     # properties
     # @property
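For context on the overload change above: combining a boolean Series with a list of ints goes through pandas' logical-op path and appears to come back as a bool-dtype Series at runtime, which is what folding `list[int]` into the `Series[bool]` overload encodes. A minimal sketch (plain pandas; the typing behavior described in the comment assumes these stubs are installed):

```python
import pandas as pd

bool_s = pd.Series([True, False, True])

# With list[int] accepted by the Series[bool] overloads of __and__/__or__/__xor__
# (and their reflected variants), a type checker resolves this expression to
# Series[bool]; pandas also returns a bool-dtype Series for this combination.
masked = bool_s & [1, 0, 1]
print(masked.dtype)     # bool
print(masked.tolist())  # [True, False, True]
```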
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -49,7 +49,7 @@ pre-commit = ">=2.19.0"
 black = ">=22.12.0"
 isort = ">=5.10.1"
 openpyxl = ">=3.0.10"
-tables = { version = ">=3.7.0", python = "<3.11" }
+tables = { version = ">=3.7.0" }
 lxml = { version = ">=4.7.1,<4.9.0", python = "<3.11" }
 pyreadstat = ">=1.2.0"
 xlrd = ">=2.0.1"
11 changes: 5 additions & 6 deletions tests/__init__.py
@@ -28,19 +28,18 @@
 )
 # This is only needed temporarily due to no wheels being available for lxml on 3.11
 
-pytables_skip = pytest.mark.skipif(
-    sys.version_info >= (3, 11), reason="pytables is not available for 3.11 yet"
-)
-# This is only needed temporarily due to no wheels being available for pytables on 3.11
-
 
 def check(actual: T, klass: type, dtype: type | None = None, attr: str = "left") -> T:
     if not isinstance(actual, klass):
         raise RuntimeError(f"Expected type '{klass}' but got '{type(actual)}'")
     if dtype is None:
         return actual  # type: ignore[return-value]
 
-    if hasattr(actual, "__iter__"):
+    if isinstance(actual, pd.Series):
+        value = actual.iloc[0]
+    elif isinstance(actual, pd.Index):
+        value = actual[0]  # type: ignore[assignment]
+    elif hasattr(actual, "__iter__"):
         value = next(iter(actual))  # pyright: ignore[reportGeneralTypeIssues]
     else:
         assert hasattr(actual, attr)
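The revised helper no longer relies on `next(iter(...))` for pandas containers: a Series is inspected via `.iloc[0]` and an Index via `[0]`, and the element found there is presumably what gets compared against the `dtype` argument further down in `check()`. A small standalone sketch of what those elements actually are for numpy-backed data (`check`/`assert_type` usage is shown only in a comment):

```python
import numpy as np
import pandas as pd

s = pd.Series([1, 2, 3])
idx = pd.Index([1, 2, 3])

# What the revised check() inspects: the first element of the container.
# For numpy-backed data this is a numpy scalar, not a Python builtin.
print(type(s.iloc[0]))  # <class 'numpy.int64'>
print(type(idx[0]))     # <class 'numpy.int64'>

# Typical call in the test suite:
# check(assert_type(s, "pd.Series[int]"), pd.Series, np.int64)
assert isinstance(s.iloc[0], np.int64)
assert isinstance(idx[0], np.int64)
```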
38 changes: 21 additions & 17 deletions tests/test_frame.py
@@ -72,13 +72,13 @@ def test_types_init() -> None:
 
 def test_types_all() -> None:
     df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])
-    check(assert_type(df.all(), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(df.all(), "pd.Series[bool]"), pd.Series, np.bool_)
     check(assert_type(df.all(axis=None), bool), np.bool_)
 
 
 def test_types_any() -> None:
     df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])
-    check(assert_type(df.any(), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(df.any(), "pd.Series[bool]"), pd.Series, np.bool_)
     check(assert_type(df.any(axis=None), bool), np.bool_)
 
 
@@ -496,7 +496,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
     check(assert_type(df.apply(gethead, args=(4,)), pd.DataFrame), pd.DataFrame)
 
     # Check various return types for default result_type (None) with default axis (0)
-    check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, int)
+    check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, np.int64)
     check(assert_type(df.apply(returns_series), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.apply(returns_listlike_of_3), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.apply(returns_dict), pd.Series), pd.Series)
@@ -507,7 +507,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
         # to pass a result_type of "expand" to a scalar return
         assert_type(df.apply(returns_scalar, result_type="expand"), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(df.apply(returns_series, result_type="expand"), pd.DataFrame),
@@ -530,7 +530,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
         # to pass a result_type of "reduce" to a scalar return
         assert_type(df.apply(returns_scalar, result_type="reduce"), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         # Note that technically it does not make sense
@@ -548,7 +548,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
 
     # Check various return types for default result_type (None) with axis=1
     check(
-        assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"), pd.Series, int
+        assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"),
+        pd.Series,
+        np.int64,
     )
     check(assert_type(df.apply(returns_series, axis=1), pd.DataFrame), pd.DataFrame)
     check(assert_type(df.apply(returns_listlike_of_3, axis=1), pd.Series), pd.Series)
@@ -562,7 +564,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
             df.apply(returns_scalar, axis=1, result_type="expand"), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
@@ -589,7 +591,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
             df.apply(returns_scalar, axis=1, result_type="reduce"), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         # Note that technically it does not make sense
@@ -668,33 +670,35 @@ def gethead(s: pd.Series, y: int) -> pd.Series:
     # Test various other positional/keyword argument combinations
     # to ensure all overloads are supported
     check(
-        assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"), pd.Series, int
+        assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"),
+        pd.Series,
+        np.int64,
     )
     check(
         assert_type(
             df.apply(returns_scalar, axis=0, result_type=None), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(df.apply(returns_scalar, 0, False, None), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
             df.apply(returns_scalar, 0, False, result_type=None), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
    )
     check(
         assert_type(
             df.apply(returns_scalar, 0, raw=False, result_type=None), "pd.Series[int]"
         ),
         pd.Series,
-        int,
+        np.int64,
     )
 
 
@@ -863,7 +867,7 @@ def test_types_groupby_methods() -> None:
     check(
         assert_type(df.groupby("col1").value_counts(normalize=False), "pd.Series[int]"),
         pd.Series,
-        int,
+        np.int64,
     )
     check(
         assert_type(
@@ -948,12 +952,12 @@ def test_types_groupby_any() -> None:
     check(
         assert_type(df.groupby("col1")["col2"].any(), "pd.Series[bool]"),
         pd.Series,
-        bool,
+        np.bool_,
     )
     check(
         assert_type(df.groupby("col1")["col2"].any(), "pd.Series[bool]"),
         pd.Series,
-        bool,
+        np.bool_,
     )
 
 
@@ -2277,7 +2281,7 @@ def test_series_groupby_and_value_counts() -> None:
     )
     c1 = df.groupby("Animal")["Max Speed"].value_counts()
     c2 = df.groupby("Animal")["Max Speed"].value_counts(normalize=True)
-    check(assert_type(c1, "pd.Series[int]"), pd.Series, int)
+    check(assert_type(c1, "pd.Series[int]"), pd.Series, np.int64)
     check(assert_type(c2, "pd.Series[float]"), pd.Series, float)
 
 
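The systematic swap of `int` for `np.int64` and `bool` for `np.bool_` in the expected element types follows from the stricter `check()`: elements pulled out of a numpy-backed Series are numpy scalars, and `np.bool_` in particular is not a subclass of Python `bool`. A short sketch of the distinction (assumes pandas' default int64/bool dtype inference for these literals):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame([[False, True], [False, False]], columns=["col1", "col2"])

elem = df.all().iloc[0]
print(type(elem))                  # a numpy boolean scalar (np.bool_)
print(isinstance(elem, np.bool_))  # True
print(isinstance(elem, bool))      # False: np.bool_ does not subclass bool

count = df.groupby("col1")["col2"].value_counts().iloc[0]
print(isinstance(count, np.int64))  # True
print(isinstance(count, int))       # False on Python 3: np.int64 is not int
```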
4 changes: 2 additions & 2 deletions tests/test_indexes.py
@@ -87,7 +87,7 @@ def test_column_getitem() -> None:
 
     column = df.columns[0]
     check(assert_type(column, Scalar), str)
-    check(assert_type(df[column], pd.Series), pd.Series, int)
+    check(assert_type(df[column], pd.Series), pd.Series, np.int64)
 
 
 def test_column_contains() -> None:
@@ -843,7 +843,7 @@ def test_getitem() -> None:
     iri = pd.RangeIndex(0, 10)
     check(assert_type(iri, pd.RangeIndex), pd.RangeIndex, int)
     check(assert_type(iri[0], int), int)
-    check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, int)
+    check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, np.int64)
 
     mi = pd.MultiIndex.from_product([["a", "b"], ["c", "d"]], names=["ab", "cd"])
     check(assert_type(mi, pd.MultiIndex), pd.MultiIndex)
10 changes: 1 addition & 9 deletions tests/test_io.py
@@ -66,10 +66,7 @@
 from pandas.io.sas.sas_xport import XportReader
 from pandas.io.stata import StataReader
 
-from . import (
-    lxml_skip,
-    pytables_skip,
-)
+from . import lxml_skip
 
 DF = DataFrame({"a": [1, 2, 3], "b": [0.0, 0.0, 0.0]})
 CWD = os.path.split(os.path.abspath(__file__))[0]
@@ -290,14 +287,12 @@ def test_sas_xport() -> None:
         pass
 
 
-@pytables_skip
 def test_hdf():
     with ensure_clean() as path:
         check(assert_type(DF.to_hdf(path, "df"), None), type(None))
         check(assert_type(read_hdf(path), Union[DataFrame, Series]), DataFrame)
 
 
-@pytables_skip
 def test_hdfstore():
     with ensure_clean() as path:
         store = HDFStore(path, model="w")
@@ -341,7 +336,6 @@ def test_hdfstore():
     store.close()
 
 
-@pytables_skip
 def test_read_hdf_iterator():
     with ensure_clean() as path:
         check(assert_type(DF.to_hdf(path, "df", format="table"), None), type(None))
@@ -356,7 +350,6 @@
     ti.close()
 
 
-@pytables_skip
 def test_hdf_context_manager():
     with ensure_clean() as path:
         check(assert_type(DF.to_hdf(path, "df", format="table"), None), type(None))
@@ -365,7 +358,6 @@
     check(assert_type(store.get("df"), Union[DataFrame, Series]), DataFrame)
 
 
-@pytables_skip
 def test_hdf_series():
     s = DF["a"]
     with ensure_clean() as path:
6 changes: 3 additions & 3 deletions tests/test_pandas.py
@@ -240,10 +240,10 @@ def test_types_json_normalize() -> None:
 def test_isna() -> None:
     # https://github.com/pandas-dev/pandas-stubs/issues/264
     s1 = pd.Series([1, np.nan, 3.2])
-    check(assert_type(pd.isna(s1), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(pd.isna(s1), "pd.Series[bool]"), pd.Series, np.bool_)
 
     s2 = pd.Series([1, 3.2])
-    check(assert_type(pd.notna(s2), "pd.Series[bool]"), pd.Series, bool)
+    check(assert_type(pd.notna(s2), "pd.Series[bool]"), pd.Series, np.bool_)
 
     df1 = pd.DataFrame({"a": [1, 2, 1, 2], "b": [1, 1, 2, np.nan]})
     check(assert_type(pd.isna(df1), "pd.DataFrame"), pd.DataFrame)
@@ -890,7 +890,7 @@ def test_cut() -> None:
     check(assert_type(s0r, pd.Series), pd.Series, pd.Interval)
     check(assert_type(s1r, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp)
     s0rlf, s1rlf = pd.cut(s1, bins=10, labels=False, retbins=True)
-    check(assert_type(s0rlf, pd.Series), pd.Series, int)
+    check(assert_type(s0rlf, pd.Series), pd.Series, np.int64)
     check(assert_type(s1rlf, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp)
     s0rls, s1rls = pd.cut(s1, bins=4, labels=["1", "2", "3", "4"], retbins=True)
     check(assert_type(s0rls, pd.Series), pd.Series, str)