From 66ece5527707d32efe7d02393f5b69c7c916780c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 17 Aug 2023 16:04:48 -0400 Subject: [PATCH 1/7] Infer dtype of Series in more cases --- pandas-stubs/core/frame.pyi | 2 +- pandas-stubs/core/indexes/period.pyi | 4 +- pandas-stubs/core/series.pyi | 67 ++++---- pyproject.toml | 2 +- tests/test_series.py | 246 +++++++++++++++++---------- 5 files changed, 196 insertions(+), 125 deletions(-) diff --git a/pandas-stubs/core/frame.pyi b/pandas-stubs/core/frame.pyi index 152d97f18..eff25f3c1 100644 --- a/pandas-stubs/core/frame.pyi +++ b/pandas-stubs/core/frame.pyi @@ -555,7 +555,7 @@ class DataFrame(NDFrame, OpsMixin): @overload def __getitem__( # type: ignore[misc] self, - key: Series[_bool] + key: Series | DataFrame | Index | np_ndarray_str diff --git a/pandas-stubs/core/indexes/period.pyi b/pandas-stubs/core/indexes/period.pyi index 9580dda72..778778391 100644 --- a/pandas-stubs/core/indexes/period.pyi +++ b/pandas-stubs/core/indexes/period.pyi @@ -6,9 +6,7 @@ import numpy as np import pandas as pd from pandas import Index from pandas.core.indexes.accessors import PeriodIndexFieldOps -from pandas.core.indexes.datetimelike import ( - DatetimeIndexOpsMixin as DatetimeIndexOpsMixin, -) +from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.indexes.timedeltas import TimedeltaIndex from typing_extensions import Self diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index c1055c48e..1ff954e3d 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -211,18 +211,21 @@ class _LocIndexerSeries(_LocIndexer, Generic[S1]): value: S1 | ArrayLike | Series[S1] | None, ) -> None: ... +_ListLike: TypeAlias = ( + ArrayLike | dict[_str, np.ndarray] | Sequence[S1] | IndexOpsMixin[S1] +) + class Series(IndexOpsMixin[S1], NDFrame): - _ListLike: TypeAlias = ArrayLike | dict[_str, np.ndarray] | list | tuple | Index __hash__: ClassVar[None] - # TODO: can __new__ be converted to __init__? Pandas implements __init__ @overload def __new__( cls, - data: DatetimeIndex | Sequence[Timestamp | np.datetime64 | datetime], + data: DatetimeIndex | Sequence[np.datetime64 | datetime], index: Axes | None = ..., + *, dtype: TimestampDtypeArg = ..., - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> TimestampSeries: ... @overload @@ -232,7 +235,7 @@ class Series(IndexOpsMixin[S1], NDFrame): index: Axes | None = ..., *, dtype: TimestampDtypeArg, - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> TimestampSeries: ... @overload @@ -240,17 +243,19 @@ class Series(IndexOpsMixin[S1], NDFrame): cls, data: PeriodIndex, index: Axes | None = ..., + *, dtype: PeriodDtype = ..., - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> PeriodSeries: ... @overload def __new__( cls, - data: TimedeltaIndex | Sequence[Timedelta | np.timedelta64 | timedelta], + data: TimedeltaIndex | Sequence[np.timedelta64 | timedelta], index: Axes | None = ..., + *, dtype: TimedeltaDtypeArg = ..., - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> TimedeltaSeries: ... @overload @@ -260,35 +265,39 @@ class Series(IndexOpsMixin[S1], NDFrame): | Interval[_OrderableT] | Sequence[Interval[_OrderableT]], index: Axes | None = ..., + *, dtype: Literal["Interval"] = ..., - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> IntervalSeries[_OrderableT]: ... @overload def __new__( cls, - data: object | _ListLike | Series[S1] | dict[int, S1] | dict[_str, S1] | None, - dtype: type[S1], + data: object, index: Axes | None = ..., - name: Hashable | None = ..., + *, + dtype: type[S1], + name: Hashable = ..., copy: bool = ..., ) -> Self: ... @overload def __new__( cls, - data: Series[S1] | dict[int, S1] | dict[_str, S1] = ..., + data: _ListLike[S1] | dict[int, S1] | dict[_str, S1] = ..., index: Axes | None = ..., + *, dtype: Dtype = ..., - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> Self: ... @overload def __new__( cls, - data: object | _ListLike | None = ..., + data: object = ..., index: Axes | None = ..., + *, dtype: Dtype = ..., - name: Hashable | None = ..., + name: Hashable = ..., copy: bool = ..., ) -> Series: ... @property @@ -342,8 +351,8 @@ class Series(IndexOpsMixin[S1], NDFrame): | Series[S1] | slice | MaskType - | tuple[S1 | slice, ...], - ) -> Series: ... + | tuple[Hashable | slice, ...], + ) -> Self: ... @overload def __getitem__(self, idx: int | _str) -> S1: ... def __setitem__(self, key, value) -> None: ... @@ -676,7 +685,7 @@ class Series(IndexOpsMixin[S1], NDFrame): def diff(self, periods: int = ...) -> Series[S1]: ... def autocorr(self, lag: int = ...) -> float: ... @overload - def dot(self, other: Series[S1]) -> Scalar: ... + def dot(self, other: Series[S1]) -> Scalar: ... # pyright: ignore[reportOverlappingOverload] @overload def dot(self, other: DataFrame) -> Series[S1]: ... @overload @@ -781,7 +790,7 @@ class Series(IndexOpsMixin[S1], NDFrame): ignore_index: _bool = ..., inplace: Literal[False] = ..., key: Callable | None = ..., - ) -> Series: ... + ) -> Self: ... @overload def sort_index( self, @@ -902,7 +911,7 @@ class Series(IndexOpsMixin[S1], NDFrame): inplace: Literal[False] = ..., level: Level | None = ..., errors: IgnoreRaise = ..., - ) -> Series: ... + ) -> Self: ... @overload def rename( self, @@ -913,7 +922,7 @@ class Series(IndexOpsMixin[S1], NDFrame): inplace: Literal[False] = ..., level: Level | None = ..., errors: IgnoreRaise = ..., - ) -> Series: ... + ) -> Self: ... @overload def rename( self, @@ -932,7 +941,7 @@ class Series(IndexOpsMixin[S1], NDFrame): copy: _bool = ..., limit: int | None = ..., tolerance: float | None = ..., - ) -> Series: ... + ) -> Self: ... @overload def drop( self, @@ -956,7 +965,7 @@ class Series(IndexOpsMixin[S1], NDFrame): level: Level | None = ..., inplace: Literal[False] = ..., errors: IgnoreRaise = ..., - ) -> Series: ... + ) -> Self: ... @overload def drop( self, @@ -1344,7 +1353,7 @@ class Series(IndexOpsMixin[S1], NDFrame): na_option: Literal["keep", "top", "bottom"] = ..., ascending: _bool = ..., pct: _bool = ..., - ) -> Series: ... + ) -> Series[float]: ... def where( self, cond: Series[S1] @@ -1961,10 +1970,8 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex | None = ..., copy: _bool = ..., inplace: Literal[False] = ..., - ) -> Series: ... - def set_axis( - self, labels, *, axis: Axis = ..., copy: _bool = ... - ) -> Series[S1]: ... + ) -> Self: ... + def set_axis(self, labels, *, axis: Axis = ..., copy: _bool = ...) -> Self: ... def __iter__(self) -> Iterator[S1]: ... class TimestampSeries(Series[Timestamp]): @@ -2098,7 +2105,7 @@ class TimedeltaSeries(Series[Timedelta]): axis: AxisIndex = ..., level: Level | None = ..., drop_level: _bool = ..., - ) -> Series: ... + ) -> Self: ... class PeriodSeries(Series[Period]): # ignore needed because of mypy diff --git a/pyproject.toml b/pyproject.toml index a1e7fd27a..a0cb7d541 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ numpy = [ ] [tool.poetry.dev-dependencies] -mypy = "1.5.0" +mypy = "1.5.1" pandas = "2.0.3" pyarrow = ">=10.0.1" pytest = ">=7.1.2" diff --git a/tests/test_series.py b/tests/test_series.py index a9eced036..3886ac617 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -128,7 +128,7 @@ def test_types_csv() -> None: def test_types_copy() -> None: s = pd.Series(data=[1, 2, 3, 4]) - check(assert_type(s.copy(), pd.Series), pd.Series, np.int64) + check(assert_type(s.copy(), "pd.Series[int]"), pd.Series, np.int64) def test_types_select() -> None: @@ -221,15 +221,19 @@ def test_types_setting() -> None: def test_types_drop() -> None: s = pd.Series([0, 1, 2]) - check(assert_type(s.drop(0), pd.Series), pd.Series) - check(assert_type(s.drop([0, 1]), pd.Series), pd.Series) - check(assert_type(s.drop(0, axis=0), pd.Series), pd.Series) + check(assert_type(s.drop(0), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.drop([0, 1]), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.drop(0, axis=0), "pd.Series[int]"), pd.Series, np.intp) assert assert_type(s.drop([0, 1], inplace=True, errors="raise"), None) is None assert assert_type(s.drop([0, 1], inplace=True, errors="ignore"), None) is None # GH 302 s = pd.Series([0, 1, 2]) - check(assert_type(s.drop(pd.Index([0, 1])), pd.Series), pd.Series) - check(assert_type(s.drop(index=pd.Index([0, 1])), pd.Series), pd.Series) + check(assert_type(s.drop(pd.Index([0, 1])), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type(s.drop(index=pd.Index([0, 1])), "pd.Series[int]"), + pd.Series, + np.intp, + ) def test_types_drop_multilevel() -> None: @@ -242,8 +246,8 @@ def test_types_drop_multilevel() -> None: def test_types_dropna() -> None: - s = pd.Series([1, np.nan, np.nan]) - check(assert_type(s.dropna(), pd.Series), pd.Series) + s = pd.Series([1.0, np.nan]) + check(assert_type(s.dropna(), "pd.Series[float]"), pd.Series, float) assert assert_type(s.dropna(axis=0, inplace=True), None) is None @@ -263,28 +267,42 @@ class MyEnum(Enum): def test_types_fillna() -> None: - s = pd.Series([1, np.nan, np.nan, 3]) - check(assert_type(s.fillna(0), pd.Series), pd.Series) - check(assert_type(s.fillna(0, axis="index"), pd.Series), pd.Series) + s = pd.Series([1.0, np.nan]) + check(assert_type(s.fillna(0), "pd.Series[float]"), pd.Series, float) + check(assert_type(s.fillna(0, axis="index"), "pd.Series[float]"), pd.Series, float) with pytest_warns_bounded( FutureWarning, "Series.fillna with 'method' is deprecated", lower="2.0.99", ): - check(assert_type(s.fillna(method="backfill", axis=0), pd.Series), pd.Series) + check( + assert_type(s.fillna(method="backfill", axis=0), "pd.Series[float]"), + pd.Series, + float, + ) assert assert_type(s.fillna(method="bfill", inplace=True), None) is None - check(assert_type(s.fillna(method="pad"), pd.Series), pd.Series) - check(assert_type(s.fillna(method="ffill", limit=1), pd.Series), pd.Series) + check(assert_type(s.fillna(method="pad"), "pd.Series[float]"), pd.Series, float) + check( + assert_type(s.fillna(method="ffill", limit=1), "pd.Series[float]"), + pd.Series, + float, + ) # GH 263 - check(assert_type(s.fillna(pd.NA), pd.Series), pd.Series) + check(assert_type(s.fillna(pd.NA), "pd.Series[float]"), pd.Series, float) def test_types_sort_index() -> None: - s = pd.Series([1, 2, 3], index=[2, 3, 1]) - check(assert_type(s.sort_index(), pd.Series), pd.Series) - check(assert_type(s.sort_index(ascending=False), pd.Series), pd.Series) + s = pd.Series([1, 2], index=[2, 3]) + check(assert_type(s.sort_index(), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type(s.sort_index(ascending=False), "pd.Series[int]"), pd.Series, np.intp + ) assert assert_type(s.sort_index(ascending=False, inplace=True), None) is None - check(assert_type(s.sort_index(kind="mergesort"), pd.Series), pd.Series) + check( + assert_type(s.sort_index(kind="mergesort"), "pd.Series[int]"), + pd.Series, + np.intp, + ) # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html @@ -294,25 +312,37 @@ def test_types_sort_index_with_key() -> None: def test_types_sort_values() -> None: - s = pd.Series([4, 2, 1, 3]) - check(assert_type(s.sort_values(), pd.Series), pd.Series) + s = pd.Series([4, 2]) + check(assert_type(s.sort_values(), "pd.Series[int]"), pd.Series, np.int64) if TYPE_CHECKING_INVALID_USAGE: check(assert_type(s.sort_values(0), pd.Series), pd.Series) # type: ignore[assert-type,call-overload] # pyright: ignore[reportGeneralTypeIssues] - check(assert_type(s.sort_values(axis=0), pd.Series), pd.Series) - check(assert_type(s.sort_values(ascending=False), pd.Series), pd.Series) + check(assert_type(s.sort_values(axis=0), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type(s.sort_values(ascending=False), "pd.Series[int]"), + pd.Series, + np.intp, + ) assert assert_type(s.sort_values(inplace=True, kind="quicksort"), None) is None - check(assert_type(s.sort_values(na_position="last"), pd.Series), pd.Series) - check(assert_type(s.sort_values(ignore_index=True), pd.Series), pd.Series) + check( + assert_type(s.sort_values(na_position="last"), "pd.Series[int]"), + pd.Series, + np.intp, + ) + check( + assert_type(s.sort_values(ignore_index=True), "pd.Series[int]"), + pd.Series, + np.intp, + ) # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html def test_types_sort_values_with_key() -> None: - s = pd.Series([1, 2, 3], index=[2, 3, 1]) + s = pd.Series([1, 2], index=[2, 3]) res: pd.Series = s.sort_values(key=lambda k: -k) def test_types_shift() -> None: - s = pd.Series([1, 2, 3]) + s = pd.Series([1, 2]) s.shift() s.shift(axis=0, periods=1) s.shift(-1, fill_value=0) @@ -359,11 +389,11 @@ def test_types_sum() -> None: # 2. Runtime return types of `series.sum(min_count=...)` are NOT # tested (because of potential `nan`s). - s0 = assert_type(pd.Series([1, 2, 3, np.nan]), "pd.Series") - check(assert_type(s0.sum(), "Any"), np.float64) - check(assert_type(s0.sum(skipna=False), "Any"), np.float64) - check(assert_type(s0.sum(numeric_only=False), "Any"), np.float64) - assert_type(s0.sum(min_count=4), "Any") + s0 = assert_type(pd.Series([1.0, np.nan]), "pd.Series[float]") + check(assert_type(s0.sum(), float), np.float64) + check(assert_type(s0.sum(skipna=False), float), np.float64) + check(assert_type(s0.sum(numeric_only=False), float), np.float64) + assert_type(s0.sum(min_count=4), float) s1 = assert_type(pd.Series([False, True], dtype=bool), "pd.Series[bool]") check(assert_type(s1.sum(), "int"), np.int64) @@ -523,7 +553,7 @@ def test_types_element_wise_arithmetic() -> None: res_pow: pd.Series = s ** s2.abs() res_pow2: pd.Series = s.pow(s2.abs(), fill_value=0) - check(assert_type(divmod(s, s2), Tuple[pd.Series, pd.Series]), tuple) + check(assert_type(divmod(s, s2), "tuple[pd.Series[int], pd.Series[int]]"), tuple) def test_types_scalar_arithmetic() -> None: @@ -842,7 +872,7 @@ def test_update() -> None: s1.update(pd.Series([0, 2, 12])) # Series.update() accepting objects that can be coerced to a Series was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html s1.update([1, 2, -4, 3]) - s1.update([1, "b", "c", "d"]) + s1.update([1, "b", "c", "d"]) # type: ignore[list-item] # pyright: ignore[reportGeneralTypeIssues] s1.update({1: 9, 3: 4}) @@ -878,40 +908,58 @@ def test_types_between() -> None: def test_types_agg() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) - check(assert_type(s.agg("min"), Any), np.int64) - check(assert_type(s.agg(["min", "max"]), pd.Series), pd.Series) - check(assert_type(s.agg({"a": "min"}), pd.Series), pd.Series) - check(assert_type(s.agg("mean", axis=0), Any), np.float64) + check(assert_type(s.agg("min"), int), np.int64) + check(assert_type(s.agg(["min", "max"]), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.agg({"a": "min"}), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type( # type: ignore[assert-type] + s.agg("mean", axis=0), float # pyright: ignore[reportGeneralTypeIssues] + ), + np.float64, + ) with pytest_warns_bounded( FutureWarning, r"The provided callable is currently using", lower="2.0.99", ): - check(assert_type(s.agg(min), Any), np.int64) - check(assert_type(s.agg([min, max]), pd.Series), pd.Series) - check(assert_type(s.agg({0: min}), pd.Series), pd.Series) - check(assert_type(s.agg(x=max, y="min", z=np.mean), pd.Series), pd.Series) + check(assert_type(s.agg(min), int), np.int64) + check(assert_type(s.agg([min, max]), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.agg({0: min}), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type( # type: ignore[assert-type] + s.agg( + x=max, y="min", z=np.mean + ), # pyright: ignore[reportGeneralTypeIssues] + "pd.Series[float]", + ), + pd.Series, + np.float64, + ) def test_types_aggregate() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) - check(assert_type(s.aggregate("min"), Any), np.int64) - check(assert_type(s.aggregate(["min", "max"]), pd.Series), pd.Series) - check(assert_type(s.aggregate({"a": "min"}), pd.Series), pd.Series) + check(assert_type(s.aggregate("min"), int), np.int64) + check( + assert_type(s.aggregate(["min", "max"]), "pd.Series[int]"), pd.Series, np.intp + ) + check(assert_type(s.aggregate({"a": "min"}), "pd.Series[int]"), pd.Series, np.intp) with pytest_warns_bounded( FutureWarning, r"The provided callable is currently using", lower="2.0.99", ): - check(assert_type(s.aggregate(min), Any), np.int64) - check(assert_type(s.aggregate([min, max]), pd.Series), pd.Series) - check(assert_type(s.aggregate({0: min}), pd.Series), pd.Series) + check(assert_type(s.aggregate(min), int), np.int64) + check( + assert_type(s.aggregate([min, max]), "pd.Series[int]"), pd.Series, np.intp + ) + check(assert_type(s.aggregate({0: min}), "pd.Series[int]"), pd.Series, np.intp) def test_types_transform() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) - check(assert_type(s.transform("abs"), pd.Series), pd.Series) - check(assert_type(s.transform(abs), pd.Series), pd.Series) + check(assert_type(s.transform("abs"), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.transform(abs), "pd.Series[int]"), pd.Series, np.intp) check(assert_type(s.transform(["abs", "sqrt"]), pd.DataFrame), pd.DataFrame) check(assert_type(s.transform([abs, np.sqrt]), pd.DataFrame), pd.DataFrame) check( @@ -976,25 +1024,25 @@ def test_types_values() -> None: def test_types_rename() -> None: # Scalar s1 = pd.Series([1, 2, 3]).rename("A") - check(assert_type(s1, pd.Series), pd.Series) + check(assert_type(s1, "pd.Series[int]"), pd.Series, np.intp) # Hashable Sequence s2 = pd.Series([1, 2, 3]).rename(("A", "B")) - check(assert_type(s2, pd.Series), pd.Series) + check(assert_type(s2, "pd.Series[int]"), pd.Series, np.intp) # Optional s3 = pd.Series([1, 2, 3]).rename(None) - check(assert_type(s3, pd.Series), pd.Series) + check(assert_type(s3, "pd.Series[int]"), pd.Series, np.intp) # Functions def add1(x: int) -> int: return x + 1 s4 = pd.Series([1, 2, 3]).rename(add1) - check(assert_type(s4, pd.Series), pd.Series) + check(assert_type(s4, "pd.Series[int]"), pd.Series, np.intp) # Dictionary s5 = pd.Series([1, 2, 3]).rename({1: 10}) - check(assert_type(s5, pd.Series), pd.Series) + check(assert_type(s5, "pd.Series[int]"), pd.Series, np.intp) # inplace s6: None = pd.Series([1, 2, 3]).rename("A", inplace=True) @@ -1010,8 +1058,8 @@ def test_types_ne() -> None: def test_types_bfill() -> None: s1 = pd.Series([1, 2, 3]) - check(assert_type(s1.bfill(), pd.Series), pd.Series) - check(assert_type(s1.bfill(inplace=False), pd.Series), pd.Series) + check(assert_type(s1.bfill(), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1.bfill(inplace=False), "pd.Series[int]"), pd.Series, np.intp) assert assert_type(s1.bfill(inplace=True), None) is None @@ -1048,8 +1096,8 @@ def test_types_ewm() -> None: def test_types_ffill() -> None: s1 = pd.Series([1, 2, 3]) - check(assert_type(s1.ffill(), pd.Series), pd.Series) - check(assert_type(s1.ffill(inplace=False), pd.Series), pd.Series) + check(assert_type(s1.ffill(), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1.ffill(inplace=False), "pd.Series[int]"), pd.Series, np.intp) assert assert_type(s1.ffill(inplace=True), None) is None @@ -1097,10 +1145,10 @@ def test_series_index_isin() -> None: t2 = s.loc[~s.index.isin([1, 3])] t3 = s[s.index.isin([1, 3])] t4 = s[~s.index.isin([1, 3])] - check(assert_type(t1, pd.Series), pd.Series) - check(assert_type(t2, pd.Series), pd.Series) - check(assert_type(t3, pd.Series), pd.Series) - check(assert_type(t4, pd.Series), pd.Series) + check(assert_type(t1, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(t2, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(t3, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(t4, "pd.Series[int]"), pd.Series, np.intp) def test_series_invert() -> None: @@ -1108,8 +1156,8 @@ def test_series_invert() -> None: s2 = ~s1 check(assert_type(s2, "pd.Series[bool]"), pd.Series, np.bool_) s3 = pd.Series([1, 2, 3]) - check(assert_type(s3[s2], pd.Series), pd.Series) - check(assert_type(s3.loc[s2], pd.Series), pd.Series) + check(assert_type(s3[s2], "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s3.loc[s2], "pd.Series[int]"), pd.Series, np.intp) def test_series_multiindex_getitem() -> None: @@ -1143,18 +1191,18 @@ def test_reset_index() -> None: r3 = s.reset_index("ab") check(assert_type(r3, pd.DataFrame), pd.DataFrame) r4 = s.reset_index(drop=True) - check(assert_type(r4, pd.Series), pd.Series) + check(assert_type(r4, "pd.Series[int]"), pd.Series, np.intp) r5 = s.reset_index(["ab"], drop=True) - check(assert_type(r5, pd.Series), pd.Series) + check(assert_type(r5, "pd.Series[int]"), pd.Series, np.intp) r6 = s.reset_index(["ab"], drop=True, allow_duplicates=True) - check(assert_type(r6, pd.Series), pd.Series) + check(assert_type(r6, "pd.Series[int]"), pd.Series, np.intp) assert assert_type(s.reset_index(inplace=True, drop=True), None) is None def test_series_add_str() -> None: s = pd.Series(["abc", "def"]) - check(assert_type(s + "x", pd.Series), pd.Series) - check(assert_type("x" + s, pd.Series), pd.Series) + check(assert_type(s + "x", pd.Series), pd.Series, str) + check(assert_type("x" + s, "pd.Series[str]"), pd.Series, str) def test_series_dtype() -> None: @@ -1165,14 +1213,20 @@ def test_series_dtype() -> None: def test_types_replace() -> None: # GH 44 s = pd.Series([1, 2, 3]) - check(assert_type(s.replace(1, 2), pd.Series), pd.Series) - check(assert_type(s.replace(1, 2, inplace=False), pd.Series), pd.Series) + check(assert_type(s.replace(1, 2), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type(s.replace(1, 2, inplace=False), "pd.Series[int]"), + pd.Series, + np.intp, + ) assert assert_type(s.replace(1, 2, inplace=True), None) is None def test_cat_accessor() -> None: # GH 43 - s = pd.Series(pd.Categorical(["a", "b", "a"], categories=["a", "b"])) + s: pd.Series[str] = pd.Series( + pd.Categorical(["a", "b", "a"], categories=["a", "b"]) + ) check(assert_type(s.cat.codes, "pd.Series[int]"), pd.Series, np.int8) # GH 139 ser = pd.Series([1, 2, 3], name="A").astype("category") @@ -1518,8 +1572,8 @@ def test_neg() -> None: # GH 253 sr = pd.Series([1, 2, 3]) sr_int = pd.Series([1, 2, 3], dtype=int) - check(assert_type(-sr, pd.Series), pd.Series) - check(assert_type(-sr_int, "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(-sr, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(-sr_int, "pd.Series[int]"), pd.Series, np.intp) def test_getattr() -> None: @@ -1679,8 +1733,8 @@ def test_AnyArrayLike_and_clip() -> None: ser = pd.Series([1, 2, 3]) s1 = ser.clip(lower=ser) s2 = ser.clip(upper=ser) - check(assert_type(s1, pd.Series), pd.Series) - check(assert_type(s2, pd.Series), pd.Series) + check(assert_type(s1, "pd.Series[int]"), pd.Series) + check(assert_type(s2, "pd.Series[int]"), pd.Series) def test_pandera_generic() -> None: @@ -2597,7 +2651,7 @@ def test_all_astype_args_tested() -> None: def test_check_xs() -> None: s4 = pd.Series([1, 4]) s4.xs(0, axis=0) - check(assert_type(s4, pd.Series), pd.Series) + check(assert_type(s4, "pd.Series[int]"), pd.Series, np.intp) def test_types_apply_set() -> None: @@ -2609,20 +2663,32 @@ def test_types_apply_set() -> None: def test_prefix_summix_axis() -> None: s = pd.Series([1, 2, 3, 4]) - check(assert_type(s.add_suffix("_item", axis=0), pd.Series), pd.Series) - check(assert_type(s.add_suffix("_item", axis="index"), pd.Series), pd.Series) - check(assert_type(s.add_prefix("_item", axis=0), pd.Series), pd.Series) - check(assert_type(s.add_prefix("_item", axis="index"), pd.Series), pd.Series) + check( + assert_type(s.add_suffix("_item", axis=0), "pd.Series[int]"), pd.Series, np.intp + ) + check( + assert_type(s.add_suffix("_item", axis="index"), "pd.Series[int]"), + pd.Series, + np.intp, + ) + check( + assert_type(s.add_prefix("_item", axis=0), "pd.Series[int]"), pd.Series, np.intp + ) + check( + assert_type(s.add_prefix("_item", axis="index"), "pd.Series[int]"), + pd.Series, + np.intp, + ) if TYPE_CHECKING_INVALID_USAGE: - check(assert_type(s.add_prefix("_item", axis=1), pd.Series), pd.Series) # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] - check(assert_type(s.add_suffix("_item", axis="columns"), pd.Series), pd.Series) # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] + s.add_prefix("_item", axis=1) # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] + s.add_suffix("_item", axis="columns") # type: ignore[arg-type] # pyright: ignore[reportGeneralTypeIssues] def test_convert_dtypes_dtype_backend() -> None: s = pd.Series([1, 2, 3, 4]) s1 = s.convert_dtypes(dtype_backend="numpy_nullable") - check(assert_type(s1, pd.Series), pd.Series) + check(assert_type(s1, "pd.Series[int]"), pd.Series, np.intp) def test_apply_returns_none() -> None: @@ -2634,7 +2700,7 @@ def test_apply_returns_none() -> None: def test_loc_callable() -> None: # GH 586 s = pd.Series([1, 2]) - check(assert_type(s.loc[lambda x: x > 1], pd.Series), pd.Series) + check(assert_type(s.loc[lambda x: x > 1], "pd.Series[int]"), pd.Series, np.intp) def test_to_json_mode() -> None: @@ -2675,17 +2741,17 @@ def test_types_mask() -> None: s = pd.Series([1, 2, 3, 4, 5]) # Test case with a boolean condition and a scalar value - check(assert_type(s.mask(s > 3, 10), pd.Series), pd.Series, np.integer) + check(assert_type(s.mask(s > 3, 10), "pd.Series[int]"), pd.Series, np.integer) # Test case with a boolean condition and a callable def double(x): return x * 2 - check(assert_type(s.mask(s > 3, double), pd.Series), pd.Series, np.integer) + check(assert_type(s.mask(s > 3, double), "pd.Series[int]"), pd.Series, np.integer) # Test cases with None and pd.NA as other - check(assert_type(s.mask(s > 3, None), pd.Series), pd.Series, np.float64) - check(assert_type(s.mask(s > 3, pd.NA), pd.Series), pd.Series, np.float64) + check(assert_type(s.mask(s > 3, None), "pd.Series[int]"), pd.Series, np.float64) + check(assert_type(s.mask(s > 3, pd.NA), "pd.Series[int]"), pd.Series, np.float64) def test_timedelta_div() -> None: From d495cb0633dfac26c679caddb62a1236cd3c08c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Thu, 17 Aug 2023 19:53:19 -0400 Subject: [PATCH 2/7] ignores --- pandas-stubs/core/series.pyi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 1ff954e3d..10e14f86d 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -219,7 +219,7 @@ class Series(IndexOpsMixin[S1], NDFrame): __hash__: ClassVar[None] @overload - def __new__( + def __new__( # type: ignore[misc] cls, data: DatetimeIndex | Sequence[np.datetime64 | datetime], index: Axes | None = ..., @@ -229,7 +229,7 @@ class Series(IndexOpsMixin[S1], NDFrame): copy: bool = ..., ) -> TimestampSeries: ... @overload - def __new__( + def __new__( # type: ignore[misc] cls, data: _ListLike, index: Axes | None = ..., @@ -239,7 +239,7 @@ class Series(IndexOpsMixin[S1], NDFrame): copy: bool = ..., ) -> TimestampSeries: ... @overload - def __new__( + def __new__( # type: ignore[misc] cls, data: PeriodIndex, index: Axes | None = ..., @@ -249,7 +249,7 @@ class Series(IndexOpsMixin[S1], NDFrame): copy: bool = ..., ) -> PeriodSeries: ... @overload - def __new__( + def __new__( # type: ignore[misc] cls, data: TimedeltaIndex | Sequence[np.timedelta64 | timedelta], index: Axes | None = ..., @@ -283,7 +283,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( cls, - data: _ListLike[S1] | dict[int, S1] | dict[_str, S1] = ..., + data: _ListLike[S1] | dict[int, S1] | dict[_str, S1], index: Axes | None = ..., *, dtype: Dtype = ..., @@ -685,7 +685,7 @@ class Series(IndexOpsMixin[S1], NDFrame): def diff(self, periods: int = ...) -> Series[S1]: ... def autocorr(self, lag: int = ...) -> float: ... @overload - def dot(self, other: Series[S1]) -> Scalar: ... # pyright: ignore[reportOverlappingOverload] + def dot(self, other: Series[S1]) -> Scalar: ... # type: ignore[misc] # pyright: ignore[reportOverlappingOverload] @overload def dot(self, other: DataFrame) -> Series[S1]: ... @overload @@ -693,7 +693,7 @@ class Series(IndexOpsMixin[S1], NDFrame): def __matmul__(self, other): ... def __rmatmul__(self, other): ... @overload - def searchsorted( + def searchsorted( # type: ignore[misc] self, value: _ListLike, side: Literal["left", "right"] = ..., From 4b8739721c2ad3632c34ee0705fd0de543511295 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 18 Aug 2023 09:45:45 -0400 Subject: [PATCH 3/7] address some of the feedback --- pandas-stubs/core/series.pyi | 51 ++++++++++++++++++++---------------- tests/test_series.py | 14 +++++++--- 2 files changed, 39 insertions(+), 26 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index 10e14f86d..f197f4e06 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -221,7 +221,10 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( # type: ignore[misc] cls, - data: DatetimeIndex | Sequence[np.datetime64 | datetime], + data: DatetimeIndex + | Sequence[np.datetime64 | datetime] + | np.datetime64 + | datetime, index: Axes | None = ..., *, dtype: TimestampDtypeArg = ..., @@ -251,7 +254,10 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( # type: ignore[misc] cls, - data: TimedeltaIndex | Sequence[np.timedelta64 | timedelta], + data: TimedeltaIndex + | Sequence[np.timedelta64 | timedelta] + | np.timedelta64 + | timedelta, index: Axes | None = ..., *, dtype: TimedeltaDtypeArg = ..., @@ -273,7 +279,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( cls, - data: object, + data: Scalar | _ListLike | dict[int, Any] | dict[_str, Any] | None, index: Axes | None = ..., *, dtype: type[S1], @@ -283,7 +289,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( cls, - data: _ListLike[S1] | dict[int, S1] | dict[_str, S1], + data: S1 | _ListLike[S1] | dict[int, S1] | dict[_str, S1], index: Axes | None = ..., *, dtype: Dtype = ..., @@ -293,7 +299,7 @@ class Series(IndexOpsMixin[S1], NDFrame): @overload def __new__( cls, - data: object = ..., + data: Scalar | _ListLike | dict[int, Any] | dict[_str, Any] | None = ..., index: Axes | None = ..., *, dtype: Dtype = ..., @@ -685,11 +691,13 @@ class Series(IndexOpsMixin[S1], NDFrame): def diff(self, periods: int = ...) -> Series[S1]: ... def autocorr(self, lag: int = ...) -> float: ... @overload - def dot(self, other: Series[S1]) -> Scalar: ... # type: ignore[misc] # pyright: ignore[reportOverlappingOverload] + def dot(self, other: Series[S1]) -> Scalar: ... @overload def dot(self, other: DataFrame) -> Series[S1]: ... @overload - def dot(self, other: _ListLike) -> np.ndarray: ... + def dot( + self, other: ArrayLike | dict[_str, np.ndarray] | Sequence[S1] | Index[S1] + ) -> np.ndarray: ... def __matmul__(self, other): ... def __rmatmul__(self, other): ... @overload @@ -843,7 +851,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., *args, **kwargs, - ) -> Series[S1]: ... + ) -> Self: ... agg = aggregate @overload def transform( @@ -1440,14 +1448,10 @@ class Series(IndexOpsMixin[S1], NDFrame): # just failed to generate these so I couldn't match # them up. @overload - def __add__(self, other: TimestampSeries) -> TimestampSeries: ... - @overload - def __add__(self, other: DatetimeIndex) -> TimestampSeries: ... - @overload - def __add__(self, other: Timestamp) -> TimestampSeries: ... + def __add__(self, other: S1 | Self) -> Self: ... @overload def __add__( - self, other: num | _str | Timedelta | _ListLike | Series[S1] | np.timedelta64 + self, other: num | _str | Timedelta | _ListLike | Series | np.timedelta64 ) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] @@ -1488,7 +1492,10 @@ class Series(IndexOpsMixin[S1], NDFrame): ) -> Series[bool]: ... @overload def __or__(self, other: int | np_ndarray_anyint | Series[int]) -> Series[int]: ... - def __radd__(self, other: num | _str | _ListLike | Series[S1]) -> Series[S1]: ... + @overload + def __radd__(self, other: S1 | Series[S1]) -> Self: ... + @overload + def __radd__(self, other: num | _str | _ListLike | Series) -> Series: ... # ignore needed for mypy as we want different results based on the arguments @overload # type: ignore[override] def __rand__( # type: ignore[misc] @@ -1973,6 +1980,13 @@ class Series(IndexOpsMixin[S1], NDFrame): ) -> Self: ... def set_axis(self, labels, *, axis: Axis = ..., copy: _bool = ...) -> Self: ... def __iter__(self) -> Iterator[S1]: ... + def xs( + self, + key: Hashable, + axis: AxisIndex = ..., + level: Level | None = ..., + drop_level: _bool = ..., + ) -> Self: ... class TimestampSeries(Series[Timestamp]): # ignore needed because of mypy @@ -2099,13 +2113,6 @@ class TimedeltaSeries(Series[Timedelta]): numeric_only: _bool = ..., **kwargs, ) -> Timedelta: ... - def xs( - self, - key: Hashable, - axis: AxisIndex = ..., - level: Level | None = ..., - drop_level: _bool = ..., - ) -> Self: ... class PeriodSeries(Series[Period]): # ignore needed because of mypy diff --git a/tests/test_series.py b/tests/test_series.py index 3886ac617..c8ca6add1 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1176,7 +1176,7 @@ def test_series_mul() -> None: sm2 = s * s check(assert_type(sm2, pd.Series), pd.Series) sp = s + 4 - check(assert_type(sp, pd.Series), pd.Series) + check(assert_type(sp, "pd.Series[int]"), pd.Series, np.intp) def test_reset_index() -> None: @@ -1201,7 +1201,7 @@ def test_reset_index() -> None: def test_series_add_str() -> None: s = pd.Series(["abc", "def"]) - check(assert_type(s + "x", pd.Series), pd.Series, str) + check(assert_type(s + "x", "pd.Series[str]"), pd.Series, str) check(assert_type("x" + s, "pd.Series[str]"), pd.Series, str) @@ -1733,8 +1733,8 @@ def test_AnyArrayLike_and_clip() -> None: ser = pd.Series([1, 2, 3]) s1 = ser.clip(lower=ser) s2 = ser.clip(upper=ser) - check(assert_type(s1, "pd.Series[int]"), pd.Series) - check(assert_type(s2, "pd.Series[int]"), pd.Series) + check(assert_type(s1, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s2, "pd.Series[int]"), pd.Series, np.intp) def test_pandera_generic() -> None: @@ -2777,3 +2777,9 @@ def test_timedelta_div() -> None: [1] / series # type: ignore[operator] # pyright: ignore[reportGeneralTypeIssues] 1 // series # type: ignore[operator] # pyright: ignore[reportGeneralTypeIssues] [1] // series # type: ignore[operator] # pyright: ignore[reportGeneralTypeIssues] + + +def test_rank() -> None: + check( + assert_type(pd.Series([1, 2]).rank(), "pd.Series[float]"), pd.Series, np.float64 + ) From a6b03aa57e4dc1773a0adee78a15f7eed097ceb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 18 Aug 2023 09:55:26 -0400 Subject: [PATCH 4/7] One of pandas's windows/linux inconsistencies --- tests/test_series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_series.py b/tests/test_series.py index c8ca6add1..88e194e0d 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -1573,7 +1573,7 @@ def test_neg() -> None: sr = pd.Series([1, 2, 3]) sr_int = pd.Series([1, 2, 3], dtype=int) check(assert_type(-sr, "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(-sr_int, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(-sr_int, "pd.Series[int]"), pd.Series, np.integer) def test_getattr() -> None: From 76e302be959320bd7f2961107c119b7706d6978a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 18 Aug 2023 11:46:43 -0400 Subject: [PATCH 5/7] more changes; incl. np.intp/int64 -> np.integer --- .pre-commit-config.yaml | 2 +- pandas-stubs/_typing.pyi | 16 ++-- pandas-stubs/core/series.pyi | 8 ++ tests/test_frame.py | 32 ++++--- tests/test_indexes.py | 6 +- tests/test_pandas.py | 2 +- tests/test_series.py | 179 +++++++++++++++++++---------------- 7 files changed, 134 insertions(+), 111 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7aabe629a..0b831caa7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: hooks: - id: isort - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.283 + rev: v0.0.285 hooks: - id: ruff args: [ diff --git a/pandas-stubs/_typing.pyi b/pandas-stubs/_typing.pyi index 3ee9134d0..f8fbc6136 100644 --- a/pandas-stubs/_typing.pyi +++ b/pandas-stubs/_typing.pyi @@ -91,7 +91,7 @@ DtypeBackend: TypeAlias = Literal["pyarrow", "numpy_nullable"] BooleanDtypeArg: TypeAlias = ( # Builtin bool type and its string alias - type[bool] # noqa: PYI030,PYI055 + type[bool] # noqa: PYI030 | Literal["bool"] # Pandas nullable boolean type and its string alias | pd.BooleanDtype @@ -105,7 +105,7 @@ BooleanDtypeArg: TypeAlias = ( ) IntDtypeArg: TypeAlias = ( # Builtin integer type and its string alias - type[int] # noqa: PYI030,PYI055 + type[int] # noqa: PYI030 | Literal["int"] # Pandas nullable integer types and their string aliases | pd.Int8Dtype @@ -137,7 +137,7 @@ IntDtypeArg: TypeAlias = ( ) UIntDtypeArg: TypeAlias = ( # Pandas nullable unsigned integer types and their string aliases - pd.UInt8Dtype # noqa: PYI030,PYI055 + pd.UInt8Dtype # noqa: PYI030 | pd.UInt16Dtype | pd.UInt32Dtype | pd.UInt64Dtype @@ -166,7 +166,7 @@ UIntDtypeArg: TypeAlias = ( ) FloatDtypeArg: TypeAlias = ( # Builtin float type and its string alias - type[float] # noqa: PYI030,PYI055 + type[float] # noqa: PYI030 | Literal["float"] # Pandas nullable float types and their string aliases | pd.Float32Dtype @@ -197,7 +197,7 @@ FloatDtypeArg: TypeAlias = ( ) ComplexDtypeArg: TypeAlias = ( # Builtin complex type and its string alias - type[complex] # noqa: PYI030,PYI055 + type[complex] # noqa: PYI030 | Literal["complex"] # Numpy complex types and their aliases # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.csingle @@ -326,7 +326,7 @@ TimestampDtypeArg: TypeAlias = Literal[ StrDtypeArg: TypeAlias = ( # Builtin str type and its string alias - type[str] # noqa: PYI030,PYI055 + type[str] # noqa: PYI030 | Literal["str"] # Pandas nullable string type and its string alias | pd.StringDtype @@ -340,7 +340,7 @@ StrDtypeArg: TypeAlias = ( ) BytesDtypeArg: TypeAlias = ( # Builtin bytes type and its string alias - type[bytes] # noqa: PYI030,PYI055 + type[bytes] # noqa: PYI030 | Literal["bytes"] # Numpy bytes type and its string alias # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.bytes_ @@ -353,7 +353,7 @@ CategoryDtypeArg: TypeAlias = CategoricalDtype | Literal["category"] ObjectDtypeArg: TypeAlias = ( # Builtin object type and its string alias - type[object] # noqa: PYI030,PYI055 + type[object] # noqa: PYI030 | Literal["object"] # Numpy object type and its string alias # https://numpy.org/doc/stable/reference/arrays.scalars.html#numpy.object_ diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index f197f4e06..b0edc57ce 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -837,6 +837,14 @@ class Series(IndexOpsMixin[S1], NDFrame): ) -> DataFrame: ... def map(self, arg, na_action: Literal["ignore"] | None = ...) -> Series[S1]: ... @overload + def aggregate( # type: ignore[misc] + self: Series[int], + func: Literal["mean"], + axis: AxisIndex = ..., + *args, + **kwargs, + ) -> float: ... + @overload def aggregate( self, func: AggFuncTypeBase, diff --git a/tests/test_frame.py b/tests/test_frame.py index b71880032..3f9bca6cf 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -522,7 +522,9 @@ def gethead(s: pd.Series, y: int) -> pd.Series: check(assert_type(df.apply(gethead, args=(4,)), pd.DataFrame), pd.DataFrame) # Check various return types for default result_type (None) with default axis (0) - check(assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, np.int64) + check( + assert_type(df.apply(returns_scalar), "pd.Series[int]"), pd.Series, np.integer + ) check(assert_type(df.apply(returns_series), pd.DataFrame), pd.DataFrame) check(assert_type(df.apply(returns_listlike_of_3), pd.DataFrame), pd.DataFrame) check(assert_type(df.apply(returns_dict), pd.Series), pd.Series) @@ -533,7 +535,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # to pass a result_type of "expand" to a scalar return assert_type(df.apply(returns_scalar, result_type="expand"), "pd.Series[int]"), pd.Series, - np.int64, + np.integer, ) check( assert_type(df.apply(returns_series, result_type="expand"), pd.DataFrame), @@ -556,7 +558,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series: # to pass a result_type of "reduce" to a scalar return assert_type(df.apply(returns_scalar, result_type="reduce"), "pd.Series[int]"), pd.Series, - np.int64, + np.integer, ) check( # Note that technically it does not make sense @@ -576,7 +578,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series: check( assert_type(df.apply(returns_scalar, axis=1), "pd.Series[int]"), pd.Series, - np.int64, + np.integer, ) check(assert_type(df.apply(returns_series, axis=1), pd.DataFrame), pd.DataFrame) check(assert_type(df.apply(returns_listlike_of_3, axis=1), pd.Series), pd.Series) @@ -590,7 +592,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series: df.apply(returns_scalar, axis=1, result_type="expand"), "pd.Series[int]" ), pd.Series, - np.int64, + np.integer, ) check( assert_type( @@ -617,7 +619,7 @@ def gethead(s: pd.Series, y: int) -> pd.Series: df.apply(returns_scalar, axis=1, result_type="reduce"), "pd.Series[int]" ), pd.Series, - np.int64, + np.integer, ) check( # Note that technically it does not make sense @@ -698,33 +700,33 @@ def gethead(s: pd.Series, y: int) -> pd.Series: check( assert_type(df.apply(returns_scalar, axis=0), "pd.Series[int]"), pd.Series, - np.int64, + np.integer, ) check( assert_type( df.apply(returns_scalar, axis=0, result_type=None), "pd.Series[int]" ), pd.Series, - np.int64, + np.integer, ) check( assert_type(df.apply(returns_scalar, 0, False, None), "pd.Series[int]"), pd.Series, - np.int64, + np.integer, ) check( assert_type( df.apply(returns_scalar, 0, False, result_type=None), "pd.Series[int]" ), pd.Series, - np.int64, + np.integer, ) check( assert_type( df.apply(returns_scalar, 0, raw=False, result_type=None), "pd.Series[int]" ), pd.Series, - np.int64, + np.integer, ) @@ -916,7 +918,7 @@ def test_types_groupby_methods() -> None: check( assert_type(df.groupby("col1").value_counts(normalize=False), "pd.Series[int]"), pd.Series, - np.int64, + np.integer, ) check( assert_type( @@ -1728,7 +1730,7 @@ def test_indexslice_getitem(): assert_type(df.loc[pd.IndexSlice[:, df["z"] > 40], :], pd.DataFrame), pd.DataFrame, ) - check(assert_type(df.loc[pd.IndexSlice[2, 30], "z"], Scalar), np.int64) + check(assert_type(df.loc[pd.IndexSlice[2, 30], "z"], Scalar), np.integer) check( assert_type(df.loc[pd.IndexSlice[[2, 4], [20, 40]], :], pd.DataFrame), pd.DataFrame, @@ -2039,7 +2041,7 @@ def test_groupby_result() -> None: index, value = next(iterator) assert_type((index, value), Tuple[Tuple, pd.DataFrame]) - check(assert_type(index, Tuple), tuple, np.int64) + check(assert_type(index, Tuple), tuple, np.integer) check(assert_type(value, pd.DataFrame), pd.DataFrame) iterator2 = df.groupby("a").__iter__() @@ -2471,7 +2473,7 @@ def test_series_groupby_and_value_counts() -> None: ) c1 = df.groupby("Animal")["Max Speed"].value_counts() c2 = df.groupby("Animal")["Max Speed"].value_counts(normalize=True) - check(assert_type(c1, "pd.Series[int]"), pd.Series, np.int64) + check(assert_type(c1, "pd.Series[int]"), pd.Series, np.integer) check(assert_type(c2, "pd.Series[float]"), pd.Series, float) diff --git a/tests/test_indexes.py b/tests/test_indexes.py index 0115b152d..5ae1bba6d 100644 --- a/tests/test_indexes.py +++ b/tests/test_indexes.py @@ -75,7 +75,7 @@ def test_column_getitem() -> None: column = df.columns[0] check(assert_type(column, str), str) - check(assert_type(df[column], pd.Series), pd.Series, np.int64) + check(assert_type(df[column], pd.Series), pd.Series, np.integer) def test_column_contains() -> None: @@ -857,7 +857,7 @@ def test_getitem() -> None: iri = pd.RangeIndex(0, 10) check(assert_type(iri, pd.RangeIndex), pd.RangeIndex, int) check(assert_type(iri[0], int), int) - check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, np.int64) + check(assert_type(iri[[0, 2, 4]], pd.Index), pd.Index, np.integer) mi = pd.MultiIndex.from_product([["a", "b"], ["c", "d"]], names=["ab", "cd"]) check(assert_type(mi, pd.MultiIndex), pd.MultiIndex) @@ -997,7 +997,7 @@ def test_annotate() -> None: def test_new() -> None: - check(assert_type(pd.Index([1]), "pd.Index[int]"), pd.Index, np.intp) + check(assert_type(pd.Index([1]), "pd.Index[int]"), pd.Index, np.integer) check(assert_type(pd.Index([1], dtype=float), "pd.Index[float]"), pd.Index, float) check( assert_type(pd.Index([pd.Timestamp(0)]), pd.DatetimeIndex), diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 2b6bd36d1..ed67efc86 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -921,7 +921,7 @@ def test_cut() -> None: check(assert_type(s0r, pd.Series), pd.Series, pd.Interval) check(assert_type(s1r, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp) s0rlf, s1rlf = pd.cut(s1, bins=10, labels=False, retbins=True) - check(assert_type(s0rlf, pd.Series), pd.Series, np.int64) + check(assert_type(s0rlf, pd.Series), pd.Series, np.integer) check(assert_type(s1rlf, pd.DatetimeIndex), pd.DatetimeIndex, pd.Timestamp) s0rls, s1rls = pd.cut(s1, bins=4, labels=["1", "2", "3", "4"], retbins=True) check(assert_type(s0rls, pd.Series), pd.Series, str) diff --git a/tests/test_series.py b/tests/test_series.py index 88e194e0d..4da19a5ed 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -128,7 +128,7 @@ def test_types_csv() -> None: def test_types_copy() -> None: s = pd.Series(data=[1, 2, 3, 4]) - check(assert_type(s.copy(), "pd.Series[int]"), pd.Series, np.int64) + check(assert_type(s.copy(), "pd.Series[int]"), pd.Series, np.integer) def test_types_select() -> None: @@ -221,18 +221,20 @@ def test_types_setting() -> None: def test_types_drop() -> None: s = pd.Series([0, 1, 2]) - check(assert_type(s.drop(0), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s.drop([0, 1]), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s.drop(0, axis=0), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.drop(0), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.drop([0, 1]), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.drop(0, axis=0), "pd.Series[int]"), pd.Series, np.integer) assert assert_type(s.drop([0, 1], inplace=True, errors="raise"), None) is None assert assert_type(s.drop([0, 1], inplace=True, errors="ignore"), None) is None # GH 302 s = pd.Series([0, 1, 2]) - check(assert_type(s.drop(pd.Index([0, 1])), "pd.Series[int]"), pd.Series, np.intp) + check( + assert_type(s.drop(pd.Index([0, 1])), "pd.Series[int]"), pd.Series, np.integer + ) check( assert_type(s.drop(index=pd.Index([0, 1])), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) @@ -293,15 +295,17 @@ def test_types_fillna() -> None: def test_types_sort_index() -> None: s = pd.Series([1, 2], index=[2, 3]) - check(assert_type(s.sort_index(), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.sort_index(), "pd.Series[int]"), pd.Series, np.integer) check( - assert_type(s.sort_index(ascending=False), "pd.Series[int]"), pd.Series, np.intp + assert_type(s.sort_index(ascending=False), "pd.Series[int]"), + pd.Series, + np.integer, ) assert assert_type(s.sort_index(ascending=False, inplace=True), None) is None check( assert_type(s.sort_index(kind="mergesort"), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) @@ -313,25 +317,25 @@ def test_types_sort_index_with_key() -> None: def test_types_sort_values() -> None: s = pd.Series([4, 2]) - check(assert_type(s.sort_values(), "pd.Series[int]"), pd.Series, np.int64) + check(assert_type(s.sort_values(), "pd.Series[int]"), pd.Series, np.integer) if TYPE_CHECKING_INVALID_USAGE: check(assert_type(s.sort_values(0), pd.Series), pd.Series) # type: ignore[assert-type,call-overload] # pyright: ignore[reportGeneralTypeIssues] - check(assert_type(s.sort_values(axis=0), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.sort_values(axis=0), "pd.Series[int]"), pd.Series, np.integer) check( assert_type(s.sort_values(ascending=False), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) assert assert_type(s.sort_values(inplace=True, kind="quicksort"), None) is None check( assert_type(s.sort_values(na_position="last"), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) check( assert_type(s.sort_values(ignore_index=True), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) @@ -396,15 +400,15 @@ def test_types_sum() -> None: assert_type(s0.sum(min_count=4), float) s1 = assert_type(pd.Series([False, True], dtype=bool), "pd.Series[bool]") - check(assert_type(s1.sum(), "int"), np.int64) - check(assert_type(s1.sum(skipna=False), "int"), np.int64) - check(assert_type(s1.sum(numeric_only=False), "int"), np.int64) + check(assert_type(s1.sum(), "int"), np.integer) + check(assert_type(s1.sum(skipna=False), "int"), np.integer) + check(assert_type(s1.sum(numeric_only=False), "int"), np.integer) assert_type(s1.sum(min_count=4), "int") s2 = assert_type(pd.Series([0, 1], dtype=int), "pd.Series[int]") - check(assert_type(s2.sum(), "int"), np.int64) - check(assert_type(s2.sum(skipna=False), "int"), np.int64) - check(assert_type(s2.sum(numeric_only=False), "int"), np.int64) + check(assert_type(s2.sum(), "int"), np.integer) + check(assert_type(s2.sum(skipna=False), "int"), np.integer) + check(assert_type(s2.sum(numeric_only=False), "int"), np.integer) assert_type(s2.sum(min_count=4), "int") s3 = assert_type(pd.Series([1, 2, 3, np.nan], dtype=float), "pd.Series[float]") @@ -484,7 +488,7 @@ def test_types_idxmax() -> None: def test_types_value_counts() -> None: s = pd.Series(["a", "b"]) - check(assert_type(s.value_counts(), "pd.Series[int]"), pd.Series, np.int64) + check(assert_type(s.value_counts(), "pd.Series[int]"), pd.Series, np.integer) def test_types_unique() -> None: @@ -523,7 +527,7 @@ def get_depth(url: str) -> int: return len(url) ss = s.astype(str) - check(assert_type(ss.apply(get_depth), pd.Series), pd.Series, np.int64) + check(assert_type(ss.apply(get_depth), pd.Series), pd.Series, np.integer) check(assert_type(s.apply(lambda x: pd.NA), pd.Series), pd.Series, NAType) @@ -908,23 +912,18 @@ def test_types_between() -> None: def test_types_agg() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) - check(assert_type(s.agg("min"), int), np.int64) - check(assert_type(s.agg(["min", "max"]), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s.agg({"a": "min"}), "pd.Series[int]"), pd.Series, np.intp) - check( - assert_type( # type: ignore[assert-type] - s.agg("mean", axis=0), float # pyright: ignore[reportGeneralTypeIssues] - ), - np.float64, - ) + check(assert_type(s.agg("min"), int), np.integer) + check(assert_type(s.agg(["min", "max"]), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.agg({"a": "min"}), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.agg("mean", axis=0), float), np.float64) with pytest_warns_bounded( FutureWarning, r"The provided callable is currently using", lower="2.0.99", ): - check(assert_type(s.agg(min), int), np.int64) - check(assert_type(s.agg([min, max]), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s.agg({0: min}), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.agg(min), int), np.integer) + check(assert_type(s.agg([min, max]), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.agg({0: min}), "pd.Series[int]"), pd.Series, np.integer) check( assert_type( # type: ignore[assert-type] s.agg( @@ -939,27 +938,35 @@ def test_types_agg() -> None: def test_types_aggregate() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) - check(assert_type(s.aggregate("min"), int), np.int64) + check(assert_type(s.aggregate("min"), int), np.integer) check( - assert_type(s.aggregate(["min", "max"]), "pd.Series[int]"), pd.Series, np.intp + assert_type(s.aggregate(["min", "max"]), "pd.Series[int]"), + pd.Series, + np.integer, + ) + check( + assert_type(s.aggregate({"a": "min"}), "pd.Series[int]"), pd.Series, np.integer ) - check(assert_type(s.aggregate({"a": "min"}), "pd.Series[int]"), pd.Series, np.intp) with pytest_warns_bounded( FutureWarning, r"The provided callable is currently using", lower="2.0.99", ): - check(assert_type(s.aggregate(min), int), np.int64) + check(assert_type(s.aggregate(min), int), np.integer) + check( + assert_type(s.aggregate([min, max]), "pd.Series[int]"), + pd.Series, + np.integer, + ) check( - assert_type(s.aggregate([min, max]), "pd.Series[int]"), pd.Series, np.intp + assert_type(s.aggregate({0: min}), "pd.Series[int]"), pd.Series, np.integer ) - check(assert_type(s.aggregate({0: min}), "pd.Series[int]"), pd.Series, np.intp) def test_types_transform() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) - check(assert_type(s.transform("abs"), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s.transform(abs), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.transform("abs"), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.transform(abs), "pd.Series[int]"), pd.Series, np.integer) check(assert_type(s.transform(["abs", "sqrt"]), pd.DataFrame), pd.DataFrame) check(assert_type(s.transform([abs, np.sqrt]), pd.DataFrame), pd.DataFrame) check( @@ -1024,25 +1031,25 @@ def test_types_values() -> None: def test_types_rename() -> None: # Scalar s1 = pd.Series([1, 2, 3]).rename("A") - check(assert_type(s1, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1, "pd.Series[int]"), pd.Series, np.integer) # Hashable Sequence s2 = pd.Series([1, 2, 3]).rename(("A", "B")) - check(assert_type(s2, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s2, "pd.Series[int]"), pd.Series, np.integer) # Optional s3 = pd.Series([1, 2, 3]).rename(None) - check(assert_type(s3, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s3, "pd.Series[int]"), pd.Series, np.integer) # Functions def add1(x: int) -> int: return x + 1 s4 = pd.Series([1, 2, 3]).rename(add1) - check(assert_type(s4, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s4, "pd.Series[int]"), pd.Series, np.integer) # Dictionary s5 = pd.Series([1, 2, 3]).rename({1: 10}) - check(assert_type(s5, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s5, "pd.Series[int]"), pd.Series, np.integer) # inplace s6: None = pd.Series([1, 2, 3]).rename("A", inplace=True) @@ -1058,8 +1065,8 @@ def test_types_ne() -> None: def test_types_bfill() -> None: s1 = pd.Series([1, 2, 3]) - check(assert_type(s1.bfill(), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s1.bfill(inplace=False), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1.bfill(), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s1.bfill(inplace=False), "pd.Series[int]"), pd.Series, np.integer) assert assert_type(s1.bfill(inplace=True), None) is None @@ -1096,8 +1103,8 @@ def test_types_ewm() -> None: def test_types_ffill() -> None: s1 = pd.Series([1, 2, 3]) - check(assert_type(s1.ffill(), "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s1.ffill(inplace=False), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1.ffill(), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s1.ffill(inplace=False), "pd.Series[int]"), pd.Series, np.integer) assert assert_type(s1.ffill(inplace=True), None) is None @@ -1145,10 +1152,10 @@ def test_series_index_isin() -> None: t2 = s.loc[~s.index.isin([1, 3])] t3 = s[s.index.isin([1, 3])] t4 = s[~s.index.isin([1, 3])] - check(assert_type(t1, "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(t2, "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(t3, "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(t4, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(t1, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(t2, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(t3, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(t4, "pd.Series[int]"), pd.Series, np.integer) def test_series_invert() -> None: @@ -1156,8 +1163,8 @@ def test_series_invert() -> None: s2 = ~s1 check(assert_type(s2, "pd.Series[bool]"), pd.Series, np.bool_) s3 = pd.Series([1, 2, 3]) - check(assert_type(s3[s2], "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s3.loc[s2], "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s3[s2], "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s3.loc[s2], "pd.Series[int]"), pd.Series, np.integer) def test_series_multiindex_getitem() -> None: @@ -1176,7 +1183,7 @@ def test_series_mul() -> None: sm2 = s * s check(assert_type(sm2, pd.Series), pd.Series) sp = s + 4 - check(assert_type(sp, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(sp, "pd.Series[int]"), pd.Series, np.integer) def test_reset_index() -> None: @@ -1191,11 +1198,11 @@ def test_reset_index() -> None: r3 = s.reset_index("ab") check(assert_type(r3, pd.DataFrame), pd.DataFrame) r4 = s.reset_index(drop=True) - check(assert_type(r4, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(r4, "pd.Series[int]"), pd.Series, np.integer) r5 = s.reset_index(["ab"], drop=True) - check(assert_type(r5, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(r5, "pd.Series[int]"), pd.Series, np.integer) r6 = s.reset_index(["ab"], drop=True, allow_duplicates=True) - check(assert_type(r6, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(r6, "pd.Series[int]"), pd.Series, np.integer) assert assert_type(s.reset_index(inplace=True, drop=True), None) is None @@ -1213,11 +1220,11 @@ def test_series_dtype() -> None: def test_types_replace() -> None: # GH 44 s = pd.Series([1, 2, 3]) - check(assert_type(s.replace(1, 2), "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.replace(1, 2), "pd.Series[int]"), pd.Series, np.integer) check( assert_type(s.replace(1, 2, inplace=False), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) assert assert_type(s.replace(1, 2, inplace=True), None) is None @@ -1231,25 +1238,27 @@ def test_cat_accessor() -> None: # GH 139 ser = pd.Series([1, 2, 3], name="A").astype("category") check( - assert_type(ser.cat.set_categories([1, 2, 3]), pd.Series), pd.Series, np.int64 + assert_type(ser.cat.set_categories([1, 2, 3]), pd.Series), pd.Series, np.integer ) check( assert_type(ser.cat.reorder_categories([2, 3, 1], ordered=True), pd.Series), pd.Series, - np.int64, + np.integer, ) check( assert_type(ser.cat.rename_categories([1, 2, 3]), pd.Series), pd.Series, - np.int64, + np.integer, ) check( - assert_type(ser.cat.remove_unused_categories(), pd.Series), pd.Series, np.int64 + assert_type(ser.cat.remove_unused_categories(), pd.Series), + pd.Series, + np.integer, ) - check(assert_type(ser.cat.remove_categories([2]), pd.Series), pd.Series, np.int64) - check(assert_type(ser.cat.add_categories([4]), pd.Series), pd.Series, np.int64) - check(assert_type(ser.cat.as_ordered(), pd.Series), pd.Series, np.int64) - check(assert_type(ser.cat.as_unordered(), pd.Series), pd.Series, np.int64) + check(assert_type(ser.cat.remove_categories([2]), pd.Series), pd.Series, np.integer) + check(assert_type(ser.cat.add_categories([4]), pd.Series), pd.Series, np.integer) + check(assert_type(ser.cat.as_ordered(), pd.Series), pd.Series, np.integer) + check(assert_type(ser.cat.as_unordered(), pd.Series), pd.Series, np.integer) def test_cat_ctor_values() -> None: @@ -1349,7 +1358,7 @@ def test_string_accessors(): check(assert_type(s.str.cat(sep="X"), str), str) check(assert_type(s.str.center(10), pd.Series), pd.Series) check(assert_type(s.str.contains("a"), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.str.count("pp"), "pd.Series[int]"), pd.Series, np.int64) + check(assert_type(s.str.count("pp"), "pd.Series[int]"), pd.Series, np.integer) check(assert_type(s.str.decode("utf-8"), pd.Series), pd.Series) check(assert_type(s.str.encode("latin-1"), pd.Series), pd.Series) check(assert_type(s.str.endswith("e"), "pd.Series[bool]"), pd.Series, np.bool_) @@ -1374,7 +1383,7 @@ def test_string_accessors(): check(assert_type(s.str.istitle(), "pd.Series[bool]"), pd.Series, np.bool_) check(assert_type(s.str.isupper(), "pd.Series[bool]"), pd.Series, np.bool_) check(assert_type(s2.str.join("-"), pd.Series), pd.Series) - check(assert_type(s.str.len(), "pd.Series[int]"), pd.Series, np.int64) + check(assert_type(s.str.len(), "pd.Series[int]"), pd.Series, np.integer) check(assert_type(s.str.ljust(80), pd.Series), pd.Series) check(assert_type(s.str.lower(), pd.Series), pd.Series) check(assert_type(s.str.lstrip("a"), pd.Series), pd.Series) @@ -1572,7 +1581,7 @@ def test_neg() -> None: # GH 253 sr = pd.Series([1, 2, 3]) sr_int = pd.Series([1, 2, 3], dtype=int) - check(assert_type(-sr, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(-sr, "pd.Series[int]"), pd.Series, np.integer) check(assert_type(-sr_int, "pd.Series[int]"), pd.Series, np.integer) @@ -1733,8 +1742,8 @@ def test_AnyArrayLike_and_clip() -> None: ser = pd.Series([1, 2, 3]) s1 = ser.clip(lower=ser) s2 = ser.clip(upper=ser) - check(assert_type(s1, "pd.Series[int]"), pd.Series, np.intp) - check(assert_type(s2, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s2, "pd.Series[int]"), pd.Series, np.integer) def test_pandera_generic() -> None: @@ -2651,7 +2660,7 @@ def test_all_astype_args_tested() -> None: def test_check_xs() -> None: s4 = pd.Series([1, 4]) s4.xs(0, axis=0) - check(assert_type(s4, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s4, "pd.Series[int]"), pd.Series, np.integer) def test_types_apply_set() -> None: @@ -2664,20 +2673,24 @@ def test_types_apply_set() -> None: def test_prefix_summix_axis() -> None: s = pd.Series([1, 2, 3, 4]) check( - assert_type(s.add_suffix("_item", axis=0), "pd.Series[int]"), pd.Series, np.intp + assert_type(s.add_suffix("_item", axis=0), "pd.Series[int]"), + pd.Series, + np.integer, ) check( assert_type(s.add_suffix("_item", axis="index"), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) check( - assert_type(s.add_prefix("_item", axis=0), "pd.Series[int]"), pd.Series, np.intp + assert_type(s.add_prefix("_item", axis=0), "pd.Series[int]"), + pd.Series, + np.integer, ) check( assert_type(s.add_prefix("_item", axis="index"), "pd.Series[int]"), pd.Series, - np.intp, + np.integer, ) if TYPE_CHECKING_INVALID_USAGE: @@ -2688,7 +2701,7 @@ def test_prefix_summix_axis() -> None: def test_convert_dtypes_dtype_backend() -> None: s = pd.Series([1, 2, 3, 4]) s1 = s.convert_dtypes(dtype_backend="numpy_nullable") - check(assert_type(s1, "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s1, "pd.Series[int]"), pd.Series, np.integer) def test_apply_returns_none() -> None: @@ -2700,7 +2713,7 @@ def test_apply_returns_none() -> None: def test_loc_callable() -> None: # GH 586 s = pd.Series([1, 2]) - check(assert_type(s.loc[lambda x: x > 1], "pd.Series[int]"), pd.Series, np.intp) + check(assert_type(s.loc[lambda x: x > 1], "pd.Series[int]"), pd.Series, np.integer) def test_to_json_mode() -> None: From e42d49241a4912a9b9afe42b5caa6573dd21d8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 18 Aug 2023 13:38:19 -0400 Subject: [PATCH 6/7] everything(?) but agg --- tests/test_series.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tests/test_series.py b/tests/test_series.py index 4da19a5ed..f508b2257 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -248,7 +248,7 @@ def test_types_drop_multilevel() -> None: def test_types_dropna() -> None: - s = pd.Series([1.0, np.nan]) + s = pd.Series([1.0, np.nan, np.nan]) check(assert_type(s.dropna(), "pd.Series[float]"), pd.Series, float) assert assert_type(s.dropna(axis=0, inplace=True), None) is None @@ -269,7 +269,7 @@ class MyEnum(Enum): def test_types_fillna() -> None: - s = pd.Series([1.0, np.nan]) + s = pd.Series([1.0, np.nan, np.nan, 3.0]) check(assert_type(s.fillna(0), "pd.Series[float]"), pd.Series, float) check(assert_type(s.fillna(0, axis="index"), "pd.Series[float]"), pd.Series, float) with pytest_warns_bounded( @@ -294,7 +294,7 @@ def test_types_fillna() -> None: def test_types_sort_index() -> None: - s = pd.Series([1, 2], index=[2, 3]) + s = pd.Series([1, 2, 3], index=[2, 3, 1]) check(assert_type(s.sort_index(), "pd.Series[int]"), pd.Series, np.integer) check( assert_type(s.sort_index(ascending=False), "pd.Series[int]"), @@ -316,7 +316,7 @@ def test_types_sort_index_with_key() -> None: def test_types_sort_values() -> None: - s = pd.Series([4, 2]) + s = pd.Series([4, 2, 1, 3]) check(assert_type(s.sort_values(), "pd.Series[int]"), pd.Series, np.integer) if TYPE_CHECKING_INVALID_USAGE: check(assert_type(s.sort_values(0), pd.Series), pd.Series) # type: ignore[assert-type,call-overload] # pyright: ignore[reportGeneralTypeIssues] @@ -341,12 +341,12 @@ def test_types_sort_values() -> None: # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html def test_types_sort_values_with_key() -> None: - s = pd.Series([1, 2], index=[2, 3]) + s = pd.Series([1, 2, 3], index=[2, 3, 1]) res: pd.Series = s.sort_values(key=lambda k: -k) def test_types_shift() -> None: - s = pd.Series([1, 2]) + s = pd.Series([1, 2, 3]) s.shift() s.shift(axis=0, periods=1) s.shift(-1, fill_value=0) @@ -393,7 +393,7 @@ def test_types_sum() -> None: # 2. Runtime return types of `series.sum(min_count=...)` are NOT # tested (because of potential `nan`s). - s0 = assert_type(pd.Series([1.0, np.nan]), "pd.Series[float]") + s0 = assert_type(pd.Series([1.0, 2.0, 3.0, np.nan]), "pd.Series[float]") check(assert_type(s0.sum(), float), np.float64) check(assert_type(s0.sum(skipna=False), float), np.float64) check(assert_type(s0.sum(numeric_only=False), float), np.float64) @@ -876,7 +876,8 @@ def test_update() -> None: s1.update(pd.Series([0, 2, 12])) # Series.update() accepting objects that can be coerced to a Series was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html s1.update([1, 2, -4, 3]) - s1.update([1, "b", "c", "d"]) # type: ignore[list-item] # pyright: ignore[reportGeneralTypeIssues] + if TYPE_CHECKING_INVALID_USAGE: + s1.update([1, "b", "c", "d"]) # type: ignore[list-item] # pyright: ignore[reportGeneralTypeIssues] s1.update({1: 9, 3: 4}) From 04752c4f977c8a1f046264b059d8e90c3ca51bbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Fri, 18 Aug 2023 16:28:31 -0400 Subject: [PATCH 7/7] return series --- pandas-stubs/core/series.pyi | 2 +- tests/test_series.py | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/pandas-stubs/core/series.pyi b/pandas-stubs/core/series.pyi index b0edc57ce..149d90fb8 100644 --- a/pandas-stubs/core/series.pyi +++ b/pandas-stubs/core/series.pyi @@ -859,7 +859,7 @@ class Series(IndexOpsMixin[S1], NDFrame): axis: AxisIndex = ..., *args, **kwargs, - ) -> Self: ... + ) -> Series: ... agg = aggregate @overload def transform( diff --git a/tests/test_series.py b/tests/test_series.py index f508b2257..1a1140909 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -914,8 +914,8 @@ def test_types_between() -> None: def test_types_agg() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) check(assert_type(s.agg("min"), int), np.integer) - check(assert_type(s.agg(["min", "max"]), "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(s.agg({"a": "min"}), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.agg(["min", "max"]), pd.Series), pd.Series, np.integer) + check(assert_type(s.agg({"a": "min"}), pd.Series), pd.Series, np.integer) check(assert_type(s.agg("mean", axis=0), float), np.float64) with pytest_warns_bounded( FutureWarning, @@ -923,15 +923,10 @@ def test_types_agg() -> None: lower="2.0.99", ): check(assert_type(s.agg(min), int), np.integer) - check(assert_type(s.agg([min, max]), "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(s.agg({0: min}), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.agg([min, max]), pd.Series), pd.Series, np.integer) + check(assert_type(s.agg({0: min}), pd.Series), pd.Series, np.integer) check( - assert_type( # type: ignore[assert-type] - s.agg( - x=max, y="min", z=np.mean - ), # pyright: ignore[reportGeneralTypeIssues] - "pd.Series[float]", - ), + assert_type(s.agg(x=max, y="min", z=np.mean), pd.Series), pd.Series, np.float64, ) @@ -941,13 +936,11 @@ def test_types_aggregate() -> None: s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) check(assert_type(s.aggregate("min"), int), np.integer) check( - assert_type(s.aggregate(["min", "max"]), "pd.Series[int]"), + assert_type(s.aggregate(["min", "max"]), pd.Series), pd.Series, np.integer, ) - check( - assert_type(s.aggregate({"a": "min"}), "pd.Series[int]"), pd.Series, np.integer - ) + check(assert_type(s.aggregate({"a": "min"}), pd.Series), pd.Series, np.integer) with pytest_warns_bounded( FutureWarning, r"The provided callable is currently using", @@ -955,13 +948,11 @@ def test_types_aggregate() -> None: ): check(assert_type(s.aggregate(min), int), np.integer) check( - assert_type(s.aggregate([min, max]), "pd.Series[int]"), + assert_type(s.aggregate([min, max]), pd.Series), pd.Series, np.integer, ) - check( - assert_type(s.aggregate({0: min}), "pd.Series[int]"), pd.Series, np.integer - ) + check(assert_type(s.aggregate({0: min}), pd.Series), pd.Series, np.integer) def test_types_transform() -> None: