From dc822d6b5c623e5370f11603aa4fda1b829274be Mon Sep 17 00:00:00 2001 From: Loic Diridollou Date: Sun, 16 Feb 2025 11:05:59 -0500 Subject: [PATCH] GH1089 Simplify test migration in series/frame --- tests/test_frame.py | 19 ++++--- tests/test_series.py | 127 +++++++++++++++++++++++++++---------------- 2 files changed, 93 insertions(+), 53 deletions(-) diff --git a/tests/test_frame.py b/tests/test_frame.py index fe2b6be1b..8bf24f477 100644 --- a/tests/test_frame.py +++ b/tests/test_frame.py @@ -35,7 +35,6 @@ DatetimeIndexResampler, Resampler, ) -from pandas.core.series import Series import pytest from typing_extensions import ( TypeAlias, @@ -560,8 +559,11 @@ def test_types_median() -> None: def test_types_iterrows() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - # TODO rewrite the below with check assert_type - vv: Iterable[tuple[Hashable, Series]] = df.iterrows() + check( + assert_type(df.iterrows(), "Iterable[tuple[Hashable, pd.Series]]"), + Iterable, + tuple, + ) def test_types_itertuples() -> None: @@ -2155,13 +2157,16 @@ def test_types_regressions() -> None: s1 = pd.Series([1, 2, 3]) s2 = pd.Series([4, 5, 6]) df = pd.concat([s1, s2], axis=1) - # TODO the inference here returns Any, should return Series - ts1: pd.Series = pd.concat([s1, s2], axis=0) - ts2: pd.Series = pd.concat([s1, s2]) + ts1 = pd.concat([s1, s2], axis=0) + ts2 = pd.concat([s1, s2]) + + check(assert_type(ts1, pd.Series), pd.Series) + check(assert_type(ts2, pd.Series), pd.Series) # https://github.com/microsoft/python-type-stubs/issues/110 check(assert_type(pd.Timestamp("2021-01-01"), pd.Timestamp), datetime.date) - tslist: list[pd.Timestamp] = list(pd.to_datetime(["2022-01-01", "2022-01-02"])) + tslist = list(pd.to_datetime(["2022-01-01", "2022-01-02"])) + check(assert_type(tslist, list[pd.Timestamp]), list, pd.Timestamp) sseries = pd.Series(tslist) with pytest_warns_bounded(FutureWarning, "'d' is deprecated", lower="2.2.99"): sseries + pd.Timedelta(1, "d") diff --git 
a/tests/test_series.py b/tests/test_series.py index f0dbe8019..dbc503794 100644 --- a/tests/test_series.py +++ b/tests/test_series.py @@ -145,20 +145,20 @@ def test_types_all() -> None: def test_types_csv() -> None: s = pd.Series(data=[1, 2, 3]) - csv_df: str = s.to_csv() + check(assert_type(s.to_csv(), str), str) with ensure_clean() as path: s.to_csv(path) - s2: pd.DataFrame = pd.read_csv(path) + check(assert_type(pd.read_csv(path), pd.DataFrame), pd.DataFrame) with ensure_clean() as path: s.to_csv(Path(path)) - s3: pd.DataFrame = pd.read_csv(Path(path)) + check(assert_type(pd.read_csv(Path(path)), pd.DataFrame), pd.DataFrame) # This keyword was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html with ensure_clean() as path: s.to_csv(path, errors="replace") - s4: pd.DataFrame = pd.read_csv(path) + check(assert_type(pd.read_csv(path), pd.DataFrame), pd.DataFrame) def test_types_copy() -> None: @@ -176,7 +176,7 @@ def test_types_select() -> None: lower="2.0.99", ): s[0] - s[1:] + check(assert_type(s[1:], "pd.Series[int]"), pd.Series, np.integer) def test_types_iloc_iat() -> None: @@ -229,11 +229,11 @@ def test_types_boolean_indexing() -> None: def test_types_df_to_df_comparison() -> None: s = pd.Series(data={"col1": [1, 2]}) s2 = pd.Series(data={"col1": [3, 2]}) - res_gt: pd.Series = s > s2 - res_ge: pd.Series = s >= s2 - res_lt: pd.Series = s < s2 - res_le: pd.Series = s <= s2 - res_e: pd.Series = s == s2 + check(assert_type(s > s2, "pd.Series[bool]"), pd.Series, np.bool) + check(assert_type(s >= s2, "pd.Series[bool]"), pd.Series, np.bool) + check(assert_type(s < s2, "pd.Series[bool]"), pd.Series, np.bool) + check(assert_type(s <= s2, "pd.Series[bool]"), pd.Series, np.bool) + check(assert_type(s == s2, "pd.Series[bool]"), pd.Series, np.bool) def test_types_head_tail() -> None: @@ -309,7 +309,11 @@ def test_types_drop_multilevel() -> None: codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], ) s = pd.Series(data=[1, 2, 3, 4, 5, 6], index=index) - res: 
pd.Series = s.drop(labels="first", level=1) + check( + assert_type(s.drop(labels="first", level=1), "pd.Series[int]"), + pd.Series, + np.integer, + ) def test_types_drop_duplicates() -> None: @@ -382,7 +386,11 @@ def test_types_sort_index() -> None: # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html def test_types_sort_index_with_key() -> None: s = pd.Series([1, 2, 3], index=["a", "B", "c"]) - res: pd.Series = s.sort_index(key=lambda k: k.str.lower()) + check( + assert_type(s.sort_index(key=lambda k: k.str.lower()), "pd.Series[int]"), + pd.Series, + np.integer, + ) def test_types_sort_values() -> None: @@ -412,7 +420,11 @@ def test_types_sort_values() -> None: # This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html def test_types_sort_values_with_key() -> None: s = pd.Series([1, 2, 3], index=[2, 3, 1]) - res: pd.Series = s.sort_values(key=lambda k: -k) + check( + assert_type(s.sort_values(key=lambda k: -k), "pd.Series[int]"), + pd.Series, + np.integer, + ) def test_types_shift() -> None: @@ -435,18 +447,26 @@ def test_types_rank() -> None: def test_types_mean() -> None: s = pd.Series([1, 2, 3, np.nan]) - f1: float = s.mean() - s1: pd.Series = s.groupby(level=0).mean() - f2: float = s.mean(skipna=False) - f3: float = s.mean(numeric_only=False) + check(assert_type(s.mean(), float), float) + check( + assert_type(s.groupby(level=0).mean(), "pd.Series[float]"), + pd.Series, + np.float64, + ) + check(assert_type(s.mean(skipna=False), float), float) + check(assert_type(s.mean(numeric_only=False), float), float) def test_types_median() -> None: s = pd.Series([1, 2, 3, np.nan]) - f1: float = s.median() - s1: pd.Series = s.groupby(level=0).median() - f2: float = s.median(skipna=False) - f3: float = s.median(numeric_only=False) + check(assert_type(s.median(), float), float) + check( + assert_type(s.groupby(level=0).median(), "pd.Series[float]"), + pd.Series, + np.float64, + ) + check(assert_type(s.median(skipna=False), float), 
float) + check(assert_type(s.median(numeric_only=False), float), float) def test_types_sum() -> None: @@ -624,17 +644,25 @@ def test_types_element_wise_arithmetic() -> None: s = pd.Series([0, 1, -10]) s2 = pd.Series([7, -5, 10]) - res_add1: pd.Series = s + s2 - res_add2: pd.Series = s.add(s2, fill_value=0) + check(assert_type(s + s2, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.add(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer) - res_sub: pd.Series = s - s2 - res_sub2: pd.Series = s.sub(s2, fill_value=0) + # TODO this one below should be typed as pd.Series[int] + check(assert_type(s - s2, pd.Series), pd.Series, np.integer) + check(assert_type(s.sub(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer) - res_mul: pd.Series = s * s2 - res_mul2: pd.Series = s.mul(s2, fill_value=0) + # TODO these two below should be typed as pd.Series[int] + # check(assert_type(s * s2, "pd.Series[int]"), pd.Series, np.integer ) + check(assert_type(s * s2, pd.Series), pd.Series, np.integer) + # check(assert_type(s.mul(s2, fill_value=0), "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.mul(s2, fill_value=0), pd.Series), pd.Series, np.integer) - res_div: pd.Series = s / s2 - res_div2: pd.Series = s.div(s2, fill_value=0) + # TODO this one below should be typed as pd.Series[float] + # check(assert_type(s / s2, "pd.Series[float]"), pd.Series, np.float64) + check(assert_type(s / s2, pd.Series), pd.Series, np.float64) + check( + assert_type(s.div(s2, fill_value=0), "pd.Series[float]"), pd.Series, np.float64 + ) res_floordiv: pd.Series = s // s2 res_floordiv2: pd.Series = s.floordiv(s2, fill_value=0) @@ -651,8 +679,8 @@ def test_types_element_wise_arithmetic() -> None: def test_types_scalar_arithmetic() -> None: s = pd.Series([0, 1, -10]) - res_add1: pd.Series = s + 1 - res_add2: pd.Series = s.add(1, fill_value=0) + check(assert_type(s + 1, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s.add(1, fill_value=0), "pd.Series[int]"), pd.Series, 
np.integer) res_sub: pd.Series = s - 1 res_sub2: pd.Series = s.sub(1, fill_value=0) @@ -675,8 +703,8 @@ def test_types_scalar_arithmetic() -> None: res_pow3: pd.Series = s.pow(0.5) -# GH 103 def test_types_complex_arithmetic() -> None: + # GH 103 c = 1 + 1j s = pd.Series([1.0, 2.0, 3.0]) x = s + c @@ -1105,8 +1133,8 @@ def test_types_getitem() -> None: s = pd.Series({"key": [0, 1, 2, 3]}) key: list[int] = s["key"] s2 = pd.Series([0, 1, 2, 3]) - value: int = s2[0] - s3: pd.Series = s[:2] + check(assert_type(s2[0], int), np.integer) + check(assert_type(s[:2], pd.Series), pd.Series) def test_types_getitem_by_timestamp() -> None: @@ -1117,9 +1145,9 @@ def test_types_getitem_by_timestamp() -> None: def test_types_eq() -> None: s1 = pd.Series([1, 2, 3]) - res1: pd.Series = s1 == 1 + check(assert_type(s1 == 1, "pd.Series[bool]"), pd.Series, np.bool) s2 = pd.Series([1, 2, 4]) - res2: pd.Series = s1 == s2 + check(assert_type(s1 == s2, "pd.Series[bool]"), pd.Series, np.bool) def test_types_rename_axis() -> None: @@ -1177,6 +1205,7 @@ def add1(x: int) -> int: s5 = pd.Series([1, 2, 3]).rename({1: 10}) check(assert_type(s5, "pd.Series[int]"), pd.Series, np.integer) # inplace + # TODO fix issue with inplace=True returning a Series, cf pandas #60942 s6: None = pd.Series([1, 2, 3]).rename("A", inplace=True) if TYPE_CHECKING_INVALID_USAGE: @@ -1186,7 +1215,7 @@ def add1(x: int) -> int: def test_types_ne() -> None: s1 = pd.Series([1, 2, 3]) s2 = pd.Series([1, 2, 4]) - s3: pd.Series = s1 != s2 + check(assert_type(s1 != s2, "pd.Series[bool]"), pd.Series, np.bool) def test_types_bfill() -> None: @@ -1255,7 +1284,7 @@ def test_types_ffill() -> None: def test_types_as_type() -> None: s1 = pd.Series([1, 2, 8, 9]) - s2: pd.Series = s1.astype("int32") + check(assert_type(s1.astype("int32"), "pd.Series[int]"), pd.Series, np.int32) def test_types_dot() -> None: @@ -1408,13 +1437,19 @@ def test_cat_accessor() -> None: def test_cat_ctor_values() -> None: - c1 = pd.Categorical(["a", "b", "a"]) + 
check(assert_type(pd.Categorical(["a", "b", "a"]), pd.Categorical), pd.Categorical) # GH 95 - c2 = pd.Categorical(pd.Series(["a", "b", "a"])) + check( + assert_type(pd.Categorical(pd.Series(["a", "b", "a"])), pd.Categorical), + pd.Categorical, + ) s: Sequence = cast(Sequence, ["a", "b", "a"]) - c3 = pd.Categorical(s) + check(assert_type(pd.Categorical(s), pd.Categorical), pd.Categorical) # GH 107 - c4 = pd.Categorical(np.array([1, 2, 3, 1, 1])) + check( + assert_type(pd.Categorical(np.array([1, 2, 3, 1, 1])), pd.Categorical), + pd.Categorical, + ) def test_iloc_getitem_ndarray() -> None: @@ -1472,8 +1507,8 @@ def test_iloc_setitem_ndarray() -> None: def test_types_iter() -> None: s = pd.Series([1, 2, 3], dtype=int) iterable: Iterable[int] = s - assert_type(iter(s), Iterator[int]) - assert_type(next(iter(s)), int) + check(assert_type(iter(s), Iterator[int]), Iterator, int) + check(assert_type(next(iter(s)), int), int) def test_types_to_list() -> None: @@ -2701,12 +2736,12 @@ def test_astype_bytes(cast_arg: BytesDtypeArg, target_type: type) -> None: @pytest.mark.parametrize("cast_arg, target_type", ASTYPE_CATEGORICAL_ARGS, ids=repr) def test_astype_categorical(cast_arg: CategoryDtypeArg, target_type: type) -> None: s = pd.Series(["a", "b"]) - check(s.astype("category"), pd.Series, target_type) + check(s.astype(cast_arg), pd.Series, target_type) if TYPE_CHECKING: # pandas category assert_type(s.astype(pd.CategoricalDtype()), "pd.Series[pd.CategoricalDtype]") - assert_type(s.astype("category"), "pd.Series[pd.CategoricalDtype]") + assert_type(s.astype(cast_arg), "pd.Series[pd.CategoricalDtype]") # pyarrow dictionary # assert_type(s.astype("dictionary[pyarrow]"), "pd.Series[Categorical]")