From 66e4aed8caa9aadf9ab0afc7b89ba1e742682460 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Mon, 24 Jan 2022 18:07:47 +0100 Subject: [PATCH 01/26] Add typing tests according to the issue 45252. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test stubs originally authored by Joanna Sendorek, Zbigniew Królikowski and pandas_stubs contributors: https://github.com/VirtusLab/pandas-stubs/graphs/contributors Adapted to pandas needs by Torsten Wörtwein --- pandas/tests/typing/__init__.py | 0 pandas/tests/typing/valid/__init__.py | 0 pandas/tests/typing/valid/test_frame.py | 825 ++++++++++++++++++++ pandas/tests/typing/valid/test_interval.py | 38 + pandas/tests/typing/valid/test_pandas.py | 167 ++++ pandas/tests/typing/valid/test_series.py | 583 ++++++++++++++ pandas/tests/typing/valid/test_testing.py | 21 + pandas/tests/typing/valid/test_timestamp.py | 67 ++ pyproject.toml | 31 +- 9 files changed, 1730 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/typing/__init__.py create mode 100644 pandas/tests/typing/valid/__init__.py create mode 100644 pandas/tests/typing/valid/test_frame.py create mode 100644 pandas/tests/typing/valid/test_interval.py create mode 100644 pandas/tests/typing/valid/test_pandas.py create mode 100644 pandas/tests/typing/valid/test_series.py create mode 100644 pandas/tests/typing/valid/test_testing.py create mode 100644 pandas/tests/typing/valid/test_timestamp.py diff --git a/pandas/tests/typing/__init__.py b/pandas/tests/typing/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/typing/valid/__init__.py b/pandas/tests/typing/valid/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py new file mode 100644 index 0000000000000..b4d7b637ede3f --- /dev/null +++ b/pandas/tests/typing/valid/test_frame.py @@ -0,0 +1,825 @@ +# flake8: 
noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true +import io +import tempfile +from pathlib import Path +from typing import ( + Any, + Iterable, + List, + Tuple, +) + +import numpy as np + +import pandas as pd +from pandas.util import _test_decorators as td + + +def test_types_init() -> None: + pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1]) + pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8) + pd.DataFrame( + np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + columns=["a", "b", "c"], + dtype=np.int8, + copy=True, + ) + + +def test_types_to_csv() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Optional[str]", + # variable has type "str") + csv_df: str = df.to_csv() # type: ignore[assignment] + + with tempfile.NamedTemporaryFile() as file: + df.to_csv(file.name) + df2: pd.DataFrame = pd.read_csv(file.name) + + with tempfile.NamedTemporaryFile() as file: + df.to_csv(Path(file.name)) + df3: pd.DataFrame = pd.read_csv(Path(file.name)) + + # This keyword was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + with tempfile.NamedTemporaryFile() as file: + df.to_csv(file.name, errors="replace") + df4: pd.DataFrame = pd.read_csv(file.name) + + # Testing support for binary file handles, added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.to_csv(io.BytesIO(), encoding="utf-8", compression="gzip") + + +def test_types_to_csv_when_path_passed() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + path: Path = Path("./dummy_path.txt") + try: + assert not path.exists() + df.to_csv(path) + df5: pd.DataFrame = pd.read_csv(path) + finally: + path.unlink() + + +def test_types_copy() -> None: + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df2: pd.DataFrame = df.copy() + + +def 
test_types_getitem() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + i = pd.Index(["col1", "col2"]) + s = pd.Series(["col1", "col2"]) + select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) + a = np.array(["col1", "col2"]) + df["col1"] + df[5] + df[["col1", "col2"]] + df[1:] + df[s] + df[a] + df[select_df] + df[i] + + +def test_slice_setitem() -> None: + # Due to the bug in pandas 1.2.3 + # (https://github.com/pandas-dev/pandas/issues/40440), + # this is in separate test case + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + df[1:] = ["a", "b", "c"] + + +def test_types_setitem() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + i = pd.Index(["col1", "col2"]) + s = pd.Series(["col1", "col2"]) + a = np.array(["col1", "col2"]) + df["col1"] = [1, 2] + df[5] = [5, 6] + df[["col1", "col2"]] = [[1, 2], [3, 4]] + df[s] = [5, 6] + df[a] = [[1, 2], [3, 4]] + df[i] = [8, 9] + + +def test_types_setitem_mask() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) + df[select_df] = [1, 2, 3] + + +def test_types_iloc_iat() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.iloc[1, 1] + df.iloc[[1], [1]] + df.iat[0, 0] + + +def test_types_loc_at() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.loc[[0], "col1"] + df.at[0, "col1"] + + +def test_types_boolean_indexing() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df[df > 1] + df[~(df > 1.0)] + + +def test_types_head_tail() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.head(1) + df.tail(1) + + +def test_types_assign() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.assign(col3=lambda frame: frame.sum(axis=1)) + df["col3"] = df.sum(axis=1) + + +def test_types_sample() -> None: + df = 
pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.sample(frac=0.5) + df.sample(n=1) + + +def test_types_nlargest_nsmallest() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.nlargest(1, "col1") + df.nsmallest(1, "col2") + + +def test_types_filter() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.filter(items=["col1"]) + df.filter(regex="co.*") + df.filter(like="1") + + +def test_types_setting() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df["col1"] = 1 + df[df == 1] = 7 + + +def test_types_drop() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + res: pd.DataFrame = df.drop("col1", axis=1) + res2: pd.DataFrame = df.drop(columns=["col1"]) + res3: pd.DataFrame = df.drop({0}) + res4: pd.DataFrame = df.drop(index={0}) + res5: pd.DataFrame = df.drop(columns={"col1"}) + res6: pd.DataFrame = df.drop(index=1) + res7: pd.DataFrame = df.drop(labels=0) + res8: None = df.drop([0, 0], inplace=True) + + +def test_types_dropna() -> None: + df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) + res: pd.DataFrame = df.dropna() + res2: pd.DataFrame = df.dropna(axis=1, thresh=1) + res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) + + +def test_types_fillna() -> None: + df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) + res: pd.DataFrame = df.fillna(0) + res2: None = df.fillna(method="pad", axis=1, inplace=True) + + +def test_types_sort_index() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=[5, 1, 3, 2]) + df2 = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=["a", "b", "c", "d"]) + res: pd.DataFrame = df.sort_index() + level1 = (1, 2) + res2: pd.DataFrame = df.sort_index(ascending=False, level=level1) + level2: List[str] = ["a", "b", "c"] + # error: Argument "level" to "sort_index" of "DataFrame" has incompatible type + # "List[str]"; expected "Optional[Union[Hashable, int]]" + res3: pd.DataFrame = 
df2.sort_index(level=level2) # type: ignore[arg-type] + res4: pd.DataFrame = df.sort_index(ascending=False, level=3) + res5: None = df.sort_index(kind="mergesort", inplace=True) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_index_with_key() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=["a", "b", "C", "d"]) + res: pd.DataFrame = df.sort_index(key=lambda k: k.str.lower()) + + +def test_types_set_index() -> None: + df = pd.DataFrame( + data={"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}, index=[5, 1, 3, 2] + ) + res: pd.DataFrame = df.set_index("col1") + res2: pd.DataFrame = df.set_index("col1", drop=False) + res3: pd.DataFrame = df.set_index("col1", append=True) + res4: pd.DataFrame = df.set_index("col1", verify_integrity=True) + res5: pd.DataFrame = df.set_index(["col1", "col2"]) + res6: None = df.set_index("col1", inplace=True) + + +def test_types_query() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 0, 1, 7]}) + res: pd.DataFrame = df.query("col1 > col2") + res2: None = df.query("col1 % col2 == 0", inplace=True) + + +def test_types_eval() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 0, 1, 7]}) + df.eval("col1 > col2") + res: None = df.eval("C = col1 % col2 == 0", inplace=True) + + +def test_types_sort_values() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + res: pd.DataFrame = df.sort_values("col1") + res2: None = df.sort_values("col1", ascending=False, inplace=True) + res3: pd.DataFrame = df.sort_values(by=["col1", "col2"], ascending=[True, False]) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_values_with_key() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + res: pd.DataFrame = df.sort_values(by="col1", key=lambda k: -k) + + +def test_types_shift() -> None: + df = pd.DataFrame(data={"col1": [1, 1], "col2": [3, 4]}) + df.shift() + 
df.shift(1) + df.shift(-1) + + +def test_types_rank() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.rank(axis=0, na_option="bottom") + df.rank(method="min", pct=True) + df.rank(method="dense", ascending=True) + df.rank(method="first", numeric_only=True) + + +def test_types_mean() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = df.mean() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s2: pd.Series = df.mean(axis=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df2: pd.DataFrame = df.mean(level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df3: pd.DataFrame = df.mean(axis=1, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df4: pd.DataFrame = df.mean(1, True, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series")# error: Incompatible types in assignment + # (expression has type "Union[Series, float]", variable has type "Series") + s3: pd.Series = df.mean( # type: ignore[assignment] + axis=1, skipna=True, numeric_only=False + ) + + +def test_types_median() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = df.median() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type 
"Union[Series, + # float]", variable has type "Series") + s2: pd.Series = df.median(axis=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df2: pd.DataFrame = df.median(level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df3: pd.DataFrame = df.median(axis=1, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df4: pd.DataFrame = df.median(1, True, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s3: pd.Series = df.median( # type: ignore[assignment] + axis=1, skipna=True, numeric_only=False + ) + + +def test_types_itertuples() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + res1: Iterable[Tuple[Any, ...]] = df.itertuples() + res2: Iterable[Tuple[Any, ...]] = df.itertuples(index=False, name="Foobar") + res3: Iterable[Tuple[Any, ...]] = df.itertuples(index=False, name=None) + + +def test_types_sum() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.sum() + df.sum(axis=1) + + +def test_types_cumsum() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.cumsum() + df.sum(axis=0) + + +def test_types_min() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.min() + df.min(axis=0) + + +def test_types_max() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.max() + df.max(axis=0) + + +def test_types_quantile() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.quantile([0.25, 0.5]) + df.quantile(0.75) + df.quantile() + + +def test_types_clip() -> None: + df = pd.DataFrame(data={"col1": [20, 12], "col2": [3, 14]}) + 
df.clip(lower=5, upper=15) + + +def test_types_abs() -> None: + df = pd.DataFrame(data={"col1": [-5, 1], "col2": [3, -14]}) + df.abs() + + +def test_types_var() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]}) + df.var() + df.var(axis=1, ddof=1) + df.var(skipna=True, numeric_only=False) + + +def test_types_std() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]}) + df.std() + df.std(axis=1, ddof=1) + df.std(skipna=True, numeric_only=False) + + +def test_types_idxmin() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.idxmin() + df.idxmin(axis=0) + + +def test_types_idxmax() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.idxmax() + df.idxmax(axis=0) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_value_counts() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]}) + df.value_counts() + + +def test_types_unique() -> None: + # This is really more for of a Series test + df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]}) + df["col1"].unique() + + +def test_types_apply() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.apply(lambda x: x ** 2) + df.apply(np.exp) + df.apply(str) + + +def test_types_applymap() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.applymap(lambda x: x ** 2) + df.applymap(np.exp) + df.applymap(str) + # na_action parameter was added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.applymap(np.exp, na_action="ignore") + df.applymap(str, na_action=None) + + +def test_types_element_wise_arithmetic() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df2 = pd.DataFrame(data={"col1": [10, 20], "col3": [3, 4]}) + + df + df2 + df.add(df2, fill_value=0) + + df - df2 + df.sub(df2, fill_value=0) + + df * df2 + df.mul(df2, fill_value=0) + + df / df2 + df.div(df2, fill_value=0) + + df // df2 + df.floordiv(df2, 
fill_value=0) + + df % df2 + df.mod(df2, fill_value=0) + + # divmod operation was added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + # noinspection PyTypeChecker + divmod(df, df2) + df.__divmod__(df2) + df.__rdivmod__(df2) + + +def test_types_melt() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.melt() + df.melt(id_vars=["col1"], value_vars=["col2"]) + df.melt( + id_vars=["col1"], + value_vars=["col2"], + var_name="someVariable", + value_name="someValue", + ) + + pd.melt(df) + pd.melt(df, id_vars=["col1"], value_vars=["col2"]) + pd.melt( + df, + id_vars=["col1"], + value_vars=["col2"], + var_name="someVariable", + value_name="someValue", + ) + + +def test_types_pivot() -> None: + df = pd.DataFrame( + data={ + "col1": ["first", "second", "third", "fourth"], + "col2": [50, 70, 56, 111], + "col3": ["A", "B", "B", "A"], + "col4": [100, 102, 500, 600], + } + ) + df.pivot(index="col1", columns="col3", values="col2") + df.pivot(index="col1", columns="col3") + df.pivot(index="col1", columns="col3", values=["col2", "col4"]) + + +def test_types_groupby() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]}) + df.index.name = "ind" + df.groupby(by="col1") + df.groupby(level="ind") + df.groupby(by="col1", sort=False, as_index=True) + df.groupby(by=["col1", "col2"]) + + df1: pd.DataFrame = df.groupby(by="col1").agg("sum") + df2: pd.DataFrame = df.groupby(level="ind").aggregate("sum") + df3: pd.DataFrame = df.groupby(by="col1", sort=False, as_index=True).transform( + lambda x: x.max() + ) + # error: Incompatible types in assignment (expression has type "Union[Series, + # DataFrame]", variable has type "DataFrame") + df4: pd.DataFrame = df.groupby( # type: ignore[assignment] + by=["col1", "col2"] + ).count() + df5: pd.DataFrame = df.groupby(by=["col1", "col2"]).filter(lambda x: x["col1"] > 0) + df6: pd.DataFrame = df.groupby(by=["col1", "col2"]).nunique() + + +# This was added in 1.1.0 
https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_group_by_with_dropna_keyword() -> None: + df = pd.DataFrame( + data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]} + ) + df.groupby(by="col2", dropna=True).sum() + df.groupby(by="col2", dropna=False).sum() + df.groupby(by="col2").sum() + + +def test_types_merge() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]}) + df.merge(df2) + df.merge(df2, on="col1") + df.merge(df2, on="col1", how="left") + df.merge(df2, on=["col1", "col2"], how="left") + df.merge(df2, on=("col1", "col2"), how="left") + l: List[str] = ["col1", "col2"] + df.merge(df2, on=l) + + +def test_types_plot() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.plot.hist() + df.plot.scatter(x="col2", y="col1") + + +def test_types_window() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.expanding() + df.expanding(axis=1, center=True) + + df.rolling(2) + df.rolling(2, axis=1, center=True) + + +def test_types_cov() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.cov() + df.cov(min_periods=1) + # ddof param was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.cov(ddof=2) + + +def test_types_to_numpy() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.to_numpy() + df.to_numpy(dtype="str", copy=True) + # na_value param was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_numpy(na_value=0) + + +# error: Untyped decorator makes function "test_types_to_feather" untyped +@td.skip_if_no("tabulate") # type: ignore[misc] +def test_to_markdown() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.to_markdown() + df.to_markdown(buf=None, mode="wt") + # index param was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + 
df.to_markdown(index=False) + + +# error: Untyped decorator makes function "test_types_to_feather" untyped +@td.skip_if_no("pyarrow") # type: ignore[misc] +def test_types_to_feather() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.to_feather("dummy_path") + # kwargs for pyarrow.feather.write_feather added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_feather( + "dummy_path", + compression="zstd", + compression_level=3, + chunksize=2, + ) + + # to_feather has been able to accept a buffer since pandas 1.0.0 + # See https://pandas.pydata.org/docs/whatsnew/v1.0.0.html + # Docstring and type were updated in 1.2.0. + # https://github.com/pandas-dev/pandas/pull/35408 + with tempfile.TemporaryFile() as f: + df.to_feather(f) + + +# compare() method added in 1.1.0 +# https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_compare() -> None: + df1 = pd.DataFrame( + data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]} + ) + df2 = pd.DataFrame( + data={"col1": [1, 2, 5, 6], "col2": [3, 4, 1, 1], "col3": [3, 4, 3, 2]} + ) + df1.compare(df2) + df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True) + + +def test_types_agg() -> None: + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"]) + df.agg("min") + df.agg(x=("A", max), y=("B", "min"), z=("C", np.mean)) + df.agg("mean", axis=1) + + +def test_types_describe() -> None: + df = pd.DataFrame( + data={ + "col1": [1, 2, -4], + "col2": [ + np.datetime64("2000-01-01"), + np.datetime64("2010-01-01"), + np.datetime64("2010-01-01"), + ], + } + ) + df.describe() + df.describe(percentiles=[0.5], include="all") + df.describe(exclude=np.number) + # datetime_is_numeric param added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.describe(datetime_is_numeric=True) + + +def test_types_to_string() -> None: + df = pd.DataFrame( + data={ + "col1": [1, None, -4], + "col2": [ + 
np.datetime64("2000-01-01"), + np.datetime64("2010-01-01"), + np.datetime64("2010-01-01"), + ], + } + ) + df.to_string( + index=True, + col_space=2, + header=["a", "b"], + na_rep="0", + justify="left", + max_rows=2, + min_rows=0, + max_cols=2, + show_dimensions=True, + line_width=3, + ) + # col_space accepting list or dict added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_string(col_space=[1, 2]) + df.to_string(col_space={"col1": 1, "col2": 3}) + + +def test_types_to_html() -> None: + df = pd.DataFrame( + data={ + "col1": [1, None, -4], + "col2": [ + np.datetime64("2000-01-01"), + np.datetime64("2010-01-01"), + np.datetime64("2010-01-01"), + ], + } + ) + df.to_html( + index=True, + col_space=2, + header=["a", "b"], + na_rep="0", + justify="left", + max_rows=2, + max_cols=2, + show_dimensions=True, + ) + # col_space accepting list or dict added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_html(col_space=[1, 2]) + df.to_html(col_space={"col1": 1, "col2": 3}) + + +def test_types_resample() -> None: + df = pd.DataFrame({"values": [2, 11, 3, 13, 14, 18, 17, 19]}) + df["date"] = pd.date_range("01/01/2018", periods=8, freq="W") + df.resample("M", on="date") + # origin and offset params added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.resample("20min", origin="epoch", offset=pd.Timedelta(2, "minutes"), on="date") + + +def test_types_from_dict() -> None: + pd.DataFrame.from_dict({"col_1": [3, 2, 1, 0], "col_2": ["a", "b", "c", "d"]}) + pd.DataFrame.from_dict({1: [3, 2, 1, 0], 2: ["a", "b", "c", "d"]}) + pd.DataFrame.from_dict({"a": {1: 2}, "b": {3: 4, 1: 4}}, orient="index") + pd.DataFrame.from_dict({"a": {"row1": 2}, "b": {"row2": 4, "row1": 4}}) + pd.DataFrame.from_dict({"a": (1, 2, 3), "b": (2, 4, 5)}) + pd.DataFrame.from_dict( + data={"col_1": {"a": 1}, "col_2": {"a": 1, "b": 2}}, orient="columns" + ) + + +def test_pipe() -> None: + def foo(df: pd.DataFrame) -> pd.DataFrame: + return df 
+ + df1: pd.DataFrame = pd.DataFrame({"a": [1]}).pipe(foo) + + df2: pd.DataFrame = ( + pd.DataFrame( + { + "price": [10, 11, 9, 13, 14, 18, 17, 19], + "volume": [50, 60, 40, 100, 50, 100, 40, 50], + } + ) + .assign(week_starting=pd.date_range("01/01/2018", periods=8, freq="W")) + .resample("M", on="week_starting") + .pipe(foo) + ) + + df3: pd.DataFrame = pd.DataFrame({"a": [1], "b": [1]}).groupby("a").pipe(foo) + + df4: pd.DataFrame = pd.DataFrame({"a": [1], "b": [1]}).style.pipe(foo) + + +# set_flags() method added in 1.2.0 +# https://pandas.pydata.org/docs/whatsnew/v1.2.0.html +def test_types_set_flags() -> None: + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( + allows_duplicate_labels=False + ) + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags( + allows_duplicate_labels=True + ) + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]) + + +# error: Untyped decorator makes function "test_types_to_parquet" untyped +@td.skip_if_no("pyarrow") # type: ignore[misc] +def test_types_to_parquet() -> None: + df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( + allows_duplicate_labels=False + ) + with tempfile.NamedTemporaryFile() as file: + df.to_parquet(Path(file.name)) + # to_parquet() returns bytes when no path given since 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + # error: Incompatible types in assignment (expression has type "Optional[bytes]", + # variable has type "bytes") + b: bytes = df.to_parquet() # type: ignore[assignment] + + +def test_types_to_latex() -> None: + df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + df.to_latex( + columns=["A"], label="some_label", caption="some_caption", multirow=True + ) + df.to_latex(escape=False, decimal=",", column_format="r") + # position param was added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.to_latex(position="some") + # caption param was extended to accept tuple in 1.2.0 + # 
https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.to_latex(caption=("cap1", "cap2")) + + +def test_types_explode() -> None: + df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + res1: pd.DataFrame = df.explode("A") + res2: pd.DataFrame = df.explode("A", ignore_index=False) + res3: pd.DataFrame = df.explode("A", ignore_index=True) + + +def test_types_rename() -> None: + df = pd.DataFrame(columns=["a"]) + col_map = {"a": "b"} + # error: Argument "columns" to "rename" of "DataFrame" has incompatible type + # "Dict[str, str]"; expected "Optional[Union[Mapping[Hashable, Any], + # Callable[[Hashable], Hashable]]]" + df.rename(columns=col_map) # type: ignore[arg-type] + df.rename(columns={"a": "b"}) + df.rename(columns={1: "b"}) + # Apparently all of these calls are accepted by pandas + df.rename(columns={None: "b"}) + df.rename(columns={"": "b"}) + df.rename(columns={(2, 1): "b"}) + + +def test_types_eq() -> None: + df1 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + res1: pd.DataFrame = df1 == 1 + df2 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + res2: pd.DataFrame = df1 == df2 + + +def test_types_as_type() -> None: + df1 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + df2: pd.DataFrame = df1.astype({"A": "int32"}) + + +def test_types_dot() -> None: + df1 = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + df2 = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + s1 = pd.Series([1, 1, 2, 1]) + np_array = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + # error: Incompatible types in assignment (expression has type "Union[DataFrame, + # Series]", variable has type "DataFrame") + df3: pd.DataFrame = df1 @ df2 # type: ignore[assignment] + df4: pd.DataFrame = df1.dot(df2) + # error: Incompatible types in assignment (expression has type "Union[DataFrame, + # Series]", variable has type "DataFrame") + df5: pd.DataFrame = df1 @ np_array # type: ignore[assignment] + df6: pd.DataFrame = df1.dot(np_array) + df7: pd.Series = df1 @ s1 + df8: pd.Series 
= df1.dot(s1) diff --git a/pandas/tests/typing/valid/test_interval.py b/pandas/tests/typing/valid/test_interval.py new file mode 100644 index 0000000000000..937af8cc85d0d --- /dev/null +++ b/pandas/tests/typing/valid/test_interval.py @@ -0,0 +1,38 @@ +# flake8: noqa: F841 +# pyright: reportGeneralTypeIssues = true + +import pandas as pd + + +def test_interval_init() -> None: + i1: pd.Interval = pd.Interval(1, 2, closed="both") + i2: pd.Interval = pd.Interval(1, right=2, closed="right") + i3: pd.Interval = pd.Interval(left=1, right=2, closed="left") + + +def test_interval_arithmetic() -> None: + i1: pd.Interval = pd.Interval(1, 2, closed="both") + i2: pd.Interval = pd.Interval(1, right=2, closed="right") + + i3: pd.Interval = i1 + 1 + i4: pd.Interval = i1 - 1 + i5: pd.Interval = i1 * 2 + i6: pd.Interval = i1 / 2 + i7: pd.Interval = i1 // 2 + + +def test_max_intervals() -> None: + i1 = pd.Interval( + pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02"), closed="both" + ) + i2 = pd.Interval( + pd.Timestamp("2000-01-01T12:00:00"), pd.Timestamp("2000-01-02"), closed="both" + ) + print(max(i1.left, i2.left)) + + +def test_interval_length() -> None: + i1 = pd.Interval( + pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02"), closed="both" + ) + i1.length.total_seconds() diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py new file mode 100644 index 0000000000000..b74dcc88f1363 --- /dev/null +++ b/pandas/tests/typing/valid/test_pandas.py @@ -0,0 +1,167 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true +import tempfile +from typing import ( + Any, + Dict, + List, + Union, +) + +import pandas as pd +from pandas.io.parsers import TextFileReader + + +def test_types_to_datetime() -> None: + df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + # error: No overload variant of "to_datetime" matches argument type "DataFrame" + 
pd.to_datetime(df) # type: ignore[call-overload] + # error: No overload variant of "to_datetime" matches argument types "DataFrame", + # "str", "str", "bool" + pd.to_datetime( # type: ignore[call-overload] + df, unit="s", origin="unix", infer_datetime_format=True + ) + # error: No overload variant of "to_datetime" matches argument types "DataFrame", + # "str", "bool", "None", "str", "bool" + pd.to_datetime( # type: ignore[call-overload] + df, unit="ns", dayfirst=True, utc=None, format="%M:%D", exact=False + ) + pd.to_datetime([1, 2], unit="D", origin=pd.Timestamp("01/01/2000")) + pd.to_datetime([1, 2], unit="D", origin=3) + + +def test_types_concat() -> None: + s = pd.Series([0, 1, -10]) + s2 = pd.Series([7, -5, 10]) + + pd.concat([s, s2]) + pd.concat([s, s2], axis=1) + pd.concat([s, s2], keys=["first", "second"], sort=True) + pd.concat([s, s2], keys=["first", "second"], names=["source", "row"]) + + # Depends on the axis + # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected + # "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rs1: Union[pd.Series, pd.DataFrame] = pd.concat( + {"a": s, "b": s2} # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected + # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + rs1a: Union[pd.Series, pd.DataFrame] = pd.concat( + {"a": s, "b": s2}, axis=1 # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[int, Series]"; expected + # "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rs2: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, 2: s2} # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[int, Series]"; expected + # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + rs2a: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, 2: s2}, axis=1 # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type 
"Dict[Optional[int], Series]"; + # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rs3: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, None: s2} # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], Series]"; + # expected "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + rs3a: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, None: s2}, axis=1 # type:ignore[arg-type] + ) + + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df2 = pd.DataFrame(data={"col1": [10, 20], "col2": [30, 40]}) + + pd.concat([df, df2]) + pd.concat([df, df2], axis=1) + pd.concat([df, df2], keys=["first", "second"], sort=True) + pd.concat([df, df2], keys=["first", "second"], names=["source", "row"]) + + # error: Incompatible types in assignment (expression has type "Union[DataFrame, + # Series]", variable has type "DataFrame") + # error: Argument 1 to "concat" has incompatible type "Dict[str, DataFrame]"; + # expected "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + result: pd.DataFrame = pd.concat( # type: ignore[assignment] + { + "a": pd.DataFrame([1, 2, 3]), + "b": pd.DataFrame([4, 5, 6]), + }, # type:ignore[arg-type] + axis=1, + ) + # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected + # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + result2: Union[pd.DataFrame, pd.Series] = pd.concat( + { + "a": pd.Series([1, 2, 3]), + "b": pd.Series([4, 5, 6]), + }, # type:ignore[arg-type] + axis=1, + ) + + # error: Argument 1 to "concat" has incompatible type "Dict[str, DataFrame]"; + # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rdf1: pd.DataFrame = pd.concat({"a": df, "b": df2}) # type:ignore[arg-type] + # error: Argument 1 to "concat" has incompatible type "Dict[int, DataFrame]"; + # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rdf2: pd.DataFrame = pd.concat({1: df, 2: df2}) # type:ignore[arg-type] + # 
error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], + # DataFrame]"; expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rdf3: pd.DataFrame = pd.concat({1: df, None: df2}) # type:ignore[arg-type] + + rdf4: pd.DataFrame = pd.concat(list(map(lambda x: s2, ["some_value", 3])), axis=1) + + +def test_types_json_normalize() -> None: + data1: List[Dict[str, Any]] = [ + {"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, + {"name": {"given": "More", "family": "Regner"}}, + {"id": 2, "name": "Faye Raker"}, + ] + df1: pd.DataFrame = pd.json_normalize(data=data1) + df2: pd.DataFrame = pd.json_normalize(data=data1, max_level=0, sep=";") + df3: pd.DataFrame = pd.json_normalize( + data=data1, meta_prefix="id", record_prefix="name", errors="raise" + ) + df4: pd.DataFrame = pd.json_normalize(data=data1, record_path=None, meta="id") + data2: Dict[str, Any] = {"name": {"given": "More", "family": "Regner"}} + df5: pd.DataFrame = pd.json_normalize(data=data2) + + +def test_types_read_csv() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Optional[str]", + # variable has type "str") + csv_df: str = df.to_csv() # type: ignore[assignment] + + with tempfile.NamedTemporaryFile() as file: + df.to_csv(file.name) + df2: pd.DataFrame = pd.read_csv(file.name) + df3: pd.DataFrame = pd.read_csv(file.name, sep="a", squeeze=False) + df4: pd.DataFrame = pd.read_csv( + file.name, + header=None, + prefix="b", + mangle_dupe_cols=True, + keep_default_na=False, + ) + df5: pd.DataFrame = pd.read_csv( + file.name, engine="python", true_values=[0, 1, 3], na_filter=False + ) + df6: pd.DataFrame = pd.read_csv( + file.name, + skiprows=lambda x: x in [0, 2], + skip_blank_lines=True, + dayfirst=False, + ) + df7: pd.DataFrame = pd.read_csv(file.name, nrows=2) + tfr1: TextFileReader = pd.read_csv( + file.name, nrows=2, iterator=True, chunksize=3 + ) + tfr2: TextFileReader = 
pd.read_csv(file.name, nrows=2, chunksize=1) + tfr3: TextFileReader = pd.read_csv( + file.name, nrows=2, iterator=False, chunksize=1 + ) + tfr4: TextFileReader = pd.read_csv(file.name, nrows=2, iterator=True) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py new file mode 100644 index 0000000000000..965241c067176 --- /dev/null +++ b/pandas/tests/typing/valid/test_series.py @@ -0,0 +1,583 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true + +import tempfile +from pathlib import Path +from typing import List + +import numpy as np + +import pandas as pd +from pandas._typing import Scalar +from pandas.core.window import ExponentialMovingWindow +from pandas.util import _test_decorators as td + + +def test_types_init() -> None: + pd.Series(1) + pd.Series((1, 2, 3)) + pd.Series(np.array([1, 2, 3])) + pd.Series(data=[1, 2, 3, 4], name="series") + pd.Series(data=[1, 2, 3, 4], dtype=np.int8) + pd.Series(data={"row1": [1, 2], "row2": [3, 4]}) + pd.Series(data=[1, 2, 3, 4], index=[4, 3, 2, 1], copy=True) + + +def test_types_any() -> None: + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res1: bool = pd.Series([False, False]).any() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res2: bool = pd.Series([False, False]).any( # type: ignore[assignment] + bool_only=False + ) + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res3: bool = pd.Series([np.nan]).any(skipna=False) # type: ignore[assignment] + + +def test_types_all() -> None: + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res1: bool = pd.Series([False, False]).all() # type: 
ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res2: bool = pd.Series([False, False]).all( # type: ignore[assignment] + bool_only=False + ) + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res3: bool = pd.Series([np.nan]).all(skipna=False) # type: ignore[assignment] + + +def test_types_csv() -> None: + s = pd.Series(data=[1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[str]", + # variable has type "str") + csv_df: str = s.to_csv() # type: ignore[assignment] + + with tempfile.NamedTemporaryFile() as file: + s.to_csv(file.name) + s2: pd.DataFrame = pd.read_csv(file.name) + + with tempfile.NamedTemporaryFile() as file: + s.to_csv(Path(file.name)) + s3: pd.DataFrame = pd.read_csv(Path(file.name)) + + # This keyword was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + with tempfile.NamedTemporaryFile() as file: + s.to_csv(file.name, errors="replace") + s4: pd.DataFrame = pd.read_csv(file.name) + + +def test_types_copy() -> None: + s = pd.Series(data=[1, 2, 3, 4]) + s2: pd.Series = s.copy() + + +def test_types_select() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s[0] + s[1:] + + +def test_types_iloc_iat() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s2 = pd.Series(data=[1, 2]) + s.loc["row1"] + s.iat[0] + s2.loc[0] + s2.iat[0] + + +def test_types_loc_at() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s2 = pd.Series(data=[1, 2]) + s.loc["row1"] + s.at["row1"] + s2.loc[1] + s2.at[1] + + +def test_types_boolean_indexing() -> None: + s = pd.Series([0, 1, 2]) + s[s > 1] + s[s] + + +def test_types_head_tail() -> None: + s = pd.Series([0, 1, 2]) + s.head(1) + s.tail(1) + + +def test_types_sample() -> None: + s = pd.Series([0, 1, 2]) + s.sample(frac=0.5) + s.sample(n=1) + + +def test_types_nlargest_nsmallest() -> None: + s = 
pd.Series([0, 1, 2]) + s.nlargest(1) + s.nlargest(1, "first") + s.nsmallest(1, "last") + s.nsmallest(1, "all") + + +def test_types_filter() -> None: + s = pd.Series(data=[1, 2, 3, 4], index=["cow", "coal", "coalesce", ""]) + s.filter(items=["cow"]) + s.filter(regex="co.*") + s.filter(like="al") + + +def test_types_setting() -> None: + s = pd.Series([0, 1, 2]) + s[3] = 4 + s[s == 1] = 5 + s[:] = 3 + + +def test_types_drop() -> None: + s = pd.Series([0, 1, 2]) + res: pd.Series = s.drop(0) + res2: pd.Series = s.drop([0, 1]) + res3: pd.Series = s.drop(0, axis=0) + # error: Incompatible types in assignment (expression has type "Series", variable + # has type "None") + res4: None = s.drop( # type: ignore[assignment] + [0, 1], inplace=True, errors="raise" + ) + # error: Incompatible types in assignment (expression has type "Series", variable + # has type "None") + res5: None = s.drop( # type: ignore[assignment] + [0, 1], inplace=True, errors="ignore" + ) + + +def test_types_drop_multilevel() -> None: + index = pd.MultiIndex( + levels=[["top", "bottom"], ["first", "second", "third"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + ) + s = pd.Series(data=[1, 2, 3, 4, 5, 6], index=index) + res: pd.Series = s.drop(labels="first", level=1) + + +def test_types_dropna() -> None: + s = pd.Series([1, np.nan, np.nan]) + res: pd.Series = s.dropna() + res2: None = s.dropna(axis=0, inplace=True) + + +def test_types_fillna() -> None: + s = pd.Series([1, np.nan, np.nan, 3]) + res: pd.Series = s.fillna(0) + res2: pd.Series = s.fillna(0, axis="index") + res3: pd.Series = s.fillna(method="backfill", axis=0) + res4: None = s.fillna(method="bfill", inplace=True) + res5: pd.Series = s.fillna(method="pad") + res6: pd.Series = s.fillna(method="ffill", limit=1) + + +def test_types_sort_index() -> None: + s = pd.Series([1, 2, 3], index=[2, 3, 1]) + res: pd.Series = s.sort_index() + res2: None = s.sort_index(ascending=False, inplace=True) + res3: pd.Series = s.sort_index(kind="mergesort") + + 
+# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_index_with_key() -> None: + s = pd.Series([1, 2, 3], index=["a", "B", "c"]) + res: pd.Series = s.sort_index(key=lambda k: k.str.lower()) + + +def test_types_sort_values() -> None: + s = pd.Series([4, 2, 1, 3]) + res: pd.Series = s.sort_values(0) + res2: pd.Series = s.sort_values(ascending=False) + res3: None = s.sort_values(inplace=True, kind="quicksort") + res4: pd.Series = s.sort_values(na_position="last") + res5: pd.Series = s.sort_values(ignore_index=True) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_values_with_key() -> None: + s = pd.Series([1, 2, 3], index=[2, 3, 1]) + res: pd.Series = s.sort_values(key=lambda k: -k) + + +def test_types_shift() -> None: + s = pd.Series([1, 2, 3]) + s.shift() + s.shift(axis=0, periods=1) + s.shift(-1, fill_value=0) + + +def test_types_rank() -> None: + s = pd.Series([1, 1, 2, 5, 6, np.nan, "million"]) + s.rank() + s.rank(axis=0, na_option="bottom") + s.rank(method="min", pct=True) + s.rank(method="dense", ascending=True) + s.rank(method="first", numeric_only=True) + + +def test_types_mean() -> None: + s = pd.Series([1, 2, 3, np.nan]) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f1: float = s.mean() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = s.mean(axis=0, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f2: float = s.mean(skipna=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f3: float = s.mean(numeric_only=False) # type: ignore[assignment] + + +def 
test_types_median() -> None: + s = pd.Series([1, 2, 3, np.nan]) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f1: float = s.median() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = s.median(axis=0, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f2: float = s.median(skipna=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f3: float = s.median(numeric_only=False) # type: ignore[assignment] + + +def test_types_sum() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.sum() + s.sum(axis=0, level=0) + s.sum(skipna=False) + s.sum(numeric_only=False) + s.sum(min_count=4) + + +def test_types_cumsum() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.cumsum() + s.cumsum(axis=0) + s.cumsum(skipna=False) + + +def test_types_min() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.min() + s.min(axis=0) + s.min(level=0) + s.min(skipna=False) + + +def test_types_max() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.max() + s.max(axis=0) + s.max(level=0) + s.max(skipna=False) + + +def test_types_quantile() -> None: + s = pd.Series([1, 2, 3, 10]) + s.quantile([0.25, 0.5]) + s.quantile(0.75) + s.quantile() + s.quantile(interpolation="nearest") + + +def test_types_clip() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.clip(lower=0, upper=5) + s.clip(lower=0, upper=5, inplace=True) + + +def test_types_abs() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.abs() + + +def test_types_var() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.var() + s.var(axis=0, ddof=1) + s.var(skipna=True, numeric_only=False) + + +def test_types_std() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.std() + s.std(axis=0, 
ddof=1) + s.std(skipna=True, numeric_only=False) + + +def test_types_idxmin() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.idxmin() + s.idxmin(axis=0) + + +def test_types_idxmax() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.idxmax() + s.idxmax(axis=0) + + +def test_types_value_counts() -> None: + s = pd.Series([1, 2]) + s.value_counts() + + +def test_types_unique() -> None: + s = pd.Series([-10, 2, 2, 3, 10, 10]) + s.unique() + + +def test_types_apply() -> None: + s = pd.Series([-10, 2, 2, 3, 10, 10]) + s.apply(lambda x: x ** 2) + s.apply(np.exp) + s.apply(str) + + +def test_types_element_wise_arithmetic() -> None: + s = pd.Series([0, 1, -10]) + s2 = pd.Series([7, -5, 10]) + + s + s2 + s.add(s2, fill_value=0) + + s - s2 + s.sub(s2, fill_value=0) + + s * s2 + s.mul(s2, fill_value=0) + + s / s2 + # error: Unexpected keyword argument "fill_value" + s.div(s2, fill_value=0) # type: ignore[call-arg] + + s // s2 + s.floordiv(s2, fill_value=0) + + s % s2 + s.mod(s2, fill_value=0) + + +def test_types_groupby() -> None: + s = pd.Series([4, 2, 1, 8], index=["a", "b", "a", "b"]) + s.groupby(["a", "b", "a", "b"]) + s.groupby(level=0) + s.groupby(s > 2) + + +# This added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_group_by_with_dropna_keyword() -> None: + s = pd.Series([1, 2, 3, 3], index=["col1", "col2", "col3", np.nan]) + s.groupby(level=0, dropna=True).sum() + s.groupby(level=0, dropna=False).sum() + s.groupby(level=0).sum() + + +def test_types_plot() -> None: + s = pd.Series([0, 1, 1, 0, -10]) + s.plot.hist() + + +def test_types_window() -> None: + s = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s.expanding() + s.expanding(axis=0, center=True) + + s.rolling(2) + s.rolling(2, axis=0, center=True) + + +def test_types_cov() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s2 = pd.Series([0, 2, 12, -4, 7, 9, 2]) + s1.cov(s2) + s1.cov(s2, min_periods=1) + # ddof param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + 
s1.cov(s2, ddof=2) + + +def test_update() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s1.update(pd.Series([0, 2, 12])) + # Series.update() accepting objects that can be coerced to a + # Series was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s1.update([1, 2, -4, 3]) + s1.update([1, "b", "c", "d"]) + s1.update({1: 9, 3: 4}) + + +# error: Untyped decorator makes function "test_to_markdown" untyped +@td.skip_if_no("tabulate") # type: ignore[misc] +def test_to_markdown() -> None: + s = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s.to_markdown() + s.to_markdown(buf=None, mode="wt") + # index param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.to_markdown(index=False) + + +# compare() method added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_compare() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s2 = pd.Series([0, 2, 12, -4, 7, 9, 2]) + s1.compare(s2) + s2.compare(s1, align_axis="columns", keep_shape=True, keep_equal=True) + + +def test_types_agg() -> None: + s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) + s.agg("min") + s.agg(x=max, y="min", z=np.mean) + s.agg("mean", axis=0) + + +def test_types_describe() -> None: + s = pd.Series([1, 2, 3, np.datetime64("2000-01-01")]) + s.describe() + s.describe(percentiles=[0.5], include="all") + s.describe(exclude=np.number) + # datetime_is_numeric param added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.describe(datetime_is_numeric=True) + + +def test_types_resample() -> None: + s = pd.Series(range(9), index=pd.date_range("1/1/2000", periods=9, freq="T")) + s.resample("3T").sum() + # origin and offset params added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.resample("20min", origin="epoch", offset=pd.Timedelta(value=2, unit="minutes")) + + +# set_flags() method added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html +def test_types_set_flags() -> None: + 
pd.Series([1, 2], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + pd.Series([3, 4], index=["a", "a"]).set_flags(allows_duplicate_labels=True) + pd.Series([5, 2], index=["a", "a"]) + + +def test_types_getitem() -> None: + s = pd.Series({"key": [0, 1, 2, 3]}) + key: List[int] = s["key"] + s2 = pd.Series([0, 1, 2, 3]) + value: int = s2[0] + s3: pd.Series = s[:2] + + +def test_types_eq() -> None: + s1 = pd.Series([1, 2, 3]) + res1: pd.Series = s1 == 1 + s2 = pd.Series([1, 2, 4]) + res2: pd.Series = s1 == s2 + + +def test_types_rename_axis() -> None: + s: pd.Series = pd.Series([1, 2, 3]).rename_axis("A") + + +def test_types_values() -> None: + n1: np.ndarray = pd.Series([1, 2, 3]).values + n2: np.ndarray = pd.Series(list("aabc")).values + n3: np.ndarray = pd.Series(list("aabc")).astype("category").values + n4: np.ndarray = pd.Series( + pd.date_range("20130101", periods=3, tz="US/Eastern") + ).values + + +def test_types_rename() -> None: + # Scalar + s1 = pd.Series([1, 2, 3]).rename("A") + # Hashable Sequence + s2 = pd.Series([1, 2, 3]).rename(("A", "B")) + # Optional + s3 = pd.Series([1, 2, 3]).rename(None) + + # Functions + def add1(x: int) -> int: + return x + 1 + + s4 = pd.Series([1, 2, 3]).rename(add1) + + # Dictionary + s5 = pd.Series([1, 2, 3]).rename({1: 10}) + # inplace + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "None") + s6: None = pd.Series([1, 2, 3]).rename( # type: ignore[assignment] + "A", inplace=True + ) + + +def test_types_ne() -> None: + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([1, 2, 4]) + s3: pd.Series = s1 != s2 + + +def test_types_bfill() -> None: + s1 = pd.Series([1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "Series") + s2: pd.Series = s1.bfill(inplace=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "None") + 
s3: None = s1.bfill(inplace=True) # type: ignore[assignment] + + +def test_types_ewm() -> None: + s1 = pd.Series([1, 2, 3]) + w1: ExponentialMovingWindow = s1.ewm( + com=0.3, min_periods=0, adjust=False, ignore_na=True, axis=0 + ) + w2: ExponentialMovingWindow = s1.ewm(alpha=0.4) + w3: ExponentialMovingWindow = s1.ewm(span=1.6) + w4: ExponentialMovingWindow = s1.ewm(halflife=0.7) + + +def test_types_ffill() -> None: + s1 = pd.Series([1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "Series") + s2: pd.Series = s1.ffill(inplace=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "None") + s3: None = s1.ffill(inplace=True) # type: ignore[assignment] + + +def test_types_as_type() -> None: + s1 = pd.Series([1, 2, 8, 9]) + s2: pd.Series = s1.astype("int32") + + +def test_types_dot() -> None: + s1 = pd.Series([0, 1, 2, 3]) + s2 = pd.Series([-1, 2, -3, 4]) + df1 = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]]) + n1 = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + sc1: Scalar = s1.dot(s2) + sc2: Scalar = s1 @ s2 + s3: pd.Series = s1.dot(df1) + s4: pd.Series = s1 @ df1 + n2: np.ndarray = s1.dot(n1) + n3: np.ndarray = s1 @ n1 diff --git a/pandas/tests/typing/valid/test_testing.py b/pandas/tests/typing/valid/test_testing.py new file mode 100644 index 0000000000000..a98d8faee5098 --- /dev/null +++ b/pandas/tests/typing/valid/test_testing.py @@ -0,0 +1,21 @@ +# pyright: reportGeneralTypeIssues = true + +import pandas as pd +import pandas._testing as tm + + +def test_types_assert_series_equal() -> None: + s1 = pd.Series([0, 1, 1, 0]) + s2 = pd.Series([0, 1, 1, 0]) + tm.assert_series_equal(left=s1, right=s2) + tm.assert_series_equal( + s1, + s2, + check_freq=False, + check_categorical=True, + check_flags=True, + check_datetimelike_compat=True, + ) + tm.assert_series_equal( + s1, s2, check_dtype=True, check_less_precise=True, 
check_names=True + ) diff --git a/pandas/tests/typing/valid/test_timestamp.py b/pandas/tests/typing/valid/test_timestamp.py new file mode 100644 index 0000000000000..cd9f920d5f9ff --- /dev/null +++ b/pandas/tests/typing/valid/test_timestamp.py @@ -0,0 +1,67 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true + +import datetime as dt + +import pandas as pd + + +def test_types_init() -> None: + ts: pd.Timestamp = pd.Timestamp("2021-03-01T12") + ts1: pd.Timestamp = pd.Timestamp(dt.date(2021, 3, 15)) + ts2: pd.Timestamp = pd.Timestamp(dt.datetime(2021, 3, 10, 12)) + ts3: pd.Timestamp = pd.Timestamp(pd.Timestamp("2021-03-01T12")) + ts4: pd.Timestamp = pd.Timestamp(1515590000.1, unit="s") + ts5: pd.Timestamp = pd.Timestamp(1515590000.1, unit="s", tz="US/Pacific") + ts6: pd.Timestamp = pd.Timestamp(1515590000100000000) # plain integer (nanosecond) + ts7: pd.Timestamp = pd.Timestamp(2021, 3, 10, 12) + ts8: pd.Timestamp = pd.Timestamp(year=2021, month=3, day=10, hour=12) + ts9: pd.Timestamp = pd.Timestamp( + year=2021, month=3, day=10, hour=12, tz="US/Pacific" + ) + + +def test_types_arithmetic() -> None: + # error: Incompatible types in assignment (expression has type "datetime", variable + # has type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts: pd.Timestamp = pd.to_datetime("2021-03-01") # type:ignore[assignment,arg-type] + # error: Incompatible types in assignment (expression has type "datetime", variable + # has type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts2: pd.Timestamp = pd.to_datetime("2021-01-01") # type:ignore[assignment,arg-type] + delta: pd.Timedelta = pd.to_timedelta("1 day") + + # error: Incompatible types in assignment (expression has type "timedelta", variable + # has type "Timedelta") + tsr: pd.Timedelta = ts - ts2 # type: ignore[assignment] 
+ tsr2: pd.Timestamp = ts + delta + + +def test_types_comparison() -> None: + # Incompatible types in assignment (expression has type "datetime", variable has + # type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts: pd.Timestamp = pd.to_datetime("2021-03-01") # type: ignore[assignment,arg-type] + # Incompatible types in assignment (expression has type "datetime", variable has + # type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts2: pd.Timestamp = pd.to_datetime( # type: ignore[assignment] + "2021-01-01" # type: ignore[arg-type] + ) + + tsr: bool = ts < ts2 + tsr2: bool = ts > ts2 + + +def test_types_pydatetime() -> None: + ts: pd.Timestamp = pd.Timestamp("2021-03-01T12") + + datet: dt.datetime = ts.to_pydatetime() + datet2: dt.datetime = ts.to_pydatetime(False) + datet3: dt.datetime = ts.to_pydatetime(warn=True) diff --git a/pyproject.toml b/pyproject.toml index c3ed07defa60d..c960279d2b192 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ markers = [ "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", ] +norecursedirs = ["pandas/tests/typing/invalid"] [tool.mypy] # Import discovery @@ -116,6 +117,10 @@ module = [ ] check_untyped_defs = false +[[tool.mypy.overrides]] +module = ["pandas.tests.typing"] +check_untyped_defs = true + [[tool.mypy.overrides]] module = [ "pandas.tests.apply.test_series_apply", @@ -150,8 +155,30 @@ skip = "pandas/__init__.py" [tool.pyright] pythonVersion = "3.8" typeCheckingMode = "basic" -include = ["pandas", "typings"] -exclude = ["pandas/tests", "pandas/io/clipboard", "pandas/util/version"] +include = ["pandas", "typings", "pandas/tests/typing/valid"] +exclude = [ + "pandas/io/clipboard", + "pandas/util/version", + # ignore everything in /pandas/tests except typing + "pandas/tests/*.py", + "pandas/tests/a*", + "pandas/tests/b*", + 
"pandas/tests/c*", + "pandas/tests/d*", + "pandas/tests/e*", + "pandas/tests/f*", + "pandas/tests/g*", + "pandas/tests/i*", + "pandas/tests/l*", + "pandas/tests/p*", + "pandas/tests/r*", + "pandas/tests/s*", + "pandas/tests/to*", + "pandas/tests/ts*", + "pandas/tests/s*", + "pandas/tests/u*", + "pandas/tests/w*", +] reportGeneralTypeIssues = false reportConstantRedefinition = false reportFunctionMemberAccess = false From 54773e5fc683adc9bc463463bd55c81cb940e15c Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Tue, 25 Jan 2022 17:56:56 +0100 Subject: [PATCH 02/26] README, isort, skip_if_no --- pandas/tests/typing/README.md | 36 +++++++++++++++++++++++++ pandas/tests/typing/valid/test_frame.py | 10 ++++--- 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/typing/README.md diff --git a/pandas/tests/typing/README.md b/pandas/tests/typing/README.md new file mode 100644 index 0000000000000..3762218fbd7c0 --- /dev/null +++ b/pandas/tests/typing/README.md @@ -0,0 +1,36 @@ +## Purpose of those tests + +The tests contained in the `valid` directory are snippets that when +process through a type checker ensure that type annotations and type +stubs from this repository conform to common pandas API use-patterns. + +## Running the tests + +Tests can be run in following ways: + +`pyright pandas/tests/typing` + +`mypy pandas/tests/typing` + +They'll also be automatically detected and executed by pytest. This +is to ensure that the test code itself is valid. + +## Developing the tests + +Some tests contain type checker ignore-instructions along with an +error that's supposed to be thrown. + + # error: No overload variant of "to_datetime" matches argument type "DataFrame" + pd.to_datetime(df) # type: ignore[call-overload] + +All such constructs are placed because of the missing/invalid API +type information. When the API signature becomes valid again type +checker will ask you to remove `type: ignore`. Please remove the +above comment as well. 
+ +When adding new tests please use the above solution as well. + +## Origins and attribution + +The tests come from the [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) +repository originally released under the MIT license. diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index b4d7b637ede3f..ff381c7612465 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -35,17 +35,17 @@ def test_types_to_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_csv(file.name) df2: pd.DataFrame = pd.read_csv(file.name) - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_csv(Path(file.name)) df3: pd.DataFrame = pd.read_csv(Path(file.name)) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_csv(file.name, errors="replace") df4: pd.DataFrame = pd.read_csv(file.name) @@ -529,6 +529,7 @@ def test_types_merge() -> None: df.merge(df2, on=l) +@td.skip_if_no("matplotlib") # type: ignore[misc] def test_types_plot() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) df.plot.hist() @@ -710,6 +711,7 @@ def test_types_from_dict() -> None: ) +@td.skip_if_no("jinja") # type: ignore[misc] def test_pipe() -> None: def foo(df: pd.DataFrame) -> pd.DataFrame: return df @@ -751,7 +753,7 @@ def test_types_to_parquet() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( allows_duplicate_labels=False ) - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_parquet(Path(file.name)) # to_parquet() returns bytes when no path given since 1.2.0 # 
https://pandas.pydata.org/docs/whatsnew/v1.2.0.html From 5bedab934fbaf2497acdfc1076951fc394875f28 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Tue, 25 Jan 2022 18:22:45 +0100 Subject: [PATCH 03/26] Configured isort for black profile --- .pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6de9bd786404f..d40ef03b84c0d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,6 +55,7 @@ repos: rev: 5.10.1 hooks: - id: isort + args: ["--profile", "black"] - repo: https://github.com/asottile/pyupgrade rev: v2.31.0 hooks: From a58cd25113c4b33a8197374f690db08004a0506d Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 26 Jan 2022 08:29:18 +0100 Subject: [PATCH 04/26] Configured isort for black profile --- LICENSES/STUBS_LICENSE | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 LICENSES/STUBS_LICENSE diff --git a/LICENSES/STUBS_LICENSE b/LICENSES/STUBS_LICENSE new file mode 100644 index 0000000000000..d236c20fbe6b0 --- /dev/null +++ b/LICENSES/STUBS_LICENSE @@ -0,0 +1,30 @@ +MIT License + +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + + +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" \ No newline at end of file From d2b55ab0f7c3986df5b2e133f56e8b563f314e12 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 26 Jan 2022 08:33:04 +0100 Subject: [PATCH 05/26] Copyright notice, tempfile --- LICENSES/STUBS_LICENSE | 11 +---------- pandas/tests/typing/valid/test_frame.py | 15 +++++++++++---- pandas/tests/typing/valid/test_interval.py | 7 +++++++ pandas/tests/typing/valid/test_pandas.py | 7 +++++++ pandas/tests/typing/valid/test_series.py | 7 +++++++ pandas/tests/typing/valid/test_testing.py | 7 +++++++ pandas/tests/typing/valid/test_timestamp.py | 7 +++++++ 7 files changed, 47 insertions(+), 14 deletions(-) diff --git a/LICENSES/STUBS_LICENSE b/LICENSES/STUBS_LICENSE index d236c20fbe6b0..811d0beb50ddb 100644 --- a/LICENSES/STUBS_LICENSE +++ b/LICENSES/STUBS_LICENSE @@ -18,13 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -""" -Copyright (c) Virtus Lab sp. z o.o. (Ltd.) - -Distributed under the terms of the MIT license. - -The full license is in the STUBS_LICENSE file, distributed with this software. -""" \ No newline at end of file +SOFTWARE. 
\ No newline at end of file diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index ff381c7612465..3715c503c67ad 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true @@ -35,17 +42,17 @@ def test_types_to_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_csv(file.name) df2: pd.DataFrame = pd.read_csv(file.name) - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_csv(Path(file.name)) df3: pd.DataFrame = pd.read_csv(Path(file.name)) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_csv(file.name, errors="replace") df4: pd.DataFrame = pd.read_csv(file.name) @@ -753,7 +760,7 @@ def test_types_to_parquet() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( allows_duplicate_labels=False ) - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_parquet(Path(file.name)) # to_parquet() returns bytes when no path given since 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html diff --git a/pandas/tests/typing/valid/test_interval.py b/pandas/tests/typing/valid/test_interval.py index 937af8cc85d0d..6a6b2abf38c5f 100644 --- a/pandas/tests/typing/valid/test_interval.py +++ b/pandas/tests/typing/valid/test_interval.py @@ -1,3 +1,10 @@ 
+""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # pyright: reportGeneralTypeIssues = true diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index b74dcc88f1363..42342f10b387c 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 965241c067176..b3df4ff478437 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true diff --git a/pandas/tests/typing/valid/test_testing.py b/pandas/tests/typing/valid/test_testing.py index a98d8faee5098..b5ef32560bdff 100644 --- a/pandas/tests/typing/valid/test_testing.py +++ b/pandas/tests/typing/valid/test_testing.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. 
+""" # pyright: reportGeneralTypeIssues = true import pandas as pd diff --git a/pandas/tests/typing/valid/test_timestamp.py b/pandas/tests/typing/valid/test_timestamp.py index cd9f920d5f9ff..1cbb55f99bf91 100644 --- a/pandas/tests/typing/valid/test_timestamp.py +++ b/pandas/tests/typing/valid/test_timestamp.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true From 479b6bfaaffa0a3311c0584a808b99da744fd373 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 26 Jan 2022 08:46:04 +0100 Subject: [PATCH 06/26] Remove redundant profile configuration --- .pre-commit-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d40ef03b84c0d..6de9bd786404f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,7 +55,6 @@ repos: rev: 5.10.1 hooks: - id: isort - args: ["--profile", "black"] - repo: https://github.com/asottile/pyupgrade rev: v2.31.0 hooks: From feb94b9778a907d3b1c7db2c6595830b4db3bba2 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Thu, 27 Jan 2022 14:41:33 +0100 Subject: [PATCH 07/26] Missing matplotlib. 
Isort again --- pandas/tests/typing/valid/test_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index b3df4ff478437..5e7090f9a7637 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -400,6 +400,7 @@ def test_types_group_by_with_dropna_keyword() -> None: s.groupby(level=0).sum() +@td.skip_if_no("matplotlib") # type: ignore[misc] def test_types_plot() -> None: s = pd.Series([0, 1, 1, 0, -10]) s.plot.hist() From 4d1eefd9ee0fff44da3074772eff6299ec908d09 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Thu, 27 Jan 2022 15:00:30 +0100 Subject: [PATCH 08/26] Isort --- pandas/tests/typing/valid/test_frame.py | 2 +- pandas/tests/typing/valid/test_pandas.py | 1 + pandas/tests/typing/valid/test_series.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index 3715c503c67ad..48a865ebcc2c4 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -9,8 +9,8 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true import io -import tempfile from pathlib import Path +import tempfile from typing import ( Any, Iterable, diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index 42342f10b387c..5e1be8d6d6148 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -17,6 +17,7 @@ ) import pandas as pd + from pandas.io.parsers import TextFileReader diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 5e7090f9a7637..2ba05be61ce4d 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -9,14 +9,15 @@ # TODO: many functions need return types 
annotations for pyright # to run with reportGeneralTypeIssues = true -import tempfile from pathlib import Path +import tempfile from typing import List import numpy as np -import pandas as pd from pandas._typing import Scalar + +import pandas as pd from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td From fc359756a14e9b212e07dca293516c4be950cbce Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Mon, 24 Jan 2022 18:07:47 +0100 Subject: [PATCH 09/26] Add typing tests according to the issue 45252. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test stubs originally authored by Joanna Sendorek, Zbigniew Królikowski and pandas_stubs contributors: https://github.com/VirtusLab/pandas-stubs/graphs/contributors Adapted to pandas needs by Torsten Wörtwein --- pandas/tests/typing/__init__.py | 0 pandas/tests/typing/valid/__init__.py | 0 pandas/tests/typing/valid/test_frame.py | 825 ++++++++++++++++++++ pandas/tests/typing/valid/test_interval.py | 38 + pandas/tests/typing/valid/test_pandas.py | 167 ++++ pandas/tests/typing/valid/test_series.py | 583 ++++++++++++++ pandas/tests/typing/valid/test_testing.py | 21 + pandas/tests/typing/valid/test_timestamp.py | 67 ++ pyproject.toml | 31 +- 9 files changed, 1730 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/typing/__init__.py create mode 100644 pandas/tests/typing/valid/__init__.py create mode 100644 pandas/tests/typing/valid/test_frame.py create mode 100644 pandas/tests/typing/valid/test_interval.py create mode 100644 pandas/tests/typing/valid/test_pandas.py create mode 100644 pandas/tests/typing/valid/test_series.py create mode 100644 pandas/tests/typing/valid/test_testing.py create mode 100644 pandas/tests/typing/valid/test_timestamp.py diff --git a/pandas/tests/typing/__init__.py b/pandas/tests/typing/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git 
a/pandas/tests/typing/valid/__init__.py b/pandas/tests/typing/valid/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py new file mode 100644 index 0000000000000..b4d7b637ede3f --- /dev/null +++ b/pandas/tests/typing/valid/test_frame.py @@ -0,0 +1,825 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true +import io +import tempfile +from pathlib import Path +from typing import ( + Any, + Iterable, + List, + Tuple, +) + +import numpy as np + +import pandas as pd +from pandas.util import _test_decorators as td + + +def test_types_init() -> None: + pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1]) + pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8) + pd.DataFrame( + np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + columns=["a", "b", "c"], + dtype=np.int8, + copy=True, + ) + + +def test_types_to_csv() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Optional[str]", + # variable has type "str") + csv_df: str = df.to_csv() # type: ignore[assignment] + + with tempfile.NamedTemporaryFile() as file: + df.to_csv(file.name) + df2: pd.DataFrame = pd.read_csv(file.name) + + with tempfile.NamedTemporaryFile() as file: + df.to_csv(Path(file.name)) + df3: pd.DataFrame = pd.read_csv(Path(file.name)) + + # This keyword was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + with tempfile.NamedTemporaryFile() as file: + df.to_csv(file.name, errors="replace") + df4: pd.DataFrame = pd.read_csv(file.name) + + # Testing support for binary file handles, added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.to_csv(io.BytesIO(), encoding="utf-8", compression="gzip") + + +def test_types_to_csv_when_path_passed() 
-> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + path: Path = Path("./dummy_path.txt") + try: + assert not path.exists() + df.to_csv(path) + df5: pd.DataFrame = pd.read_csv(path) + finally: + path.unlink() + + +def test_types_copy() -> None: + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + df2: pd.DataFrame = df.copy() + + +def test_types_getitem() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + i = pd.Index(["col1", "col2"]) + s = pd.Series(["col1", "col2"]) + select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) + a = np.array(["col1", "col2"]) + df["col1"] + df[5] + df[["col1", "col2"]] + df[1:] + df[s] + df[a] + df[select_df] + df[i] + + +def test_slice_setitem() -> None: + # Due to the bug in pandas 1.2.3 + # (https://github.com/pandas-dev/pandas/issues/40440), + # this is in separate test case + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + df[1:] = ["a", "b", "c"] + + +def test_types_setitem() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + i = pd.Index(["col1", "col2"]) + s = pd.Series(["col1", "col2"]) + a = np.array(["col1", "col2"]) + df["col1"] = [1, 2] + df[5] = [5, 6] + df[["col1", "col2"]] = [[1, 2], [3, 4]] + df[s] = [5, 6] + df[a] = [[1, 2], [3, 4]] + df[i] = [8, 9] + + +def test_types_setitem_mask() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) + select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) + df[select_df] = [1, 2, 3] + + +def test_types_iloc_iat() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.iloc[1, 1] + df.iloc[[1], [1]] + df.iat[0, 0] + + +def test_types_loc_at() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.loc[[0], "col1"] + df.at[0, "col1"] + + +def test_types_boolean_indexing() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df[df > 1] + df[~(df > 1.0)] + + +def 
test_types_head_tail() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.head(1) + df.tail(1) + + +def test_types_assign() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.assign(col3=lambda frame: frame.sum(axis=1)) + df["col3"] = df.sum(axis=1) + + +def test_types_sample() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.sample(frac=0.5) + df.sample(n=1) + + +def test_types_nlargest_nsmallest() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.nlargest(1, "col1") + df.nsmallest(1, "col2") + + +def test_types_filter() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.filter(items=["col1"]) + df.filter(regex="co.*") + df.filter(like="1") + + +def test_types_setting() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df["col1"] = 1 + df[df == 1] = 7 + + +def test_types_drop() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + res: pd.DataFrame = df.drop("col1", axis=1) + res2: pd.DataFrame = df.drop(columns=["col1"]) + res3: pd.DataFrame = df.drop({0}) + res4: pd.DataFrame = df.drop(index={0}) + res5: pd.DataFrame = df.drop(columns={"col1"}) + res6: pd.DataFrame = df.drop(index=1) + res7: pd.DataFrame = df.drop(labels=0) + res8: None = df.drop([0, 0], inplace=True) + + +def test_types_dropna() -> None: + df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) + res: pd.DataFrame = df.dropna() + res2: pd.DataFrame = df.dropna(axis=1, thresh=1) + res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True) + + +def test_types_fillna() -> None: + df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) + res: pd.DataFrame = df.fillna(0) + res2: None = df.fillna(method="pad", axis=1, inplace=True) + + +def test_types_sort_index() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=[5, 1, 3, 2]) + df2 = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=["a", "b", 
"c", "d"]) + res: pd.DataFrame = df.sort_index() + level1 = (1, 2) + res2: pd.DataFrame = df.sort_index(ascending=False, level=level1) + level2: List[str] = ["a", "b", "c"] + # error: Argument "level" to "sort_index" of "DataFrame" has incompatible type + # "List[str]"; expected "Optional[Union[Hashable, int]]" + res3: pd.DataFrame = df2.sort_index(level=level2) # type: ignore[arg-type] + res4: pd.DataFrame = df.sort_index(ascending=False, level=3) + res5: None = df.sort_index(kind="mergesort", inplace=True) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_index_with_key() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=["a", "b", "C", "d"]) + res: pd.DataFrame = df.sort_index(key=lambda k: k.str.lower()) + + +def test_types_set_index() -> None: + df = pd.DataFrame( + data={"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}, index=[5, 1, 3, 2] + ) + res: pd.DataFrame = df.set_index("col1") + res2: pd.DataFrame = df.set_index("col1", drop=False) + res3: pd.DataFrame = df.set_index("col1", append=True) + res4: pd.DataFrame = df.set_index("col1", verify_integrity=True) + res5: pd.DataFrame = df.set_index(["col1", "col2"]) + res6: None = df.set_index("col1", inplace=True) + + +def test_types_query() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 0, 1, 7]}) + res: pd.DataFrame = df.query("col1 > col2") + res2: None = df.query("col1 % col2 == 0", inplace=True) + + +def test_types_eval() -> None: + df = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 0, 1, 7]}) + df.eval("col1 > col2") + res: None = df.eval("C = col1 % col2 == 0", inplace=True) + + +def test_types_sort_values() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + res: pd.DataFrame = df.sort_values("col1") + res2: None = df.sort_values("col1", ascending=False, inplace=True) + res3: pd.DataFrame = df.sort_values(by=["col1", "col2"], ascending=[True, False]) + + +# This was added in 1.1.0 
https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_values_with_key() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + res: pd.DataFrame = df.sort_values(by="col1", key=lambda k: -k) + + +def test_types_shift() -> None: + df = pd.DataFrame(data={"col1": [1, 1], "col2": [3, 4]}) + df.shift() + df.shift(1) + df.shift(-1) + + +def test_types_rank() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.rank(axis=0, na_option="bottom") + df.rank(method="min", pct=True) + df.rank(method="dense", ascending=True) + df.rank(method="first", numeric_only=True) + + +def test_types_mean() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = df.mean() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s2: pd.Series = df.mean(axis=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df2: pd.DataFrame = df.mean(level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df3: pd.DataFrame = df.mean(axis=1, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df4: pd.DataFrame = df.mean(1, True, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series")# error: Incompatible types in assignment + # (expression has type "Union[Series, float]", variable has type "Series") + s3: pd.Series = df.mean( # type: ignore[assignment] + axis=1, skipna=True, numeric_only=False + ) + + 
+def test_types_median() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = df.median() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s2: pd.Series = df.median(axis=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df2: pd.DataFrame = df.median(level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df3: pd.DataFrame = df.median(axis=1, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "DataFrame") + df4: pd.DataFrame = df.median(1, True, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s3: pd.Series = df.median( # type: ignore[assignment] + axis=1, skipna=True, numeric_only=False + ) + + +def test_types_itertuples() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + res1: Iterable[Tuple[Any, ...]] = df.itertuples() + res2: Iterable[Tuple[Any, ...]] = df.itertuples(index=False, name="Foobar") + res3: Iterable[Tuple[Any, ...]] = df.itertuples(index=False, name=None) + + +def test_types_sum() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.sum() + df.sum(axis=1) + + +def test_types_cumsum() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.cumsum() + df.sum(axis=0) + + +def test_types_min() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.min() + df.min(axis=0) + + +def test_types_max() -> None: + df = 
pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.max() + df.max(axis=0) + + +def test_types_quantile() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.quantile([0.25, 0.5]) + df.quantile(0.75) + df.quantile() + + +def test_types_clip() -> None: + df = pd.DataFrame(data={"col1": [20, 12], "col2": [3, 14]}) + df.clip(lower=5, upper=15) + + +def test_types_abs() -> None: + df = pd.DataFrame(data={"col1": [-5, 1], "col2": [3, -14]}) + df.abs() + + +def test_types_var() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]}) + df.var() + df.var(axis=1, ddof=1) + df.var(skipna=True, numeric_only=False) + + +def test_types_std() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]}) + df.std() + df.std(axis=1, ddof=1) + df.std(skipna=True, numeric_only=False) + + +def test_types_idxmin() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.idxmin() + df.idxmin(axis=0) + + +def test_types_idxmax() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.idxmax() + df.idxmax(axis=0) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_value_counts() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]}) + df.value_counts() + + +def test_types_unique() -> None: + # This is really more for of a Series test + df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]}) + df["col1"].unique() + + +def test_types_apply() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.apply(lambda x: x ** 2) + df.apply(np.exp) + df.apply(str) + + +def test_types_applymap() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df.applymap(lambda x: x ** 2) + df.applymap(np.exp) + df.applymap(str) + # na_action parameter was added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.applymap(np.exp, na_action="ignore") + df.applymap(str, na_action=None) + + +def 
test_types_element_wise_arithmetic() -> None: + df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) + df2 = pd.DataFrame(data={"col1": [10, 20], "col3": [3, 4]}) + + df + df2 + df.add(df2, fill_value=0) + + df - df2 + df.sub(df2, fill_value=0) + + df * df2 + df.mul(df2, fill_value=0) + + df / df2 + df.div(df2, fill_value=0) + + df // df2 + df.floordiv(df2, fill_value=0) + + df % df2 + df.mod(df2, fill_value=0) + + # divmod operation was added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + # noinspection PyTypeChecker + divmod(df, df2) + df.__divmod__(df2) + df.__rdivmod__(df2) + + +def test_types_melt() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df.melt() + df.melt(id_vars=["col1"], value_vars=["col2"]) + df.melt( + id_vars=["col1"], + value_vars=["col2"], + var_name="someVariable", + value_name="someValue", + ) + + pd.melt(df) + pd.melt(df, id_vars=["col1"], value_vars=["col2"]) + pd.melt( + df, + id_vars=["col1"], + value_vars=["col2"], + var_name="someVariable", + value_name="someValue", + ) + + +def test_types_pivot() -> None: + df = pd.DataFrame( + data={ + "col1": ["first", "second", "third", "fourth"], + "col2": [50, 70, 56, 111], + "col3": ["A", "B", "B", "A"], + "col4": [100, 102, 500, 600], + } + ) + df.pivot(index="col1", columns="col3", values="col2") + df.pivot(index="col1", columns="col3") + df.pivot(index="col1", columns="col3", values=["col2", "col4"]) + + +def test_types_groupby() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]}) + df.index.name = "ind" + df.groupby(by="col1") + df.groupby(level="ind") + df.groupby(by="col1", sort=False, as_index=True) + df.groupby(by=["col1", "col2"]) + + df1: pd.DataFrame = df.groupby(by="col1").agg("sum") + df2: pd.DataFrame = df.groupby(level="ind").aggregate("sum") + df3: pd.DataFrame = df.groupby(by="col1", sort=False, as_index=True).transform( + lambda x: x.max() + ) + # error: Incompatible types in assignment 
(expression has type "Union[Series, + # DataFrame]", variable has type "DataFrame") + df4: pd.DataFrame = df.groupby( # type: ignore[assignment] + by=["col1", "col2"] + ).count() + df5: pd.DataFrame = df.groupby(by=["col1", "col2"]).filter(lambda x: x["col1"] > 0) + df6: pd.DataFrame = df.groupby(by=["col1", "col2"]).nunique() + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_group_by_with_dropna_keyword() -> None: + df = pd.DataFrame( + data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]} + ) + df.groupby(by="col2", dropna=True).sum() + df.groupby(by="col2", dropna=False).sum() + df.groupby(by="col2").sum() + + +def test_types_merge() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]}) + df.merge(df2) + df.merge(df2, on="col1") + df.merge(df2, on="col1", how="left") + df.merge(df2, on=["col1", "col2"], how="left") + df.merge(df2, on=("col1", "col2"), how="left") + l: List[str] = ["col1", "col2"] + df.merge(df2, on=l) + + +def test_types_plot() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.plot.hist() + df.plot.scatter(x="col2", y="col1") + + +def test_types_window() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.expanding() + df.expanding(axis=1, center=True) + + df.rolling(2) + df.rolling(2, axis=1, center=True) + + +def test_types_cov() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.cov() + df.cov(min_periods=1) + # ddof param was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.cov(ddof=2) + + +def test_types_to_numpy() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.to_numpy() + df.to_numpy(dtype="str", copy=True) + # na_value param was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_numpy(na_value=0) + + +# error: 
Untyped decorator makes function "test_types_to_feather" untyped +@td.skip_if_no("tabulate") # type: ignore[misc] +def test_to_markdown() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.to_markdown() + df.to_markdown(buf=None, mode="wt") + # index param was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_markdown(index=False) + + +# error: Untyped decorator makes function "test_types_to_feather" untyped +@td.skip_if_no("pyarrow") # type: ignore[misc] +def test_types_to_feather() -> None: + df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) + df.to_feather("dummy_path") + # kwargs for pyarrow.feather.write_feather added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_feather( + "dummy_path", + compression="zstd", + compression_level=3, + chunksize=2, + ) + + # to_feather has been able to accept a buffer since pandas 1.0.0 + # See https://pandas.pydata.org/docs/whatsnew/v1.0.0.html + # Docstring and type were updated in 1.2.0. 
+ # https://github.com/pandas-dev/pandas/pull/35408 + with tempfile.TemporaryFile() as f: + df.to_feather(f) + + +# compare() method added in 1.1.0 +# https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_compare() -> None: + df1 = pd.DataFrame( + data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]} + ) + df2 = pd.DataFrame( + data={"col1": [1, 2, 5, 6], "col2": [3, 4, 1, 1], "col3": [3, 4, 3, 2]} + ) + df1.compare(df2) + df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True) + + +def test_types_agg() -> None: + df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"]) + df.agg("min") + df.agg(x=("A", max), y=("B", "min"), z=("C", np.mean)) + df.agg("mean", axis=1) + + +def test_types_describe() -> None: + df = pd.DataFrame( + data={ + "col1": [1, 2, -4], + "col2": [ + np.datetime64("2000-01-01"), + np.datetime64("2010-01-01"), + np.datetime64("2010-01-01"), + ], + } + ) + df.describe() + df.describe(percentiles=[0.5], include="all") + df.describe(exclude=np.number) + # datetime_is_numeric param added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.describe(datetime_is_numeric=True) + + +def test_types_to_string() -> None: + df = pd.DataFrame( + data={ + "col1": [1, None, -4], + "col2": [ + np.datetime64("2000-01-01"), + np.datetime64("2010-01-01"), + np.datetime64("2010-01-01"), + ], + } + ) + df.to_string( + index=True, + col_space=2, + header=["a", "b"], + na_rep="0", + justify="left", + max_rows=2, + min_rows=0, + max_cols=2, + show_dimensions=True, + line_width=3, + ) + # col_space accepting list or dict added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_string(col_space=[1, 2]) + df.to_string(col_space={"col1": 1, "col2": 3}) + + +def test_types_to_html() -> None: + df = pd.DataFrame( + data={ + "col1": [1, None, -4], + "col2": [ + np.datetime64("2000-01-01"), + np.datetime64("2010-01-01"), + np.datetime64("2010-01-01"), + ], + } + ) + 
df.to_html( + index=True, + col_space=2, + header=["a", "b"], + na_rep="0", + justify="left", + max_rows=2, + max_cols=2, + show_dimensions=True, + ) + # col_space accepting list or dict added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.to_html(col_space=[1, 2]) + df.to_html(col_space={"col1": 1, "col2": 3}) + + +def test_types_resample() -> None: + df = pd.DataFrame({"values": [2, 11, 3, 13, 14, 18, 17, 19]}) + df["date"] = pd.date_range("01/01/2018", periods=8, freq="W") + df.resample("M", on="date") + # origin and offset params added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + df.resample("20min", origin="epoch", offset=pd.Timedelta(2, "minutes"), on="date") + + +def test_types_from_dict() -> None: + pd.DataFrame.from_dict({"col_1": [3, 2, 1, 0], "col_2": ["a", "b", "c", "d"]}) + pd.DataFrame.from_dict({1: [3, 2, 1, 0], 2: ["a", "b", "c", "d"]}) + pd.DataFrame.from_dict({"a": {1: 2}, "b": {3: 4, 1: 4}}, orient="index") + pd.DataFrame.from_dict({"a": {"row1": 2}, "b": {"row2": 4, "row1": 4}}) + pd.DataFrame.from_dict({"a": (1, 2, 3), "b": (2, 4, 5)}) + pd.DataFrame.from_dict( + data={"col_1": {"a": 1}, "col_2": {"a": 1, "b": 2}}, orient="columns" + ) + + +def test_pipe() -> None: + def foo(df: pd.DataFrame) -> pd.DataFrame: + return df + + df1: pd.DataFrame = pd.DataFrame({"a": [1]}).pipe(foo) + + df2: pd.DataFrame = ( + pd.DataFrame( + { + "price": [10, 11, 9, 13, 14, 18, 17, 19], + "volume": [50, 60, 40, 100, 50, 100, 40, 50], + } + ) + .assign(week_starting=pd.date_range("01/01/2018", periods=8, freq="W")) + .resample("M", on="week_starting") + .pipe(foo) + ) + + df3: pd.DataFrame = pd.DataFrame({"a": [1], "b": [1]}).groupby("a").pipe(foo) + + df4: pd.DataFrame = pd.DataFrame({"a": [1], "b": [1]}).style.pipe(foo) + + +# set_flags() method added in 1.2.0 +# https://pandas.pydata.org/docs/whatsnew/v1.2.0.html +def test_types_set_flags() -> None: + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( + 
allows_duplicate_labels=False + ) + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags( + allows_duplicate_labels=True + ) + pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]) + + +# error: Untyped decorator makes function "test_types_to_parquet" untyped +@td.skip_if_no("pyarrow") # type: ignore[misc] +def test_types_to_parquet() -> None: + df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( + allows_duplicate_labels=False + ) + with tempfile.NamedTemporaryFile() as file: + df.to_parquet(Path(file.name)) + # to_parquet() returns bytes when no path given since 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + # error: Incompatible types in assignment (expression has type "Optional[bytes]", + # variable has type "bytes") + b: bytes = df.to_parquet() # type: ignore[assignment] + + +def test_types_to_latex() -> None: + df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + df.to_latex( + columns=["A"], label="some_label", caption="some_caption", multirow=True + ) + df.to_latex(escape=False, decimal=",", column_format="r") + # position param was added in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.to_latex(position="some") + # caption param was extended to accept tuple in 1.2.0 + # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html + df.to_latex(caption=("cap1", "cap2")) + + +def test_types_explode() -> None: + df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + res1: pd.DataFrame = df.explode("A") + res2: pd.DataFrame = df.explode("A", ignore_index=False) + res3: pd.DataFrame = df.explode("A", ignore_index=True) + + +def test_types_rename() -> None: + df = pd.DataFrame(columns=["a"]) + col_map = {"a": "b"} + # error: Argument "columns" to "rename" of "DataFrame" has incompatible type + # "Dict[str, str]"; expected "Optional[Union[Mapping[Hashable, Any], + # Callable[[Hashable], Hashable]]]" + df.rename(columns=col_map) # type: ignore[arg-type] + df.rename(columns={"a": "b"}) + df.rename(columns={1: 
"b"}) + # Apparently all of these calls are accepted by pandas + df.rename(columns={None: "b"}) + df.rename(columns={"": "b"}) + df.rename(columns={(2, 1): "b"}) + + +def test_types_eq() -> None: + df1 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + res1: pd.DataFrame = df1 == 1 + df2 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + res2: pd.DataFrame = df1 == df2 + + +def test_types_as_type() -> None: + df1 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) + df2: pd.DataFrame = df1.astype({"A": "int32"}) + + +def test_types_dot() -> None: + df1 = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]]) + df2 = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]]) + s1 = pd.Series([1, 1, 2, 1]) + np_array = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + # error: Incompatible types in assignment (expression has type "Union[DataFrame, + # Series]", variable has type "DataFrame") + df3: pd.DataFrame = df1 @ df2 # type: ignore[assignment] + df4: pd.DataFrame = df1.dot(df2) + # error: Incompatible types in assignment (expression has type "Union[DataFrame, + # Series]", variable has type "DataFrame") + df5: pd.DataFrame = df1 @ np_array # type: ignore[assignment] + df6: pd.DataFrame = df1.dot(np_array) + df7: pd.Series = df1 @ s1 + df8: pd.Series = df1.dot(s1) diff --git a/pandas/tests/typing/valid/test_interval.py b/pandas/tests/typing/valid/test_interval.py new file mode 100644 index 0000000000000..937af8cc85d0d --- /dev/null +++ b/pandas/tests/typing/valid/test_interval.py @@ -0,0 +1,38 @@ +# flake8: noqa: F841 +# pyright: reportGeneralTypeIssues = true + +import pandas as pd + + +def test_interval_init() -> None: + i1: pd.Interval = pd.Interval(1, 2, closed="both") + i2: pd.Interval = pd.Interval(1, right=2, closed="right") + i3: pd.Interval = pd.Interval(left=1, right=2, closed="left") + + +def test_interval_arithmetic() -> None: + i1: pd.Interval = pd.Interval(1, 2, closed="both") + i2: pd.Interval = pd.Interval(1, right=2, closed="right") + + i3: pd.Interval = i1 + 
1 + i4: pd.Interval = i1 - 1 + i5: pd.Interval = i1 * 2 + i6: pd.Interval = i1 / 2 + i7: pd.Interval = i1 // 2 + + +def test_max_intervals() -> None: + i1 = pd.Interval( + pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02"), closed="both" + ) + i2 = pd.Interval( + pd.Timestamp("2000-01-01T12:00:00"), pd.Timestamp("2000-01-02"), closed="both" + ) + print(max(i1.left, i2.left)) + + +def test_interval_length() -> None: + i1 = pd.Interval( + pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02"), closed="both" + ) + i1.length.total_seconds() diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py new file mode 100644 index 0000000000000..b74dcc88f1363 --- /dev/null +++ b/pandas/tests/typing/valid/test_pandas.py @@ -0,0 +1,167 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true +import tempfile +from typing import ( + Any, + Dict, + List, + Union, +) + +import pandas as pd +from pandas.io.parsers import TextFileReader + + +def test_types_to_datetime() -> None: + df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}) + # error: No overload variant of "to_datetime" matches argument type "DataFrame" + pd.to_datetime(df) # type: ignore[call-overload] + # error: No overload variant of "to_datetime" matches argument types "DataFrame", + # "str", "str", "bool" + pd.to_datetime( # type: ignore[call-overload] + df, unit="s", origin="unix", infer_datetime_format=True + ) + # error: No overload variant of "to_datetime" matches argument types "DataFrame", + # "str", "bool", "None", "str", "bool" + pd.to_datetime( # type: ignore[call-overload] + df, unit="ns", dayfirst=True, utc=None, format="%M:%D", exact=False + ) + pd.to_datetime([1, 2], unit="D", origin=pd.Timestamp("01/01/2000")) + pd.to_datetime([1, 2], unit="D", origin=3) + + +def test_types_concat() -> None: + s = pd.Series([0, 1, -10]) + s2 = pd.Series([7, -5, 10]) + + 
pd.concat([s, s2]) + pd.concat([s, s2], axis=1) + pd.concat([s, s2], keys=["first", "second"], sort=True) + pd.concat([s, s2], keys=["first", "second"], names=["source", "row"]) + + # Depends on the axis + # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected + # "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rs1: Union[pd.Series, pd.DataFrame] = pd.concat( + {"a": s, "b": s2} # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected + # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + rs1a: Union[pd.Series, pd.DataFrame] = pd.concat( + {"a": s, "b": s2}, axis=1 # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[int, Series]"; expected + # "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rs2: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, 2: s2} # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[int, Series]"; expected + # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + rs2a: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, 2: s2}, axis=1 # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], Series]"; + # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rs3: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, None: s2} # type:ignore[arg-type] + ) + # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], Series]"; + # expected "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + rs3a: Union[pd.Series, pd.DataFrame] = pd.concat( + {1: s, None: s2}, axis=1 # type:ignore[arg-type] + ) + + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + df2 = pd.DataFrame(data={"col1": [10, 20], "col2": [30, 40]}) + + pd.concat([df, df2]) + pd.concat([df, df2], axis=1) + pd.concat([df, df2], keys=["first", "second"], sort=True) + pd.concat([df, df2], keys=["first", 
"second"], names=["source", "row"]) + + # error: Incompatible types in assignment (expression has type "Union[DataFrame, + # Series]", variable has type "DataFrame") + # error: Argument 1 to "concat" has incompatible type "Dict[str, DataFrame]"; + # expected "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + result: pd.DataFrame = pd.concat( # type: ignore[assignment] + { + "a": pd.DataFrame([1, 2, 3]), + "b": pd.DataFrame([4, 5, 6]), + }, # type:ignore[arg-type] + axis=1, + ) + # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected + # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]" + result2: Union[pd.DataFrame, pd.Series] = pd.concat( + { + "a": pd.Series([1, 2, 3]), + "b": pd.Series([4, 5, 6]), + }, # type:ignore[arg-type] + axis=1, + ) + + # error: Argument 1 to "concat" has incompatible type "Dict[str, DataFrame]"; + # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rdf1: pd.DataFrame = pd.concat({"a": df, "b": df2}) # type:ignore[arg-type] + # error: Argument 1 to "concat" has incompatible type "Dict[int, DataFrame]"; + # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rdf2: pd.DataFrame = pd.concat({1: df, 2: df2}) # type:ignore[arg-type] + # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], + # DataFrame]"; expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]" + rdf3: pd.DataFrame = pd.concat({1: df, None: df2}) # type:ignore[arg-type] + + rdf4: pd.DataFrame = pd.concat(list(map(lambda x: s2, ["some_value", 3])), axis=1) + + +def test_types_json_normalize() -> None: + data1: List[Dict[str, Any]] = [ + {"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, + {"name": {"given": "More", "family": "Regner"}}, + {"id": 2, "name": "Faye Raker"}, + ] + df1: pd.DataFrame = pd.json_normalize(data=data1) + df2: pd.DataFrame = pd.json_normalize(data=data1, max_level=0, sep=";") + df3: pd.DataFrame = pd.json_normalize( + data=data1, 
meta_prefix="id", record_prefix="name", errors="raise" + ) + df4: pd.DataFrame = pd.json_normalize(data=data1, record_path=None, meta="id") + data2: Dict[str, Any] = {"name": {"given": "More", "family": "Regner"}} + df5: pd.DataFrame = pd.json_normalize(data=data2) + + +def test_types_read_csv() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + # error: Incompatible types in assignment (expression has type "Optional[str]", + # variable has type "str") + csv_df: str = df.to_csv() # type: ignore[assignment] + + with tempfile.NamedTemporaryFile() as file: + df.to_csv(file.name) + df2: pd.DataFrame = pd.read_csv(file.name) + df3: pd.DataFrame = pd.read_csv(file.name, sep="a", squeeze=False) + df4: pd.DataFrame = pd.read_csv( + file.name, + header=None, + prefix="b", + mangle_dupe_cols=True, + keep_default_na=False, + ) + df5: pd.DataFrame = pd.read_csv( + file.name, engine="python", true_values=[0, 1, 3], na_filter=False + ) + df6: pd.DataFrame = pd.read_csv( + file.name, + skiprows=lambda x: x in [0, 2], + skip_blank_lines=True, + dayfirst=False, + ) + df7: pd.DataFrame = pd.read_csv(file.name, nrows=2) + tfr1: TextFileReader = pd.read_csv( + file.name, nrows=2, iterator=True, chunksize=3 + ) + tfr2: TextFileReader = pd.read_csv(file.name, nrows=2, chunksize=1) + tfr3: TextFileReader = pd.read_csv( + file.name, nrows=2, iterator=False, chunksize=1 + ) + tfr4: TextFileReader = pd.read_csv(file.name, nrows=2, iterator=True) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py new file mode 100644 index 0000000000000..965241c067176 --- /dev/null +++ b/pandas/tests/typing/valid/test_series.py @@ -0,0 +1,583 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true + +import tempfile +from pathlib import Path +from typing import List + +import numpy as np + +import pandas as pd +from pandas._typing import Scalar +from 
pandas.core.window import ExponentialMovingWindow +from pandas.util import _test_decorators as td + + +def test_types_init() -> None: + pd.Series(1) + pd.Series((1, 2, 3)) + pd.Series(np.array([1, 2, 3])) + pd.Series(data=[1, 2, 3, 4], name="series") + pd.Series(data=[1, 2, 3, 4], dtype=np.int8) + pd.Series(data={"row1": [1, 2], "row2": [3, 4]}) + pd.Series(data=[1, 2, 3, 4], index=[4, 3, 2, 1], copy=True) + + +def test_types_any() -> None: + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res1: bool = pd.Series([False, False]).any() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res2: bool = pd.Series([False, False]).any( # type: ignore[assignment] + bool_only=False + ) + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res3: bool = pd.Series([np.nan]).any(skipna=False) # type: ignore[assignment] + + +def test_types_all() -> None: + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res1: bool = pd.Series([False, False]).all() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res2: bool = pd.Series([False, False]).all( # type: ignore[assignment] + bool_only=False + ) + # error: Incompatible types in assignment (expression has type "Union[Series, + # bool]", variable has type "bool") + res3: bool = pd.Series([np.nan]).all(skipna=False) # type: ignore[assignment] + + +def test_types_csv() -> None: + s = pd.Series(data=[1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[str]", + # variable has type "str") + csv_df: str = s.to_csv() # type: ignore[assignment] + + with tempfile.NamedTemporaryFile() as file: + s.to_csv(file.name) + s2: 
pd.DataFrame = pd.read_csv(file.name) + + with tempfile.NamedTemporaryFile() as file: + s.to_csv(Path(file.name)) + s3: pd.DataFrame = pd.read_csv(Path(file.name)) + + # This keyword was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + with tempfile.NamedTemporaryFile() as file: + s.to_csv(file.name, errors="replace") + s4: pd.DataFrame = pd.read_csv(file.name) + + +def test_types_copy() -> None: + s = pd.Series(data=[1, 2, 3, 4]) + s2: pd.Series = s.copy() + + +def test_types_select() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s[0] + s[1:] + + +def test_types_iloc_iat() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s2 = pd.Series(data=[1, 2]) + s.loc["row1"] + s.iat[0] + s2.loc[0] + s2.iat[0] + + +def test_types_loc_at() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s2 = pd.Series(data=[1, 2]) + s.loc["row1"] + s.at["row1"] + s2.loc[1] + s2.at[1] + + +def test_types_boolean_indexing() -> None: + s = pd.Series([0, 1, 2]) + s[s > 1] + s[s] + + +def test_types_head_tail() -> None: + s = pd.Series([0, 1, 2]) + s.head(1) + s.tail(1) + + +def test_types_sample() -> None: + s = pd.Series([0, 1, 2]) + s.sample(frac=0.5) + s.sample(n=1) + + +def test_types_nlargest_nsmallest() -> None: + s = pd.Series([0, 1, 2]) + s.nlargest(1) + s.nlargest(1, "first") + s.nsmallest(1, "last") + s.nsmallest(1, "all") + + +def test_types_filter() -> None: + s = pd.Series(data=[1, 2, 3, 4], index=["cow", "coal", "coalesce", ""]) + s.filter(items=["cow"]) + s.filter(regex="co.*") + s.filter(like="al") + + +def test_types_setting() -> None: + s = pd.Series([0, 1, 2]) + s[3] = 4 + s[s == 1] = 5 + s[:] = 3 + + +def test_types_drop() -> None: + s = pd.Series([0, 1, 2]) + res: pd.Series = s.drop(0) + res2: pd.Series = s.drop([0, 1]) + res3: pd.Series = s.drop(0, axis=0) + # error: Incompatible types in assignment (expression has type "Series", variable + # has type "None") + res4: None = s.drop( # type: ignore[assignment] + [0, 1], 
inplace=True, errors="raise" + ) + # error: Incompatible types in assignment (expression has type "Series", variable + # has type "None") + res5: None = s.drop( # type: ignore[assignment] + [0, 1], inplace=True, errors="ignore" + ) + + +def test_types_drop_multilevel() -> None: + index = pd.MultiIndex( + levels=[["top", "bottom"], ["first", "second", "third"]], + codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + ) + s = pd.Series(data=[1, 2, 3, 4, 5, 6], index=index) + res: pd.Series = s.drop(labels="first", level=1) + + +def test_types_dropna() -> None: + s = pd.Series([1, np.nan, np.nan]) + res: pd.Series = s.dropna() + res2: None = s.dropna(axis=0, inplace=True) + + +def test_types_fillna() -> None: + s = pd.Series([1, np.nan, np.nan, 3]) + res: pd.Series = s.fillna(0) + res2: pd.Series = s.fillna(0, axis="index") + res3: pd.Series = s.fillna(method="backfill", axis=0) + res4: None = s.fillna(method="bfill", inplace=True) + res5: pd.Series = s.fillna(method="pad") + res6: pd.Series = s.fillna(method="ffill", limit=1) + + +def test_types_sort_index() -> None: + s = pd.Series([1, 2, 3], index=[2, 3, 1]) + res: pd.Series = s.sort_index() + res2: None = s.sort_index(ascending=False, inplace=True) + res3: pd.Series = s.sort_index(kind="mergesort") + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_index_with_key() -> None: + s = pd.Series([1, 2, 3], index=["a", "B", "c"]) + res: pd.Series = s.sort_index(key=lambda k: k.str.lower()) + + +def test_types_sort_values() -> None: + s = pd.Series([4, 2, 1, 3]) + res: pd.Series = s.sort_values(0) + res2: pd.Series = s.sort_values(ascending=False) + res3: None = s.sort_values(inplace=True, kind="quicksort") + res4: pd.Series = s.sort_values(na_position="last") + res5: pd.Series = s.sort_values(ignore_index=True) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_values_with_key() -> None: + s = pd.Series([1, 2, 3], 
index=[2, 3, 1]) + res: pd.Series = s.sort_values(key=lambda k: -k) + + +def test_types_shift() -> None: + s = pd.Series([1, 2, 3]) + s.shift() + s.shift(axis=0, periods=1) + s.shift(-1, fill_value=0) + + +def test_types_rank() -> None: + s = pd.Series([1, 1, 2, 5, 6, np.nan, "million"]) + s.rank() + s.rank(axis=0, na_option="bottom") + s.rank(method="min", pct=True) + s.rank(method="dense", ascending=True) + s.rank(method="first", numeric_only=True) + + +def test_types_mean() -> None: + s = pd.Series([1, 2, 3, np.nan]) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f1: float = s.mean() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = s.mean(axis=0, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f2: float = s.mean(skipna=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f3: float = s.mean(numeric_only=False) # type: ignore[assignment] + + +def test_types_median() -> None: + s = pd.Series([1, 2, 3, np.nan]) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f1: float = s.median() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = s.median(axis=0, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f2: float = s.median(skipna=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + 
f3: float = s.median(numeric_only=False) # type: ignore[assignment] + + +def test_types_sum() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.sum() + s.sum(axis=0, level=0) + s.sum(skipna=False) + s.sum(numeric_only=False) + s.sum(min_count=4) + + +def test_types_cumsum() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.cumsum() + s.cumsum(axis=0) + s.cumsum(skipna=False) + + +def test_types_min() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.min() + s.min(axis=0) + s.min(level=0) + s.min(skipna=False) + + +def test_types_max() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.max() + s.max(axis=0) + s.max(level=0) + s.max(skipna=False) + + +def test_types_quantile() -> None: + s = pd.Series([1, 2, 3, 10]) + s.quantile([0.25, 0.5]) + s.quantile(0.75) + s.quantile() + s.quantile(interpolation="nearest") + + +def test_types_clip() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.clip(lower=0, upper=5) + s.clip(lower=0, upper=5, inplace=True) + + +def test_types_abs() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.abs() + + +def test_types_var() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.var() + s.var(axis=0, ddof=1) + s.var(skipna=True, numeric_only=False) + + +def test_types_std() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.std() + s.std(axis=0, ddof=1) + s.std(skipna=True, numeric_only=False) + + +def test_types_idxmin() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.idxmin() + s.idxmin(axis=0) + + +def test_types_idxmax() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.idxmax() + s.idxmax(axis=0) + + +def test_types_value_counts() -> None: + s = pd.Series([1, 2]) + s.value_counts() + + +def test_types_unique() -> None: + s = pd.Series([-10, 2, 2, 3, 10, 10]) + s.unique() + + +def test_types_apply() -> None: + s = pd.Series([-10, 2, 2, 3, 10, 10]) + s.apply(lambda x: x ** 2) + s.apply(np.exp) + s.apply(str) + + +def test_types_element_wise_arithmetic() -> None: + s = pd.Series([0, 1, -10]) + s2 = pd.Series([7, -5, 10]) + + s + s2 + s.add(s2, fill_value=0) + + s - s2 + 
s.sub(s2, fill_value=0) + + s * s2 + s.mul(s2, fill_value=0) + + s / s2 + # error: Unexpected keyword argument "fill_value" + s.div(s2, fill_value=0) # type: ignore[call-arg] + + s // s2 + s.floordiv(s2, fill_value=0) + + s % s2 + s.mod(s2, fill_value=0) + + +def test_types_groupby() -> None: + s = pd.Series([4, 2, 1, 8], index=["a", "b", "a", "b"]) + s.groupby(["a", "b", "a", "b"]) + s.groupby(level=0) + s.groupby(s > 2) + + +# This added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_group_by_with_dropna_keyword() -> None: + s = pd.Series([1, 2, 3, 3], index=["col1", "col2", "col3", np.nan]) + s.groupby(level=0, dropna=True).sum() + s.groupby(level=0, dropna=False).sum() + s.groupby(level=0).sum() + + +def test_types_plot() -> None: + s = pd.Series([0, 1, 1, 0, -10]) + s.plot.hist() + + +def test_types_window() -> None: + s = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s.expanding() + s.expanding(axis=0, center=True) + + s.rolling(2) + s.rolling(2, axis=0, center=True) + + +def test_types_cov() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s2 = pd.Series([0, 2, 12, -4, 7, 9, 2]) + s1.cov(s2) + s1.cov(s2, min_periods=1) + # ddof param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s1.cov(s2, ddof=2) + + +def test_update() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s1.update(pd.Series([0, 2, 12])) + # Series.update() accepting objects that can be coerced to a + # Series was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s1.update([1, 2, -4, 3]) + s1.update([1, "b", "c", "d"]) + s1.update({1: 9, 3: 4}) + + +# error: Untyped decorator makes function "test_to_markdown" untyped +@td.skip_if_no("tabulate") # type: ignore[misc] +def test_to_markdown() -> None: + s = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s.to_markdown() + s.to_markdown(buf=None, mode="wt") + # index param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.to_markdown(index=False) + + +# 
compare() method added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_compare() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s2 = pd.Series([0, 2, 12, -4, 7, 9, 2]) + s1.compare(s2) + s2.compare(s1, align_axis="columns", keep_shape=True, keep_equal=True) + + +def test_types_agg() -> None: + s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) + s.agg("min") + s.agg(x=max, y="min", z=np.mean) + s.agg("mean", axis=0) + + +def test_types_describe() -> None: + s = pd.Series([1, 2, 3, np.datetime64("2000-01-01")]) + s.describe() + s.describe(percentiles=[0.5], include="all") + s.describe(exclude=np.number) + # datetime_is_numeric param added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.describe(datetime_is_numeric=True) + + +def test_types_resample() -> None: + s = pd.Series(range(9), index=pd.date_range("1/1/2000", periods=9, freq="T")) + s.resample("3T").sum() + # origin and offset params added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.resample("20min", origin="epoch", offset=pd.Timedelta(value=2, unit="minutes")) + + +# set_flags() method added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html +def test_types_set_flags() -> None: + pd.Series([1, 2], index=["a", "b"]).set_flags(allows_duplicate_labels=False) + pd.Series([3, 4], index=["a", "a"]).set_flags(allows_duplicate_labels=True) + pd.Series([5, 2], index=["a", "a"]) + + +def test_types_getitem() -> None: + s = pd.Series({"key": [0, 1, 2, 3]}) + key: List[int] = s["key"] + s2 = pd.Series([0, 1, 2, 3]) + value: int = s2[0] + s3: pd.Series = s[:2] + + +def test_types_eq() -> None: + s1 = pd.Series([1, 2, 3]) + res1: pd.Series = s1 == 1 + s2 = pd.Series([1, 2, 4]) + res2: pd.Series = s1 == s2 + + +def test_types_rename_axis() -> None: + s: pd.Series = pd.Series([1, 2, 3]).rename_axis("A") + + +def test_types_values() -> None: + n1: np.ndarray = pd.Series([1, 2, 3]).values + n2: np.ndarray = 
pd.Series(list("aabc")).values + n3: np.ndarray = pd.Series(list("aabc")).astype("category").values + n4: np.ndarray = pd.Series( + pd.date_range("20130101", periods=3, tz="US/Eastern") + ).values + + +def test_types_rename() -> None: + # Scalar + s1 = pd.Series([1, 2, 3]).rename("A") + # Hashable Sequence + s2 = pd.Series([1, 2, 3]).rename(("A", "B")) + # Optional + s3 = pd.Series([1, 2, 3]).rename(None) + + # Functions + def add1(x: int) -> int: + return x + 1 + + s4 = pd.Series([1, 2, 3]).rename(add1) + + # Dictionary + s5 = pd.Series([1, 2, 3]).rename({1: 10}) + # inplace + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "None") + s6: None = pd.Series([1, 2, 3]).rename( # type: ignore[assignment] + "A", inplace=True + ) + + +def test_types_ne() -> None: + s1 = pd.Series([1, 2, 3]) + s2 = pd.Series([1, 2, 4]) + s3: pd.Series = s1 != s2 + + +def test_types_bfill() -> None: + s1 = pd.Series([1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "Series") + s2: pd.Series = s1.bfill(inplace=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "None") + s3: None = s1.bfill(inplace=True) # type: ignore[assignment] + + +def test_types_ewm() -> None: + s1 = pd.Series([1, 2, 3]) + w1: ExponentialMovingWindow = s1.ewm( + com=0.3, min_periods=0, adjust=False, ignore_na=True, axis=0 + ) + w2: ExponentialMovingWindow = s1.ewm(alpha=0.4) + w3: ExponentialMovingWindow = s1.ewm(span=1.6) + w4: ExponentialMovingWindow = s1.ewm(halflife=0.7) + + +def test_types_ffill() -> None: + s1 = pd.Series([1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "Series") + s2: pd.Series = s1.ffill(inplace=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # 
variable has type "None") + s3: None = s1.ffill(inplace=True) # type: ignore[assignment] + + +def test_types_as_type() -> None: + s1 = pd.Series([1, 2, 8, 9]) + s2: pd.Series = s1.astype("int32") + + +def test_types_dot() -> None: + s1 = pd.Series([0, 1, 2, 3]) + s2 = pd.Series([-1, 2, -3, 4]) + df1 = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]]) + n1 = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + sc1: Scalar = s1.dot(s2) + sc2: Scalar = s1 @ s2 + s3: pd.Series = s1.dot(df1) + s4: pd.Series = s1 @ df1 + n2: np.ndarray = s1.dot(n1) + n3: np.ndarray = s1 @ n1 diff --git a/pandas/tests/typing/valid/test_testing.py b/pandas/tests/typing/valid/test_testing.py new file mode 100644 index 0000000000000..a98d8faee5098 --- /dev/null +++ b/pandas/tests/typing/valid/test_testing.py @@ -0,0 +1,21 @@ +# pyright: reportGeneralTypeIssues = true + +import pandas as pd +import pandas._testing as tm + + +def test_types_assert_series_equal() -> None: + s1 = pd.Series([0, 1, 1, 0]) + s2 = pd.Series([0, 1, 1, 0]) + tm.assert_series_equal(left=s1, right=s2) + tm.assert_series_equal( + s1, + s2, + check_freq=False, + check_categorical=True, + check_flags=True, + check_datetimelike_compat=True, + ) + tm.assert_series_equal( + s1, s2, check_dtype=True, check_less_precise=True, check_names=True + ) diff --git a/pandas/tests/typing/valid/test_timestamp.py b/pandas/tests/typing/valid/test_timestamp.py new file mode 100644 index 0000000000000..cd9f920d5f9ff --- /dev/null +++ b/pandas/tests/typing/valid/test_timestamp.py @@ -0,0 +1,67 @@ +# flake8: noqa: F841 +# TODO: many functions need return types annotations for pyright +# to run with reportGeneralTypeIssues = true + +import datetime as dt + +import pandas as pd + + +def test_types_init() -> None: + ts: pd.Timestamp = pd.Timestamp("2021-03-01T12") + ts1: pd.Timestamp = pd.Timestamp(dt.date(2021, 3, 15)) + ts2: pd.Timestamp = pd.Timestamp(dt.datetime(2021, 3, 10, 12)) + ts3: pd.Timestamp = pd.Timestamp(pd.Timestamp("2021-03-01T12")) + 
ts4: pd.Timestamp = pd.Timestamp(1515590000.1, unit="s") + ts5: pd.Timestamp = pd.Timestamp(1515590000.1, unit="s", tz="US/Pacific") + ts6: pd.Timestamp = pd.Timestamp(1515590000100000000) # plain integer (nanosecond) + ts7: pd.Timestamp = pd.Timestamp(2021, 3, 10, 12) + ts8: pd.Timestamp = pd.Timestamp(year=2021, month=3, day=10, hour=12) + ts9: pd.Timestamp = pd.Timestamp( + year=2021, month=3, day=10, hour=12, tz="US/Pacific" + ) + + +def test_types_arithmetic() -> None: + # error: Incompatible types in assignment (expression has type "datetime", variable + # has type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts: pd.Timestamp = pd.to_datetime("2021-03-01") # type:ignore[assignment,arg-type] + # error: Incompatible types in assignment (expression has type "datetime", variable + # has type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts2: pd.Timestamp = pd.to_datetime("2021-01-01") # type:ignore[assignment,arg-type] + delta: pd.Timedelta = pd.to_timedelta("1 day") + + # error: Incompatible types in assignment (expression has type "timedelta", variable + # has type "Timedelta") + tsr: pd.Timedelta = ts - ts2 # type: ignore[assignment] + tsr2: pd.Timestamp = ts + delta + + +def test_types_comparison() -> None: + # Incompatible types in assignment (expression has type "datetime", variable has + # type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts: pd.Timestamp = pd.to_datetime("2021-03-01") # type: ignore[assignment,arg-type] + # Incompatible types in assignment (expression has type "datetime", variable has + # type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts2: pd.Timestamp = pd.to_datetime( # type: ignore[assignment] + "2021-01-01" # type: ignore[arg-type] + ) + + tsr: bool = ts < ts2 + tsr2: bool = ts > ts2 + + 
+def test_types_pydatetime() -> None: + ts: pd.Timestamp = pd.Timestamp("2021-03-01T12") + + datet: dt.datetime = ts.to_pydatetime() + datet2: dt.datetime = ts.to_pydatetime(False) + datet3: dt.datetime = ts.to_pydatetime(warn=True) diff --git a/pyproject.toml b/pyproject.toml index c3ed07defa60d..c960279d2b192 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ markers = [ "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", ] +norecursedirs = ["pandas/tests/typing/invalid"] [tool.mypy] # Import discovery @@ -116,6 +117,10 @@ module = [ ] check_untyped_defs = false +[[tool.mypy.overrides]] +module = ["pandas.tests.typing"] +check_untyped_defs = true + [[tool.mypy.overrides]] module = [ "pandas.tests.apply.test_series_apply", @@ -150,8 +155,30 @@ skip = "pandas/__init__.py" [tool.pyright] pythonVersion = "3.8" typeCheckingMode = "basic" -include = ["pandas", "typings"] -exclude = ["pandas/tests", "pandas/io/clipboard", "pandas/util/version"] +include = ["pandas", "typings", "pandas/tests/typing/valid"] +exclude = [ + "pandas/io/clipboard", + "pandas/util/version", + # ignore everything in /pandas/tests except typing + "pandas/tests/*.py", + "pandas/tests/a*", + "pandas/tests/b*", + "pandas/tests/c*", + "pandas/tests/d*", + "pandas/tests/e*", + "pandas/tests/f*", + "pandas/tests/g*", + "pandas/tests/i*", + "pandas/tests/l*", + "pandas/tests/p*", + "pandas/tests/r*", + "pandas/tests/s*", + "pandas/tests/to*", + "pandas/tests/ts*", + "pandas/tests/s*", + "pandas/tests/u*", + "pandas/tests/w*", +] reportGeneralTypeIssues = false reportConstantRedefinition = false reportFunctionMemberAccess = false From 8d17f218a913a97842c4828078b87037babfaf55 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Tue, 25 Jan 2022 17:56:56 +0100 Subject: [PATCH 10/26] README, isort, skip_if_no --- pandas/tests/typing/README.md | 36 +++++++++++++++++++++++++ pandas/tests/typing/valid/test_frame.py | 10 
++++--- 2 files changed, 42 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/typing/README.md diff --git a/pandas/tests/typing/README.md b/pandas/tests/typing/README.md new file mode 100644 index 0000000000000..3762218fbd7c0 --- /dev/null +++ b/pandas/tests/typing/README.md @@ -0,0 +1,36 @@ +## Purpose of those tests + +The tests contained in the `valid` directory are snippets that, when +processed through a type checker, ensure that type annotations and type +stubs from this repository conform to common pandas API use-patterns. + +## Running the tests + +Tests can be run in the following ways: + +`pyright pandas/tests/typing` + +`mypy pandas/tests/typing` + +They'll also be automatically detected and executed by pytest. This +is to ensure that the test code itself is valid. + +## Developing the tests + +Some tests contain type checker ignore-instructions along with an +error that's supposed to be thrown. + + # error: No overload variant of "to_datetime" matches argument type "DataFrame" + pd.to_datetime(df) # type: ignore[call-overload] + +All such constructs are placed because of the missing/invalid API +type information. When the API signature becomes valid again, the type +checker will ask you to remove `type: ignore`. Please remove the +above comment as well. + +When adding new tests, please use the above solution as well. + +## Origins and attribution + +The tests come from the [pandas-stubs](https://github.com/VirtusLab/pandas-stubs) +repository originally released under the MIT license. 
diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index b4d7b637ede3f..ff381c7612465 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -35,17 +35,17 @@ def test_types_to_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_csv(file.name) df2: pd.DataFrame = pd.read_csv(file.name) - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_csv(Path(file.name)) df3: pd.DataFrame = pd.read_csv(Path(file.name)) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_csv(file.name, errors="replace") df4: pd.DataFrame = pd.read_csv(file.name) @@ -529,6 +529,7 @@ def test_types_merge() -> None: df.merge(df2, on=l) +@td.skip_if_no("matplotlib") # type: ignore[misc] def test_types_plot() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) df.plot.hist() @@ -710,6 +711,7 @@ def test_types_from_dict() -> None: ) +@td.skip_if_no("jinja") # type: ignore[misc] def test_pipe() -> None: def foo(df: pd.DataFrame) -> pd.DataFrame: return df @@ -751,7 +753,7 @@ def test_types_to_parquet() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( allows_duplicate_labels=False ) - with tempfile.NamedTemporaryFile() as file: + with tempfile.Namedtempfile.TemporaryFile() as file: df.to_parquet(Path(file.name)) # to_parquet() returns bytes when no path given since 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html From 1c32250b9a70d25b053db9592da7f6eef465baec Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Tue, 25 Jan 2022 18:22:45 +0100 Subject: [PATCH 11/26] Configured isort for black profile --- 
.pre-commit-config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6de9bd786404f..d40ef03b84c0d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,6 +55,7 @@ repos: rev: 5.10.1 hooks: - id: isort + args: ["--profile", "black"] - repo: https://github.com/asottile/pyupgrade rev: v2.31.0 hooks: From 47ae9755741735cb886e05456d45f5be4f251990 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 26 Jan 2022 08:29:18 +0100 Subject: [PATCH 12/26] Configured isort for black profile --- LICENSES/STUBS_LICENSE | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 LICENSES/STUBS_LICENSE diff --git a/LICENSES/STUBS_LICENSE b/LICENSES/STUBS_LICENSE new file mode 100644 index 0000000000000..d236c20fbe6b0 --- /dev/null +++ b/LICENSES/STUBS_LICENSE @@ -0,0 +1,30 @@ +MIT License + +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ + +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" \ No newline at end of file From 16d21e52204bf7c476420d18d6a52e0eac06fa26 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 26 Jan 2022 08:33:04 +0100 Subject: [PATCH 13/26] Copyright notice, tempfile --- LICENSES/STUBS_LICENSE | 11 +---------- pandas/tests/typing/valid/test_frame.py | 15 +++++++++++---- pandas/tests/typing/valid/test_interval.py | 7 +++++++ pandas/tests/typing/valid/test_pandas.py | 7 +++++++ pandas/tests/typing/valid/test_series.py | 7 +++++++ pandas/tests/typing/valid/test_testing.py | 7 +++++++ pandas/tests/typing/valid/test_timestamp.py | 7 +++++++ 7 files changed, 47 insertions(+), 14 deletions(-) diff --git a/LICENSES/STUBS_LICENSE b/LICENSES/STUBS_LICENSE index d236c20fbe6b0..811d0beb50ddb 100644 --- a/LICENSES/STUBS_LICENSE +++ b/LICENSES/STUBS_LICENSE @@ -18,13 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - - -""" -Copyright (c) Virtus Lab sp. z o.o. (Ltd.) - -Distributed under the terms of the MIT license. - -The full license is in the STUBS_LICENSE file, distributed with this software. -""" \ No newline at end of file +SOFTWARE. \ No newline at end of file diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index ff381c7612465..3715c503c67ad 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. 
+ +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true @@ -35,17 +42,17 @@ def test_types_to_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_csv(file.name) df2: pd.DataFrame = pd.read_csv(file.name) - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_csv(Path(file.name)) df3: pd.DataFrame = pd.read_csv(Path(file.name)) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_csv(file.name, errors="replace") df4: pd.DataFrame = pd.read_csv(file.name) @@ -753,7 +760,7 @@ def test_types_to_parquet() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( allows_duplicate_labels=False ) - with tempfile.Namedtempfile.TemporaryFile() as file: + with tempfile.NamedTemporaryFile() as file: df.to_parquet(Path(file.name)) # to_parquet() returns bytes when no path given since 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html diff --git a/pandas/tests/typing/valid/test_interval.py b/pandas/tests/typing/valid/test_interval.py index 937af8cc85d0d..6a6b2abf38c5f 100644 --- a/pandas/tests/typing/valid/test_interval.py +++ b/pandas/tests/typing/valid/test_interval.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. 
+""" # flake8: noqa: F841 # pyright: reportGeneralTypeIssues = true diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index b74dcc88f1363..42342f10b387c 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 965241c067176..b3df4ff478437 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true diff --git a/pandas/tests/typing/valid/test_testing.py b/pandas/tests/typing/valid/test_testing.py index a98d8faee5098..b5ef32560bdff 100644 --- a/pandas/tests/typing/valid/test_testing.py +++ b/pandas/tests/typing/valid/test_testing.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. 
+""" # pyright: reportGeneralTypeIssues = true import pandas as pd diff --git a/pandas/tests/typing/valid/test_timestamp.py b/pandas/tests/typing/valid/test_timestamp.py index cd9f920d5f9ff..1cbb55f99bf91 100644 --- a/pandas/tests/typing/valid/test_timestamp.py +++ b/pandas/tests/typing/valid/test_timestamp.py @@ -1,3 +1,10 @@ +""" +Copyright (c) Virtus Lab sp. z o.o. (Ltd.) + +Distributed under the terms of the MIT license. + +The full license is in the STUBS_LICENSE file, distributed with this software. +""" # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true From e382af0a5df2da0a3a81b5405aec32b1555e2598 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 26 Jan 2022 08:46:04 +0100 Subject: [PATCH 14/26] Remove redundant profile configuration --- .pre-commit-config.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d40ef03b84c0d..6de9bd786404f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,7 +55,6 @@ repos: rev: 5.10.1 hooks: - id: isort - args: ["--profile", "black"] - repo: https://github.com/asottile/pyupgrade rev: v2.31.0 hooks: From ff2d31ba50193fc95e5f65f448e98af5d701d400 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Thu, 27 Jan 2022 14:41:33 +0100 Subject: [PATCH 15/26] Missing matplotlib. 
Isort again --- pandas/tests/typing/valid/test_series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index b3df4ff478437..5e7090f9a7637 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -400,6 +400,7 @@ def test_types_group_by_with_dropna_keyword() -> None: s.groupby(level=0).sum() +@td.skip_if_no("matplotlib") # type: ignore[misc] def test_types_plot() -> None: s = pd.Series([0, 1, 1, 0, -10]) s.plot.hist() From 70902cc1a6a942cd255da11aa29b1c45568e7468 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Thu, 27 Jan 2022 15:00:30 +0100 Subject: [PATCH 16/26] Isort --- pandas/tests/typing/valid/test_frame.py | 2 +- pandas/tests/typing/valid/test_pandas.py | 1 + pandas/tests/typing/valid/test_series.py | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index 3715c503c67ad..48a865ebcc2c4 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -9,8 +9,8 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true import io -import tempfile from pathlib import Path +import tempfile from typing import ( Any, Iterable, diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index 42342f10b387c..5e1be8d6d6148 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -17,6 +17,7 @@ ) import pandas as pd + from pandas.io.parsers import TextFileReader diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 5e7090f9a7637..2ba05be61ce4d 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -9,14 +9,15 @@ # TODO: many functions need return types 
annotations for pyright # to run with reportGeneralTypeIssues = true -import tempfile from pathlib import Path +import tempfile from typing import List import numpy as np -import pandas as pd from pandas._typing import Scalar + +import pandas as pd from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td From 0d2b8f9921f7deeacc3564797ed15b461d28f309 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Fri, 28 Jan 2022 18:13:40 +0100 Subject: [PATCH 17/26] Windows error for tempfile --- pandas/tests/typing/valid/test_frame.py | 43 +++++++++++++++++------- pandas/tests/typing/valid/test_series.py | 41 ++++++++++++++-------- 2 files changed, 58 insertions(+), 26 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index 48a865ebcc2c4..d40d2e8d10436 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -9,8 +9,9 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true import io -from pathlib import Path +import os import tempfile +from pathlib import Path from typing import ( Any, Iterable, @@ -42,19 +43,33 @@ def test_types_to_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - with tempfile.NamedTemporaryFile() as file: - df.to_csv(file.name) - df2: pd.DataFrame = pd.read_csv(file.name) + # NamedTemporaryFile cannot be used with delete=True on Windows + # see https://docs.python.org/3/library/tempfile.html#tempfile.NamedTemporaryFile + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = file.name + df.to_csv(tmp_file) + df2: pd.DataFrame = pd.read_csv(tmp_file) + finally: + os.unlink(tmp_file) - with tempfile.NamedTemporaryFile() as file: - df.to_csv(Path(file.name)) - df3: pd.DataFrame = pd.read_csv(Path(file.name)) + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = 
file.name + df.to_csv(Path(tmp_file)) + df3: pd.DataFrame = pd.read_csv(Path(tmp_file)) + finally: + os.unlink(tmp_file) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - with tempfile.NamedTemporaryFile() as file: - df.to_csv(file.name, errors="replace") - df4: pd.DataFrame = pd.read_csv(file.name) + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = file.name + df.to_csv(tmp_file, errors="replace") + df4: pd.DataFrame = pd.read_csv(tmp_file) + finally: + os.unlink(tmp_file) # Testing support for binary file handles, added in 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html @@ -760,8 +775,12 @@ def test_types_to_parquet() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( allows_duplicate_labels=False ) - with tempfile.NamedTemporaryFile() as file: - df.to_parquet(Path(file.name)) + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = file.name + df.to_parquet(Path(tmp_file)) + finally: + os.unlink(tmp_file) # to_parquet() returns bytes when no path given since 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html # error: Incompatible types in assignment (expression has type "Optional[bytes]", diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 2ba05be61ce4d..bfcfdfc3e9456 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -8,16 +8,15 @@ # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true - -from pathlib import Path +import os import tempfile +from pathlib import Path from typing import List import numpy as np -from pandas._typing import Scalar - import pandas as pd +from pandas._typing import Scalar from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td @@ -66,19 +65,33 @@ def test_types_csv() -> None: # 
variable has type "str") csv_df: str = s.to_csv() # type: ignore[assignment] - with tempfile.NamedTemporaryFile() as file: - s.to_csv(file.name) - s2: pd.DataFrame = pd.read_csv(file.name) - - with tempfile.NamedTemporaryFile() as file: - s.to_csv(Path(file.name)) - s3: pd.DataFrame = pd.read_csv(Path(file.name)) + # NamedTemporaryFile cannot be used with delete=True on Windows + # see https://docs.python.org/3/library/tempfile.html#tempfile.NamedTemporaryFile + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = file.name + s.to_csv(tmp_file) + s2: pd.DataFrame = pd.read_csv(tmp_file) + finally: + os.unlink(tmp_file) + + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = file.name + s.to_csv(Path(tmp_file)) + s3: pd.DataFrame = pd.read_csv(Path(tmp_file)) + finally: + os.unlink(tmp_file) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - with tempfile.NamedTemporaryFile() as file: - s.to_csv(file.name, errors="replace") - s4: pd.DataFrame = pd.read_csv(file.name) + try: + with tempfile.NamedTemporaryFile(delete=False) as file: + tmp_file = file.name + s.to_csv(tmp_file=tmp_file, errors="replace") + s4: pd.DataFrame = pd.read_csv(tmp_file=tmp_file) + finally: + os.unlink(tmp_file) def test_types_copy() -> None: From c0cc7d2ff76fbddbb3b01ceea88b5b048209fb0d Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Fri, 28 Jan 2022 18:15:01 +0100 Subject: [PATCH 18/26] Isort --- pandas/tests/typing/valid/test_frame.py | 2 +- pandas/tests/typing/valid/test_series.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index d40d2e8d10436..71ce777deb234 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -10,8 +10,8 @@ # to run with reportGeneralTypeIssues = true import io import os -import tempfile from pathlib import Path +import 
tempfile from typing import ( Any, Iterable, diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index bfcfdfc3e9456..4d9df37576783 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -9,14 +9,15 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true import os -import tempfile from pathlib import Path +import tempfile from typing import List import numpy as np -import pandas as pd from pandas._typing import Scalar + +import pandas as pd from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td From 16918a1fb909bdd88a5166874768f17cb4babacc Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Fri, 28 Jan 2022 19:17:31 +0100 Subject: [PATCH 19/26] Isort --- pandas/tests/typing/valid/test_series.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index b0522f1fd01e7..50f3d859628af 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -9,14 +9,15 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true import os -import tempfile from pathlib import Path +import tempfile from typing import List import numpy as np -import pandas as pd from pandas._typing import Scalar + +import pandas as pd from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td From bd002eb0c5932edd89be080cf46995845fb374c8 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Tue, 1 Feb 2022 19:38:11 +0100 Subject: [PATCH 20/26] Removed use of tempfile --- pandas/tests/typing/valid/test_frame.py | 48 ++++++++---------------- pandas/tests/typing/valid/test_pandas.py | 31 +++++++-------- pandas/tests/typing/valid/test_series.py | 40 
++++++-------------- 3 files changed, 40 insertions(+), 79 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index 71ce777deb234..d1807887b7f80 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -9,9 +9,7 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true import io -import os from pathlib import Path -import tempfile from typing import ( Any, Iterable, @@ -22,6 +20,7 @@ import numpy as np import pandas as pd +import pandas._testing as tm from pandas.util import _test_decorators as td @@ -43,33 +42,19 @@ def test_types_to_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - # NamedTemporaryFile cannot be used with delete=True on Windows - # see https://docs.python.org/3/library/tempfile.html#tempfile.NamedTemporaryFile - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - df.to_csv(tmp_file) - df2: pd.DataFrame = pd.read_csv(tmp_file) - finally: - os.unlink(tmp_file) + with tm.ensure_clean() as path: + df.to_csv(path) + df2: pd.DataFrame = pd.read_csv(path) - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - df.to_csv(Path(tmp_file)) - df3: pd.DataFrame = pd.read_csv(Path(tmp_file)) - finally: - os.unlink(tmp_file) + with tm.ensure_clean() as path: + df.to_csv(Path(path)) + df3: pd.DataFrame = pd.read_csv(Path(path)) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - df.to_csv(tmp_file, errors="replace") - df4: pd.DataFrame = pd.read_csv(tmp_file) - finally: - os.unlink(tmp_file) + with tm.ensure_clean() as path: + df.to_csv(path, errors="replace") + df4: pd.DataFrame = pd.read_csv(path) # Testing support for binary file handles, added in 1.2.0 # 
https://pandas.pydata.org/docs/whatsnew/v1.2.0.html @@ -614,8 +599,9 @@ def test_types_to_feather() -> None: # See https://pandas.pydata.org/docs/whatsnew/v1.0.0.html # Docstring and type were updated in 1.2.0. # https://github.com/pandas-dev/pandas/pull/35408 - with tempfile.TemporaryFile() as f: - df.to_feather(f) + with tm.ensure_clean() as path: + with open(path, "wb") as f: + df.to_feather(f) # compare() method added in 1.1.0 @@ -775,12 +761,8 @@ def test_types_to_parquet() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags( allows_duplicate_labels=False ) - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - df.to_parquet(Path(tmp_file)) - finally: - os.unlink(tmp_file) + with tm.ensure_clean() as path: + df.to_parquet(Path(path)) # to_parquet() returns bytes when no path given since 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html # error: Incompatible types in assignment (expression has type "Optional[bytes]", diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index 5e1be8d6d6148..fe2a2dfa28183 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -8,7 +8,6 @@ # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true -import tempfile from typing import ( Any, Dict, @@ -17,7 +16,7 @@ ) import pandas as pd - +import pandas._testing as tm from pandas.io.parsers import TextFileReader @@ -144,32 +143,28 @@ def test_types_read_csv() -> None: # variable has type "str") csv_df: str = df.to_csv() # type: ignore[assignment] - with tempfile.NamedTemporaryFile() as file: - df.to_csv(file.name) - df2: pd.DataFrame = pd.read_csv(file.name) - df3: pd.DataFrame = pd.read_csv(file.name, sep="a", squeeze=False) + with tm.ensure_clean() as path: + df.to_csv(path) + df2: pd.DataFrame = pd.read_csv(path) + df3: pd.DataFrame = 
pd.read_csv(path, sep="a", squeeze=False) df4: pd.DataFrame = pd.read_csv( - file.name, + path, header=None, prefix="b", mangle_dupe_cols=True, keep_default_na=False, ) df5: pd.DataFrame = pd.read_csv( - file.name, engine="python", true_values=[0, 1, 3], na_filter=False + path, engine="python", true_values=[0, 1, 3], na_filter=False ) df6: pd.DataFrame = pd.read_csv( - file.name, + path, skiprows=lambda x: x in [0, 2], skip_blank_lines=True, dayfirst=False, ) - df7: pd.DataFrame = pd.read_csv(file.name, nrows=2) - tfr1: TextFileReader = pd.read_csv( - file.name, nrows=2, iterator=True, chunksize=3 - ) - tfr2: TextFileReader = pd.read_csv(file.name, nrows=2, chunksize=1) - tfr3: TextFileReader = pd.read_csv( - file.name, nrows=2, iterator=False, chunksize=1 - ) - tfr4: TextFileReader = pd.read_csv(file.name, nrows=2, iterator=True) + df7: pd.DataFrame = pd.read_csv(path, nrows=2) + tfr1: TextFileReader = pd.read_csv(path, nrows=2, iterator=True, chunksize=3) + tfr2: TextFileReader = pd.read_csv(path, nrows=2, chunksize=1) + tfr3: TextFileReader = pd.read_csv(path, nrows=2, iterator=False, chunksize=1) + tfr4: TextFileReader = pd.read_csv(path, nrows=2, iterator=True) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 50f3d859628af..c6c002c2d3460 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -8,16 +8,14 @@ # flake8: noqa: F841 # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true -import os from pathlib import Path -import tempfile from typing import List import numpy as np -from pandas._typing import Scalar - import pandas as pd +import pandas._testing as tm +from pandas._typing import Scalar from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td @@ -66,33 +64,19 @@ def test_types_csv() -> None: # variable has type "str") csv_df: str = s.to_csv() # type: 
ignore[assignment] - # NamedTemporaryFile cannot be used with delete=True on Windows - # see https://docs.python.org/3/library/tempfile.html#tempfile.NamedTemporaryFile - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - s.to_csv(tmp_file) - s2: pd.DataFrame = pd.read_csv(tmp_file) - finally: - os.unlink(tmp_file) - - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - s.to_csv(Path(tmp_file)) - s3: pd.DataFrame = pd.read_csv(Path(tmp_file)) - finally: - os.unlink(tmp_file) + with tm.ensure_clean() as path: + s.to_csv(path) + s2: pd.DataFrame = pd.read_csv(path) + + with tm.ensure_clean() as path: + s.to_csv(Path(path)) + s3: pd.DataFrame = pd.read_csv(Path(path)) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html - try: - with tempfile.NamedTemporaryFile(delete=False) as file: - tmp_file = file.name - s.to_csv(tmp_file, errors="replace") - s4: pd.DataFrame = pd.read_csv(tmp_file) - finally: - os.unlink(tmp_file) + with tm.ensure_clean() as path: + s.to_csv(path, errors="replace") + s4: pd.DataFrame = pd.read_csv(path) def test_types_copy() -> None: From 6d965ee89eb568e0eb5f7700622c8e8604ea7a77 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Tue, 1 Feb 2022 19:39:06 +0100 Subject: [PATCH 21/26] Isort --- pandas/tests/typing/valid/test_pandas.py | 1 + pandas/tests/typing/valid/test_series.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index fe2a2dfa28183..67f80ee94ee65 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -17,6 +17,7 @@ import pandas as pd import pandas._testing as tm + from pandas.io.parsers import TextFileReader diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index c6c002c2d3460..f813d19e1e477 100644 --- 
a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -13,9 +13,10 @@ import numpy as np +from pandas._typing import Scalar + import pandas as pd import pandas._testing as tm -from pandas._typing import Scalar from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td From 2ab9bfd9f0621ca2d60ffe3f38b3eff32763876c Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Thu, 3 Feb 2022 17:47:04 +0100 Subject: [PATCH 22/26] Remove deprecated usage. Type temporary paths --- pandas/tests/typing/valid/test_frame.py | 45 +++++++++++------------ pandas/tests/typing/valid/test_pandas.py | 4 +- pandas/tests/typing/valid/test_series.py | 34 ++++++++--------- pandas/tests/typing/valid/test_testing.py | 4 +- 4 files changed, 41 insertions(+), 46 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index d1807887b7f80..739e7d99d07fb 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -15,6 +15,7 @@ Iterable, List, Tuple, + cast, ) import numpy as np @@ -43,18 +44,18 @@ def test_types_to_csv() -> None: csv_df: str = df.to_csv() # type: ignore[assignment] with tm.ensure_clean() as path: - df.to_csv(path) - df2: pd.DataFrame = pd.read_csv(path) + df.to_csv(cast(str, path)) + df2: pd.DataFrame = pd.read_csv(cast(str, path)) with tm.ensure_clean() as path: - df.to_csv(Path(path)) - df3: pd.DataFrame = pd.read_csv(Path(path)) + df.to_csv(Path(cast(str, path))) + df3: pd.DataFrame = pd.read_csv(Path(cast(str, path))) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html with tm.ensure_clean() as path: - df.to_csv(path, errors="replace") - df4: pd.DataFrame = pd.read_csv(path) + df.to_csv(cast(str, path), errors="replace") + df4: pd.DataFrame = pd.read_csv(cast(str, path)) # Testing support for binary file handles, added in 1.2.0 # 
https://pandas.pydata.org/docs/whatsnew/v1.2.0.html @@ -283,13 +284,13 @@ def test_types_mean() -> None: s2: pd.Series = df.mean(axis=0) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "DataFrame") - df2: pd.DataFrame = df.mean(level=0) # type: ignore[assignment] + df2: pd.DataFrame = df.mean() # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "DataFrame") - df3: pd.DataFrame = df.mean(axis=1, level=0) # type: ignore[assignment] + df3: pd.DataFrame = df.mean(axis=1) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "DataFrame") - df4: pd.DataFrame = df.mean(1, True, level=0) # type: ignore[assignment] + df4: pd.DataFrame = df.mean(1, True) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "Series")# error: Incompatible types in assignment # (expression has type "Union[Series, float]", variable has type "Series") @@ -308,13 +309,13 @@ def test_types_median() -> None: s2: pd.Series = df.median(axis=0) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "DataFrame") - df2: pd.DataFrame = df.median(level=0) # type: ignore[assignment] + df2: pd.DataFrame = df.median() # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "DataFrame") - df3: pd.DataFrame = df.median(axis=1, level=0) # type: ignore[assignment] + df3: pd.DataFrame = df.median(axis=1) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "DataFrame") - df4: pd.DataFrame = df.median(1, True, level=0) # type: ignore[assignment] + df4: 
pd.DataFrame = df.median(1, True) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "Series") s3: pd.Series = df.median( # type: ignore[assignment] @@ -546,7 +547,7 @@ def test_types_plot() -> None: def test_types_window() -> None: df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]}) df.expanding() - df.expanding(axis=1, center=True) + df.expanding(axis=1) df.rolling(2) df.rolling(2, axis=1, center=True) @@ -600,7 +601,7 @@ def test_types_to_feather() -> None: # Docstring and type were updated in 1.2.0. # https://github.com/pandas-dev/pandas/pull/35408 with tm.ensure_clean() as path: - with open(path, "wb") as f: + with open(cast(str, path), "wb") as f: df.to_feather(f) @@ -636,8 +637,8 @@ def test_types_describe() -> None: } ) df.describe() - df.describe(percentiles=[0.5], include="all") - df.describe(exclude=np.number) + df.describe(percentiles=[0.5], include="all", datetime_is_numeric=True) + df.describe(exclude=np.number, datetime_is_numeric=True) # datetime_is_numeric param added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html df.describe(datetime_is_numeric=True) @@ -762,7 +763,7 @@ def test_types_to_parquet() -> None: allows_duplicate_labels=False ) with tm.ensure_clean() as path: - df.to_parquet(Path(path)) + df.to_parquet(Path(cast(str, path))) # to_parquet() returns bytes when no path given since 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html # error: Incompatible types in assignment (expression has type "Optional[bytes]", @@ -772,16 +773,14 @@ def test_types_to_parquet() -> None: def test_types_to_latex() -> None: df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]) - df.to_latex( - columns=["A"], label="some_label", caption="some_caption", multirow=True - ) - df.to_latex(escape=False, decimal=",", column_format="r") + df.style.to_latex(label="some_label", caption="some_caption") + df.style.to_latex(column_format="r") # 
position param was added in 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html - df.to_latex(position="some") + df.style.to_latex(position="some") # caption param was extended to accept tuple in 1.2.0 # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html - df.to_latex(caption=("cap1", "cap2")) + df.style.to_latex(caption=("cap1", "cap2")) def test_types_explode() -> None: diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index 67f80ee94ee65..acdbabecb16a9 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -17,7 +17,6 @@ import pandas as pd import pandas._testing as tm - from pandas.io.parsers import TextFileReader @@ -147,11 +146,10 @@ def test_types_read_csv() -> None: with tm.ensure_clean() as path: df.to_csv(path) df2: pd.DataFrame = pd.read_csv(path) - df3: pd.DataFrame = pd.read_csv(path, sep="a", squeeze=False) + df3: pd.DataFrame = pd.read_csv(path, sep="a") df4: pd.DataFrame = pd.read_csv( path, header=None, - prefix="b", mangle_dupe_cols=True, keep_default_na=False, ) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index f813d19e1e477..2debdfeee41a3 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -9,14 +9,16 @@ # TODO: many functions need return types annotations for pyright # to run with reportGeneralTypeIssues = true from pathlib import Path -from typing import List +from typing import ( + List, + cast, +) import numpy as np -from pandas._typing import Scalar - import pandas as pd import pandas._testing as tm +from pandas._typing import Scalar from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td @@ -66,18 +68,18 @@ def test_types_csv() -> None: csv_df: str = s.to_csv() # type: ignore[assignment] with tm.ensure_clean() as path: - s.to_csv(path) - s2: pd.DataFrame = pd.read_csv(path) + 
s.to_csv(cast(str, path)) + s2: pd.DataFrame = pd.read_csv(cast(str, path)) with tm.ensure_clean() as path: - s.to_csv(Path(path)) - s3: pd.DataFrame = pd.read_csv(Path(path)) + s.to_csv(Path(cast(str, path))) + s3: pd.DataFrame = pd.read_csv(Path(cast(str, path))) # This keyword was added in 1.1.0 # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html with tm.ensure_clean() as path: - s.to_csv(path, errors="replace") - s4: pd.DataFrame = pd.read_csv(path) + s.to_csv(cast(str, path), errors="replace") + s4: pd.DataFrame = pd.read_csv(cast(str, path)) def test_types_copy() -> None: @@ -206,7 +208,7 @@ def test_types_sort_index_with_key() -> None: def test_types_sort_values() -> None: s = pd.Series([4, 2, 1, 3]) - res: pd.Series = s.sort_values(0) + res: pd.Series = s.sort_values(axis=0) res2: pd.Series = s.sort_values(ascending=False) res3: None = s.sort_values(inplace=True, kind="quicksort") res4: pd.Series = s.sort_values(na_position="last") @@ -227,7 +229,7 @@ def test_types_shift() -> None: def test_types_rank() -> None: - s = pd.Series([1, 1, 2, 5, 6, np.nan, "million"]) + s = pd.Series([1, 1, 2, 5, 6, np.nan]) s.rank() s.rank(axis=0, na_option="bottom") s.rank(method="min", pct=True) @@ -242,7 +244,7 @@ def test_types_mean() -> None: f1: float = s.mean() # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "Series") - s1: pd.Series = s.mean(axis=0, level=0) # type: ignore[assignment] + s1: pd.Series = s.mean(axis=0) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "float") f2: float = s.mean(skipna=False) # type: ignore[assignment] @@ -258,7 +260,7 @@ def test_types_median() -> None: f1: float = s.median() # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "Series") - s1: pd.Series = s.median(axis=0, level=0) # type: 
ignore[assignment] + s1: pd.Series = s.median(axis=0) # type: ignore[assignment] # error: Incompatible types in assignment (expression has type "Union[Series, # float]", variable has type "float") f2: float = s.median(skipna=False) # type: ignore[assignment] @@ -270,7 +272,7 @@ def test_types_median() -> None: def test_types_sum() -> None: s = pd.Series([1, 2, 3, np.nan]) s.sum() - s.sum(axis=0, level=0) + s.sum(axis=0) s.sum(skipna=False) s.sum(numeric_only=False) s.sum(min_count=4) @@ -287,7 +289,6 @@ def test_types_min() -> None: s = pd.Series([1, 2, 3, np.nan]) s.min() s.min(axis=0) - s.min(level=0) s.min(skipna=False) @@ -295,7 +296,6 @@ def test_types_max() -> None: s = pd.Series([1, 2, 3, np.nan]) s.max() s.max(axis=0) - s.max(level=0) s.max(skipna=False) @@ -409,7 +409,7 @@ def test_types_plot() -> None: def test_types_window() -> None: s = pd.Series([0, 1, 1, 0, 5, 1, -10]) s.expanding() - s.expanding(axis=0, center=True) + s.expanding(axis=0) s.rolling(2) s.rolling(2, axis=0, center=True) diff --git a/pandas/tests/typing/valid/test_testing.py b/pandas/tests/typing/valid/test_testing.py index b5ef32560bdff..b47ec331b6d14 100644 --- a/pandas/tests/typing/valid/test_testing.py +++ b/pandas/tests/typing/valid/test_testing.py @@ -23,6 +23,4 @@ def test_types_assert_series_equal() -> None: check_flags=True, check_datetimelike_compat=True, ) - tm.assert_series_equal( - s1, s2, check_dtype=True, check_less_precise=True, check_names=True - ) + tm.assert_series_equal(s1, s2, check_dtype=True, check_names=True) From 4ec409fe4f55715a51ec8717b93503b6ce88848c Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Thu, 3 Feb 2022 17:47:33 +0100 Subject: [PATCH 23/26] Isort --- pandas/tests/typing/valid/test_pandas.py | 1 + pandas/tests/typing/valid/test_series.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index acdbabecb16a9..23380175e808a 100644 --- 
a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -17,6 +17,7 @@ import pandas as pd import pandas._testing as tm + from pandas.io.parsers import TextFileReader diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 2debdfeee41a3..85633c6386344 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -16,9 +16,10 @@ import numpy as np +from pandas._typing import Scalar + import pandas as pd import pandas._testing as tm -from pandas._typing import Scalar from pandas.core.window import ExponentialMovingWindow from pandas.util import _test_decorators as td From 889754145ce2f0b45a106515ee98241b1938a4ef Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Fri, 4 Feb 2022 17:47:42 +0100 Subject: [PATCH 24/26] Fixing CI issues --- pandas/tests/typing/valid/test_frame.py | 5 +++-- pandas/tests/typing/valid/test_pandas.py | 4 ++++ pandas/tests/typing/valid/test_series.py | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index 739e7d99d07fb..b66af4ceea2a6 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -411,14 +411,14 @@ def test_types_unique() -> None: def test_types_apply() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.apply(lambda x: x ** 2) + df.apply(lambda x: x**2) df.apply(np.exp) df.apply(str) def test_types_applymap() -> None: df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]}) - df.applymap(lambda x: x ** 2) + df.applymap(lambda x: x**2) df.applymap(np.exp) df.applymap(str) # na_action parameter was added in 1.2.0 @@ -771,6 +771,7 @@ def test_types_to_parquet() -> None: b: bytes = df.to_parquet() # type: ignore[assignment] +@td.skip_if_no("jinja2") # type: ignore[misc] def test_types_to_latex() -> None: df = pd.DataFrame([[1, 2], [8, 9]], 
columns=["A", "B"]) df.style.to_latex(label="some_label", caption="some_caption") diff --git a/pandas/tests/typing/valid/test_pandas.py b/pandas/tests/typing/valid/test_pandas.py index 23380175e808a..b3d2eeb718d58 100644 --- a/pandas/tests/typing/valid/test_pandas.py +++ b/pandas/tests/typing/valid/test_pandas.py @@ -165,6 +165,10 @@ def test_types_read_csv() -> None: ) df7: pd.DataFrame = pd.read_csv(path, nrows=2) tfr1: TextFileReader = pd.read_csv(path, nrows=2, iterator=True, chunksize=3) + tfr1.close() tfr2: TextFileReader = pd.read_csv(path, nrows=2, chunksize=1) + tfr2.close() tfr3: TextFileReader = pd.read_csv(path, nrows=2, iterator=False, chunksize=1) + tfr3.close() tfr4: TextFileReader = pd.read_csv(path, nrows=2, iterator=True) + tfr4.close() diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 85633c6386344..3191139c6e60d 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -357,7 +357,7 @@ def test_types_unique() -> None: def test_types_apply() -> None: s = pd.Series([-10, 2, 2, 3, 10, 10]) - s.apply(lambda x: x ** 2) + s.apply(lambda x: x**2) s.apply(np.exp) s.apply(str) From d1b134f16a3db9f8f23557c1f625b4657f6c67a3 Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Wed, 9 Feb 2022 18:52:46 +0100 Subject: [PATCH 25/26] Fixed experimental data manager issues --- pandas/tests/typing/valid/test_frame.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/tests/typing/valid/test_frame.py b/pandas/tests/typing/valid/test_frame.py index b66af4ceea2a6..ca9d75d505825 100644 --- a/pandas/tests/typing/valid/test_frame.py +++ b/pandas/tests/typing/valid/test_frame.py @@ -115,10 +115,12 @@ def test_types_setitem() -> None: df[i] = [8, 9] -def test_types_setitem_mask() -> None: - df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]}) - select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) - 
df[select_df] = [1, 2, 3] +def test_types_where() -> None: + df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}) + replace_df = pd.DataFrame(data={"col1": [5, 6], "col2": [7, 8]}) + mask = pd.DataFrame({"col1": [True, True], "col2": [False, True]}) + res: pd.DataFrame = df.where(~mask, replace_df) + res2: pd.DataFrame = df.where(~mask, 10) def test_types_iloc_iat() -> None: @@ -199,7 +201,7 @@ def test_types_dropna() -> None: def test_types_fillna() -> None: df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]}) res: pd.DataFrame = df.fillna(0) - res2: None = df.fillna(method="pad", axis=1, inplace=True) + res2: None = df.fillna(method="pad", inplace=True) def test_types_sort_index() -> None: From 8f0c21747ca7aea4519546d9e58b215e1d28d5ae Mon Sep 17 00:00:00 2001 From: zbigniewkrolikowski Date: Fri, 18 Feb 2022 18:22:51 +0100 Subject: [PATCH 26/26] Fixed invalid values return type for series --- pandas/tests/typing/valid/test_series.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/tests/typing/valid/test_series.py b/pandas/tests/typing/valid/test_series.py index 3191139c6e60d..2071311396407 100644 --- a/pandas/tests/typing/valid/test_series.py +++ b/pandas/tests/typing/valid/test_series.py @@ -16,7 +16,10 @@ import numpy as np -from pandas._typing import Scalar +from pandas._typing import ( + ArrayLike, + Scalar, +) import pandas as pd import pandas._testing as tm @@ -506,10 +509,10 @@ def test_types_rename_axis() -> None: def test_types_values() -> None: - n1: np.ndarray = pd.Series([1, 2, 3]).values - n2: np.ndarray = pd.Series(list("aabc")).values - n3: np.ndarray = pd.Series(list("aabc")).astype("category").values - n4: np.ndarray = pd.Series( + n1: ArrayLike = pd.Series([1, 2, 3]).values + n2: ArrayLike = pd.Series(list("aabc")).values + n3: ArrayLike = pd.Series(list("aabc")).astype("category").values + n4: ArrayLike = pd.Series( pd.date_range("20130101", periods=3, tz="US/Eastern") ).values