# --- pandas/tests/typing/valid/test_frame.py (new file in this diff) ---
# (the diff also adds an empty pandas/tests/typing/valid/__init__.py)
# flake8: noqa: F841
# TODO: many functions need return types annotations for pyright
# to run with reportGeneralTypeIssues = true
import io
from pathlib import Path
import tempfile
from typing import (
    Any,
    Dict,
    Iterable,
    List,
    Tuple,
)

import numpy as np

import pandas as pd
from pandas.util import _test_decorators as td


def test_types_init() -> None:
    """Type-check the DataFrame constructor overloads (dict, list, ndarray data)."""
    pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]}, index=[2, 1])
    pd.DataFrame(data=[1, 2, 3, 4], dtype=np.int8)
    pd.DataFrame(
        np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
        columns=["a", "b", "c"],
        dtype=np.int8,
        copy=True,
    )


def test_types_to_csv() -> None:
    """Type-check DataFrame.to_csv with str/Path targets and binary buffers."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    # error: Incompatible types in assignment (expression has type "Optional[str]",
    # variable has type "str")
    csv_df: str = df.to_csv()  # type: ignore[assignment]

    with tempfile.NamedTemporaryFile() as file:
        df.to_csv(file.name)
        df2: pd.DataFrame = pd.read_csv(file.name)

    with tempfile.NamedTemporaryFile() as file:
        df.to_csv(Path(file.name))
        df3: pd.DataFrame = pd.read_csv(Path(file.name))

    # This keyword was added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    with tempfile.NamedTemporaryFile() as file:
        df.to_csv(file.name, errors="replace")
        df4: pd.DataFrame = pd.read_csv(file.name)

    # Testing support for binary file handles, added in 1.2.0
    # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
    df.to_csv(io.BytesIO(), encoding="utf-8", compression="gzip")


def test_types_to_csv_when_path_passed() -> None:
    """Type-check to_csv/read_csv round-trip through a pathlib.Path argument."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    path: Path = Path("./dummy_path.txt")
    try:
        assert not path.exists()
        df.to_csv(path)
        df5: pd.DataFrame = pd.read_csv(path)
    finally:
        # always clean up the file written into the working directory
        path.unlink()
def test_types_copy() -> None:
    """Type-check that DataFrame.copy returns a DataFrame."""
    df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    df2: pd.DataFrame = df.copy()


def test_types_getitem() -> None:
    """Type-check __getitem__ with str/int keys, lists, Series, Index, ndarray, and boolean frames."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]})
    i = pd.Index(["col1", "col2"])
    s = pd.Series(["col1", "col2"])
    select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]})
    a = np.array(["col1", "col2"])
    df["col1"]
    df[5]
    df[["col1", "col2"]]
    df[1:]
    df[s]
    df[a]
    df[select_df]
    df[i]


def test_slice_setitem() -> None:
    """Type-check slice assignment on a DataFrame (kept separate, see issue link)."""
    # Due to the bug in pandas 1.2.3
    # (https://github.com/pandas-dev/pandas/issues/40440),
    # this is in separate test case
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]})
    df[1:] = ["a", "b", "c"]


def test_types_setitem() -> None:
    """Type-check __setitem__ with the same key kinds as test_types_getitem."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4], 5: [6, 7]})
    i = pd.Index(["col1", "col2"])
    s = pd.Series(["col1", "col2"])
    select_df = pd.DataFrame({"col1": [True, True], "col2": [False, True]})
    a = np.array(["col1", "col2"])
    df["col1"] = [1, 2]
    df[5] = [5, 6]
    df[["col1", "col2"]] = [[1, 2], [3, 4]]
    df[s] = [5, 6]
    df[a] = [[1, 2], [3, 4]]
    df[select_df] = [1, 2, 3]
    df[i] = [8, 9]


def test_types_iloc_iat() -> None:
    """Type-check positional indexers .iloc and .iat."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.iloc[1, 1]
    df.iloc[[1], [1]]
    df.iat[0, 0]


def test_types_loc_at() -> None:
    """Type-check label indexers .loc and .at."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.loc[[0], "col1"]
    df.at[0, "col1"]


def test_types_boolean_indexing() -> None:
    """Type-check boolean-mask indexing, including the inverted (~) mask."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df[df > 1]
    df[~(df > 1.0)]


def test_types_head_tail() -> None:
    """Type-check DataFrame.head/tail."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.head(1)
    df.tail(1)
def test_types_assign() -> None:
    """Type-check DataFrame.assign with a callable and direct column assignment."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.assign(col3=lambda frame: frame.sum(axis=1))
    df["col3"] = df.sum(axis=1)


def test_types_sample() -> None:
    """Type-check DataFrame.sample with frac= and n=."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.sample(frac=0.5)
    df.sample(n=1)


def test_types_nlargest_nsmallest() -> None:
    """Type-check DataFrame.nlargest/nsmallest."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.nlargest(1, "col1")
    df.nsmallest(1, "col2")


def test_types_filter() -> None:
    """Type-check DataFrame.filter with items/regex/like."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.filter(items=["col1"])
    df.filter(regex="co.*")
    df.filter(like="1")


def test_types_setting() -> None:
    """Type-check scalar broadcast assignment and masked assignment."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df["col1"] = 1
    df[df == 1] = 7


def test_types_drop() -> None:
    """Type-check DataFrame.drop overloads; inplace=True is typed as returning None."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    res: pd.DataFrame = df.drop("col1", axis=1)
    res2: pd.DataFrame = df.drop(columns=["col1"])
    res3: pd.DataFrame = df.drop({0})
    res4: pd.DataFrame = df.drop(index={0})
    res5: pd.DataFrame = df.drop(columns={"col1"})
    res6: pd.DataFrame = df.drop(index=1)
    res7: pd.DataFrame = df.drop(labels=0)
    res8: None = df.drop([0, 0], inplace=True)


def test_types_dropna() -> None:
    """Type-check DataFrame.dropna, including the inplace=True -> None overload."""
    df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]})
    res: pd.DataFrame = df.dropna()
    res2: pd.DataFrame = df.dropna(axis=1, thresh=1)
    res3: None = df.dropna(axis=0, how="all", subset=["col1"], inplace=True)


def test_types_fillna() -> None:
    """Type-check DataFrame.fillna, including the inplace=True -> None overload."""
    df = pd.DataFrame(data={"col1": [np.nan, np.nan], "col2": [3, np.nan]})
    res: pd.DataFrame = df.fillna(0)
    res2: None = df.fillna(method="pad", axis=1, inplace=True)


def test_types_sort_index() -> None:
    """Type-check DataFrame.sort_index: level types, kind=, and inplace overloads."""
    df = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=[5, 1, 3, 2])
    df2 = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=["a", "b", "c", "d"])
    res: pd.DataFrame = df.sort_index()
    level1 = (1, 2)
    res2: pd.DataFrame = df.sort_index(ascending=False, level=level1)
    level2: List[str] = ["a", "b", "c"]
    # error: Argument "level" to "sort_index" of "DataFrame" has incompatible type
    # "List[str]"; expected "Optional[Union[Hashable, int]]"
    res3: pd.DataFrame = df2.sort_index(level=level2)  # type: ignore[arg-type]
    res4: pd.DataFrame = df.sort_index(ascending=False, level=3)
    res5: None = df.sort_index(kind="mergesort", inplace=True)
# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_sort_index_with_key() -> None:
    """Type-check the key= callable parameter of sort_index (pandas >= 1.1.0)."""
    df = pd.DataFrame(data={"col1": [1, 2, 3, 4]}, index=["a", "b", "C", "d"])
    res: pd.DataFrame = df.sort_index(key=lambda k: k.str.lower())


def test_types_set_index() -> None:
    """Type-check DataFrame.set_index keyword combinations and the inplace overload."""
    df = pd.DataFrame(
        data={"col1": [1, 2, 3, 4], "col2": ["a", "b", "c", "d"]}, index=[5, 1, 3, 2]
    )
    res: pd.DataFrame = df.set_index("col1")
    res2: pd.DataFrame = df.set_index("col1", drop=False)
    res3: pd.DataFrame = df.set_index("col1", append=True)
    res4: pd.DataFrame = df.set_index("col1", verify_integrity=True)
    res5: pd.DataFrame = df.set_index(["col1", "col2"])
    res6: None = df.set_index("col1", inplace=True)


def test_types_query() -> None:
    """Type-check DataFrame.query, including the inplace=True -> None overload."""
    df = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 0, 1, 7]})
    res: pd.DataFrame = df.query("col1 > col2")
    res2: None = df.query("col1 % col2 == 0", inplace=True)


def test_types_eval() -> None:
    """Type-check DataFrame.eval, including the inplace=True -> None overload."""
    df = pd.DataFrame(data={"col1": [1, 2, 3, 4], "col2": [3, 0, 1, 7]})
    df.eval("col1 > col2")
    res: None = df.eval("C = col1 % col2 == 0", inplace=True)


def test_types_sort_values() -> None:
    """Type-check DataFrame.sort_values with scalar/list by= and ascending=."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    res: pd.DataFrame = df.sort_values("col1")
    res2: None = df.sort_values("col1", ascending=False, inplace=True)
    res3: pd.DataFrame = df.sort_values(by=["col1", "col2"], ascending=[True, False])


# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_sort_values_with_key() -> None:
    """Type-check the key= callable parameter of sort_values (pandas >= 1.1.0)."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    res: pd.DataFrame = df.sort_values(by="col1", key=lambda k: -k)
def test_types_shift() -> None:
    """Type-check DataFrame.shift with positive/negative periods."""
    df = pd.DataFrame(data={"col1": [1, 1], "col2": [3, 4]})
    df.shift()
    df.shift(1)
    df.shift(-1)


def test_types_rank() -> None:
    """Type-check DataFrame.rank keyword combinations."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.rank(axis=0, na_option="bottom")
    df.rank(method="min", pct=True)
    df.rank(method="dense", ascending=True)
    df.rank(method="first", numeric_only=True)


def test_types_mean() -> None:
    """Type-check DataFrame.mean; the stub returns Union[Series, float], hence the ignores."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "Series")
    s1: pd.Series = df.mean()  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "Series")
    s2: pd.Series = df.mean(axis=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "DataFrame")
    df2: pd.DataFrame = df.mean(level=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "DataFrame")
    df3: pd.DataFrame = df.mean(axis=1, level=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "DataFrame")
    df4: pd.DataFrame = df.mean(1, True, level=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "Series")
    s3: pd.Series = df.mean(  # type: ignore[assignment]
        axis=1, skipna=True, numeric_only=False
    )


def test_types_median() -> None:
    """Type-check DataFrame.median; mirrors test_types_mean for the same Union return."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "Series")
    s1: pd.Series = df.median()  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "Series")
    s2: pd.Series = df.median(axis=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "DataFrame")
    df2: pd.DataFrame = df.median(level=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "DataFrame")
    df3: pd.DataFrame = df.median(axis=1, level=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "DataFrame")
    df4: pd.DataFrame = df.median(1, True, level=0)  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # float]", variable has type "Series")
    s3: pd.Series = df.median(  # type: ignore[assignment]
        axis=1, skipna=True, numeric_only=False
    )
def test_types_itertuples() -> None:
    """Type-check itertuples: result is an iterable of variadic tuples."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    res1: Iterable[Tuple[Any, ...]] = df.itertuples()
    res2: Iterable[Tuple[Any, ...]] = df.itertuples(index=False, name="Foobar")
    res3: Iterable[Tuple[Any, ...]] = df.itertuples(index=False, name=None)


def test_types_sum() -> None:
    """Type-check DataFrame.sum with and without axis."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.sum()
    df.sum(axis=1)


def test_types_cumsum() -> None:
    """Type-check DataFrame.cumsum (and sum with explicit axis=0)."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.cumsum()
    df.sum(axis=0)


def test_types_min() -> None:
    """Type-check DataFrame.min."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.min()
    df.min(axis=0)


def test_types_max() -> None:
    """Type-check DataFrame.max."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.max()
    df.max(axis=0)
def test_types_quantile() -> None:
    """Type-check DataFrame.quantile with list, scalar, and default q."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.quantile([0.25, 0.5])
    df.quantile(0.75)
    df.quantile()


def test_types_clip() -> None:
    """Type-check DataFrame.clip with lower/upper bounds."""
    df = pd.DataFrame(data={"col1": [20, 12], "col2": [3, 14]})
    df.clip(lower=5, upper=15)


def test_types_abs() -> None:
    """Type-check DataFrame.abs."""
    df = pd.DataFrame(data={"col1": [-5, 1], "col2": [3, -14]})
    df.abs()


def test_types_var() -> None:
    """Type-check DataFrame.var keyword combinations."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]})
    df.var()
    df.var(axis=1, ddof=1)
    df.var(skipna=True, numeric_only=False)


def test_types_std() -> None:
    """Type-check DataFrame.std keyword combinations."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [1, 4]})
    df.std()
    df.std(axis=1, ddof=1)
    df.std(skipna=True, numeric_only=False)


def test_types_idxmin() -> None:
    """Type-check DataFrame.idxmin."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.idxmin()
    df.idxmin(axis=0)


def test_types_idxmax() -> None:
    """Type-check DataFrame.idxmax."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.idxmax()
    df.idxmax(axis=0)


# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_value_counts() -> None:
    """Type-check DataFrame.value_counts (pandas >= 1.1.0)."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]})
    df.value_counts()


def test_types_unique() -> None:
    """Type-check Series.unique reached through a DataFrame column."""
    # This is really more of a Series test
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [1, 4]})
    df["col1"].unique()


def test_types_apply() -> None:
    """Type-check DataFrame.apply with lambda, ufunc, and class callables."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.apply(lambda x: x ** 2)
    df.apply(np.exp)
    df.apply(str)


def test_types_applymap() -> None:
    """Type-check DataFrame.applymap, including na_action (pandas >= 1.2.0)."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df.applymap(lambda x: x ** 2)
    df.applymap(np.exp)
    df.applymap(str)
    # na_action parameter was added in 1.2.0
    # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
    df.applymap(np.exp, na_action="ignore")
    df.applymap(str, na_action=None)


def test_types_element_wise_arithmetic() -> None:
    """Type-check operators and their method forms (add/sub/.../divmod) between frames."""
    df = pd.DataFrame(data={"col1": [2, 1], "col2": [3, 4]})
    df2 = pd.DataFrame(data={"col1": [10, 20], "col3": [3, 4]})

    df + df2
    df.add(df2, fill_value=0)

    df - df2
    df.sub(df2, fill_value=0)

    df * df2
    df.mul(df2, fill_value=0)

    df / df2
    df.div(df2, fill_value=0)

    df // df2
    df.floordiv(df2, fill_value=0)

    df % df2
    df.mod(df2, fill_value=0)

    # divmod operation was added in 1.2.0
    # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
    # noinspection PyTypeChecker
    divmod(df, df2)
    df.__divmod__(df2)
    df.__rdivmod__(df2)
def test_types_melt() -> None:
    """Type-check DataFrame.melt and the module-level pd.melt equivalents."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df.melt()
    df.melt(id_vars=["col1"], value_vars=["col2"])
    df.melt(
        id_vars=["col1"],
        value_vars=["col2"],
        var_name="someVariable",
        value_name="someValue",
    )

    pd.melt(df)
    pd.melt(df, id_vars=["col1"], value_vars=["col2"])
    pd.melt(
        df,
        id_vars=["col1"],
        value_vars=["col2"],
        var_name="someVariable",
        value_name="someValue",
    )


def test_types_pivot() -> None:
    """Type-check DataFrame.pivot with scalar and list values=."""
    df = pd.DataFrame(
        data={
            "col1": ["first", "second", "third", "fourth"],
            "col2": [50, 70, 56, 111],
            "col3": ["A", "B", "B", "A"],
            "col4": [100, 102, 500, 600],
        }
    )
    df.pivot(index="col1", columns="col3", values="col2")
    df.pivot(index="col1", columns="col3")
    df.pivot(index="col1", columns="col3", values=["col2", "col4"])


def test_types_groupby() -> None:
    """Type-check groupby construction and agg/aggregate/transform/count/filter/nunique results."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5], "col3": [0, 1, 0]})
    df.index.name = "ind"
    df.groupby(by="col1")
    df.groupby(level="ind")
    df.groupby(by="col1", sort=False, as_index=True)
    df.groupby(by=["col1", "col2"])

    df1: pd.DataFrame = df.groupby(by="col1").agg("sum")
    df2: pd.DataFrame = df.groupby(level="ind").aggregate("sum")
    df3: pd.DataFrame = df.groupby(by="col1", sort=False, as_index=True).transform(
        lambda x: x.max()
    )
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # DataFrame]", variable has type "DataFrame")
    df4: pd.DataFrame = df.groupby(  # type: ignore[assignment]
        by=["col1", "col2"]
    ).count()
    df5: pd.DataFrame = df.groupby(by=["col1", "col2"]).filter(lambda x: x["col1"] > 0)
    df6: pd.DataFrame = df.groupby(by=["col1", "col2"]).nunique()
# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_group_by_with_dropna_keyword() -> None:
    """Type-check the dropna= keyword of groupby (pandas >= 1.1.0)."""
    df = pd.DataFrame(
        data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]}
    )
    df.groupby(by="col2", dropna=True).sum()
    df.groupby(by="col2", dropna=False).sum()
    df.groupby(by="col2").sum()


def test_types_merge() -> None:
    """Type-check DataFrame.merge with str/list/tuple on= and how=."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df2 = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [0, 1, 0]})
    df.merge(df2)
    df.merge(df2, on="col1")
    df.merge(df2, on="col1", how="left")
    df.merge(df2, on=["col1", "col2"], how="left")
    df.merge(df2, on=("col1", "col2"), how="left")
    l: List[str] = ["col1", "col2"]
    df.merge(df2, on=l)


def test_types_plot() -> None:
    """Type-check the DataFrame.plot accessor (hist, scatter)."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df.plot.hist()
    df.plot.scatter(x="col2", y="col1")


def test_types_window() -> None:
    """Type-check expanding/rolling window construction."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df.expanding()
    df.expanding(axis=1, center=True)

    df.rolling(2)
    df.rolling(2, axis=1, center=True)


def test_types_cov() -> None:
    """Type-check DataFrame.cov, including ddof= (pandas >= 1.1.0)."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df.cov()
    df.cov(min_periods=1)
    # ddof param was added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.cov(ddof=2)


def test_types_to_numpy() -> None:
    """Type-check DataFrame.to_numpy, including na_value= (pandas >= 1.1.0)."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df.to_numpy()
    df.to_numpy(dtype="str", copy=True)
    # na_value param was added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.to_numpy(na_value=0)


# error: Untyped decorator makes function "test_to_markdown" untyped
@td.skip_if_no("tabulate")  # type: ignore[misc]
def test_to_markdown() -> None:
    """Type-check DataFrame.to_markdown, including index= (pandas >= 1.1.0)."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df.to_markdown()
    df.to_markdown(buf=None, mode="wt")
    # index param was added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.to_markdown(index=False)


# error: Untyped decorator makes function "test_types_to_feather" untyped
@td.skip_if_no("pyarrow")  # type: ignore[misc]
def test_types_to_feather() -> None:
    """Type-check DataFrame.to_feather with path, pyarrow kwargs, and a binary buffer."""
    df = pd.DataFrame(data={"col1": [1, 1, 2], "col2": [3, 4, 5]})
    df.to_feather("dummy_path")
    # kwargs for pyarrow.feather.write_feather added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.to_feather(
        "dummy_path",
        compression="zstd",
        compression_level=3,
        chunksize=2,
    )

    # to_feather has been able to accept a buffer since pandas 1.0.0
    # See https://pandas.pydata.org/docs/whatsnew/v1.0.0.html
    # Docstring and type were updated in 1.2.0.
    # https://github.com/pandas-dev/pandas/pull/35408
    with tempfile.TemporaryFile() as f:
        df.to_feather(f)


# compare() method added in 1.1.0
# https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
def test_types_compare() -> None:
    """Type-check DataFrame.compare (pandas >= 1.1.0)."""
    df1 = pd.DataFrame(
        data={"col1": [1, 1, 2, 1], "col2": [2, None, 1, 2], "col3": [3, 4, 3, 2]}
    )
    df2 = pd.DataFrame(
        data={"col1": [1, 2, 5, 6], "col2": [3, 4, 1, 1], "col3": [3, 4, 3, 2]}
    )
    df1.compare(df2)
    df2.compare(df1, align_axis=0, keep_shape=True, keep_equal=True)


def test_types_agg() -> None:
    """Type-check DataFrame.agg with str, named-tuple kwargs, and axis=."""
    df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=["A", "B", "C"])
    df.agg("min")
    df.agg(x=("A", max), y=("B", "min"), z=("C", np.mean))
    df.agg("mean", axis=1)


def test_types_describe() -> None:
    """Type-check DataFrame.describe, incl. datetime_is_numeric (pandas >= 1.1.0)."""
    df = pd.DataFrame(
        data={
            "col1": [1, 2, -4],
            "col2": [
                np.datetime64("2000-01-01"),
                np.datetime64("2010-01-01"),
                np.datetime64("2010-01-01"),
            ],
        }
    )
    df.describe()
    df.describe(percentiles=[0.5], include="all")
    df.describe(exclude=np.number)
    # datetime_is_numeric param added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.describe(datetime_is_numeric=True)
def test_types_to_string() -> None:
    """Type-check DataFrame.to_string, incl. list/dict col_space (pandas >= 1.1.0)."""
    df = pd.DataFrame(
        data={
            "col1": [1, None, -4],
            "col2": [
                np.datetime64("2000-01-01"),
                np.datetime64("2010-01-01"),
                np.datetime64("2010-01-01"),
            ],
        }
    )
    df.to_string(
        index=True,
        col_space=2,
        header=["a", "b"],
        na_rep="0",
        justify="left",
        max_rows=2,
        min_rows=0,
        max_cols=2,
        show_dimensions=True,
        line_width=3,
    )
    # col_space accepting list or dict added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.to_string(col_space=[1, 2])
    df.to_string(col_space={"col1": 1, "col2": 3})


def test_types_to_html() -> None:
    """Type-check DataFrame.to_html, incl. list/dict col_space (pandas >= 1.1.0)."""
    df = pd.DataFrame(
        data={
            "col1": [1, None, -4],
            "col2": [
                np.datetime64("2000-01-01"),
                np.datetime64("2010-01-01"),
                np.datetime64("2010-01-01"),
            ],
        }
    )
    df.to_html(
        index=True,
        col_space=2,
        header=["a", "b"],
        na_rep="0",
        justify="left",
        max_rows=2,
        max_cols=2,
        show_dimensions=True,
    )
    # col_space accepting list or dict added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.to_html(col_space=[1, 2])
    df.to_html(col_space={"col1": 1, "col2": 3})


def test_types_resample() -> None:
    """Type-check DataFrame.resample, incl. origin/offset (pandas >= 1.1.0)."""
    df = pd.DataFrame({"values": [2, 11, 3, 13, 14, 18, 17, 19]})
    df["date"] = pd.date_range("01/01/2018", periods=8, freq="W")
    df.resample("M", on="date")
    # origin and offset params added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    df.resample("20min", origin="epoch", offset=pd.Timedelta(2, "minutes"), on="date")


def test_types_from_dict() -> None:
    """Type-check DataFrame.from_dict with list/tuple/nested-dict data and orient=."""
    pd.DataFrame.from_dict({"col_1": [3, 2, 1, 0], "col_2": ["a", "b", "c", "d"]})
    pd.DataFrame.from_dict({1: [3, 2, 1, 0], 2: ["a", "b", "c", "d"]})
    pd.DataFrame.from_dict({"a": {1: 2}, "b": {3: 4, 1: 4}}, orient="index")
    pd.DataFrame.from_dict({"a": {"row1": 2}, "b": {"row2": 4, "row1": 4}})
    pd.DataFrame.from_dict({"a": (1, 2, 3), "b": (2, 4, 5)})
    pd.DataFrame.from_dict(
        data={"col_1": {"a": 1}, "col_2": {"a": 1, "b": 2}}, orient="columns"
    )
def test_pipe() -> None:
    """Type-check .pipe on DataFrame, Resampler, GroupBy, and Styler objects."""

    def foo(df: pd.DataFrame) -> pd.DataFrame:
        return df

    df1: pd.DataFrame = pd.DataFrame({"a": [1]}).pipe(foo)

    df2: pd.DataFrame = (
        pd.DataFrame(
            {
                "price": [10, 11, 9, 13, 14, 18, 17, 19],
                "volume": [50, 60, 40, 100, 50, 100, 40, 50],
            }
        )
        .assign(week_starting=pd.date_range("01/01/2018", periods=8, freq="W"))
        .resample("M", on="week_starting")
        .pipe(foo)
    )

    df3: pd.DataFrame = pd.DataFrame({"a": [1], "b": [1]}).groupby("a").pipe(foo)

    df4: pd.DataFrame = pd.DataFrame({"a": [1], "b": [1]}).style.pipe(foo)


# set_flags() method added in 1.2.0
# https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
def test_types_set_flags() -> None:
    """Type-check DataFrame.set_flags (pandas >= 1.2.0)."""
    pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags(
        allows_duplicate_labels=False
    )
    pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"]).set_flags(
        allows_duplicate_labels=True
    )
    pd.DataFrame([[1, 2], [8, 9]], columns=["A", "A"])


# error: Untyped decorator makes function "test_types_to_parquet" untyped
@td.skip_if_no("pyarrow")  # type: ignore[misc]
def test_types_to_parquet() -> None:
    """Type-check DataFrame.to_parquet; returns bytes when no path given (>= 1.2.0)."""
    df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"]).set_flags(
        allows_duplicate_labels=False
    )
    with tempfile.NamedTemporaryFile() as file:
        df.to_parquet(Path(file.name))
    # to_parquet() returns bytes when no path given since 1.2.0
    # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
    # error: Incompatible types in assignment (expression has type "Optional[bytes]",
    # variable has type "bytes")
    b: bytes = df.to_parquet()  # type: ignore[assignment]


def test_types_to_latex() -> None:
    """Type-check DataFrame.to_latex, incl. position= and tuple caption (>= 1.2.0)."""
    df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"])
    df.to_latex(
        columns=["A"], label="some_label", caption="some_caption", multirow=True
    )
    df.to_latex(escape=False, decimal=",", column_format="r")
    # position param was added in 1.2.0
    # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
    df.to_latex(position="some")
    # caption param was extended to accept tuple in 1.2.0
    # https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
    df.to_latex(caption=("cap1", "cap2"))
def test_types_explode() -> None:
    """Type-check DataFrame.explode, incl. ignore_index= (pandas >= 1.1.0)."""
    df = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"])
    res1: pd.DataFrame = df.explode("A")
    res2: pd.DataFrame = df.explode("A", ignore_index=False)
    res3: pd.DataFrame = df.explode("A", ignore_index=True)


def test_types_rename() -> None:
    """Type-check DataFrame.rename; the stub's Mapping[Hashable, ...] rejects Dict[str, str]."""
    df = pd.DataFrame(columns=["a"])
    col_map = {"a": "b"}
    # error: Argument "columns" to "rename" of "DataFrame" has incompatible type
    # "Dict[str, str]"; expected "Optional[Union[Mapping[Hashable, Any],
    # Callable[[Hashable], Hashable]]]"
    df.rename(columns=col_map)  # type: ignore[arg-type]
    df.rename(columns={"a": "b"})
    df.rename(columns={1: "b"})
    # Apparently all of these calls are accepted by pandas
    df.rename(columns={None: "b"})
    df.rename(columns={"": "b"})
    df.rename(columns={(2, 1): "b"})


def test_types_eq() -> None:
    """Type-check == against a scalar and against another DataFrame."""
    df1 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"])
    res1: pd.DataFrame = df1 == 1
    df2 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"])
    res2: pd.DataFrame = df1 == df2


def test_types_as_type() -> None:
    """Type-check DataFrame.astype with a per-column dtype mapping."""
    df1 = pd.DataFrame([[1, 2], [8, 9]], columns=["A", "B"])
    df2: pd.DataFrame = df1.astype({"A": "int32"})


def test_types_dot() -> None:
    """Type-check matmul (@) and .dot against DataFrame, ndarray, and Series operands."""
    df1 = pd.DataFrame([[0, 1, -2, -1], [1, 1, 1, 1]])
    df2 = pd.DataFrame([[0, 1], [1, 2], [-1, -1], [2, 0]])
    s1 = pd.Series([1, 1, 2, 1])
    np_array = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]])
    # error: Incompatible types in assignment (expression has type "Union[DataFrame,
    # Series]", variable has type "DataFrame")
    df3: pd.DataFrame = df1 @ df2  # type: ignore[assignment]
    df4: pd.DataFrame = df1.dot(df2)
    # error: Incompatible types in assignment (expression has type "Union[DataFrame,
    # Series]", variable has type "DataFrame")
    df5: pd.DataFrame = df1 @ np_array  # type: ignore[assignment]
    df6: pd.DataFrame = df1.dot(np_array)
    df7: pd.Series = df1 @ s1
    df8: pd.Series = df1.dot(s1)
# --- pandas/tests/typing/valid/test_interval.py (new file in this diff) ---
# flake8: noqa: F841
# pyright: reportGeneralTypeIssues = true

import pandas as pd


def test_interval_init() -> None:
    """Type-check the pd.Interval constructor (positional and keyword forms)."""
    i1: pd.Interval = pd.Interval(1, 2, closed="both")
    i2: pd.Interval = pd.Interval(1, right=2, closed="right")
    i3: pd.Interval = pd.Interval(left=1, right=2, closed="left")


def test_interval_arithmetic() -> None:
    """Type-check scalar arithmetic on Interval (+, -, *, /, //)."""
    i1: pd.Interval = pd.Interval(1, 2, closed="both")
    i2: pd.Interval = pd.Interval(1, right=2, closed="right")

    i3: pd.Interval = i1 + 1
    i4: pd.Interval = i1 - 1
    i5: pd.Interval = i1 * 2
    i6: pd.Interval = i1 / 2
    i7: pd.Interval = i1 // 2


def test_max_intervals() -> None:
    """Type-check max() over Timestamp endpoints of two Intervals."""
    i1 = pd.Interval(
        pd.Timestamp("2000-01-01"), pd.Timestamp("2000-01-02"), closed="both"
    )
    i2 = pd.Interval(
        pd.Timestamp("2000-01-01T12:00:00"), pd.Timestamp("2000-01-02"), closed="both"
    )
    print(max(i1.left, i2.left))


# --- pandas/tests/typing/valid/test_pandas.py (new file in this diff) ---
# flake8: noqa: F841
# TODO: many functions need return types annotations for pyright
# to run with reportGeneralTypeIssues = true
import tempfile
from typing import (
    Any,
    Dict,
    List,
    Union,
)

import pandas as pd

from pandas.io.parsers import TextFileReader


def test_types_to_datetime() -> None:
    """Type-check pd.to_datetime; the DataFrame overload is missing from the stubs."""
    df = pd.DataFrame({"year": [2015, 2016], "month": [2, 3], "day": [4, 5]})
    # error: No overload variant of "to_datetime" matches argument type "DataFrame"
    pd.to_datetime(df)  # type: ignore[call-overload]
    # error: No overload variant of "to_datetime" matches argument types "DataFrame",
    # "str", "str", "bool"
    pd.to_datetime(  # type: ignore[call-overload]
        df, unit="s", origin="unix", infer_datetime_format=True
    )
    # error: No overload variant of "to_datetime" matches argument types "DataFrame",
    # "str", "bool", "None", "str", "bool"
    pd.to_datetime(  # type: ignore[call-overload]
        df, unit="ns", dayfirst=True, utc=None, format="%M:%D", exact=False
    )
    pd.to_datetime([1, 2], unit="D", origin=pd.Timestamp("01/01/2000"))
    pd.to_datetime([1, 2], unit="D", origin=3)
def test_types_concat() -> None:
    """Type-check pd.concat: list inputs are fine; dict inputs need arg-type ignores."""
    s = pd.Series([0, 1, -10])
    s2 = pd.Series([7, -5, 10])

    pd.concat([s, s2])
    pd.concat([s, s2], axis=1)
    pd.concat([s, s2], keys=["first", "second"], sort=True)
    pd.concat([s, s2], keys=["first", "second"], names=["source", "row"])

    # Depends on the axis
    # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected
    # "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]"
    rs1: Union[pd.Series, pd.DataFrame] = pd.concat(
        {"a": s, "b": s2}  # type:ignore[arg-type]
    )
    # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected
    # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]"
    rs1a: Union[pd.Series, pd.DataFrame] = pd.concat(
        {"a": s, "b": s2}, axis=1  # type:ignore[arg-type]
    )
    # error: Argument 1 to "concat" has incompatible type "Dict[int, Series]"; expected
    # "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]"
    rs2: Union[pd.Series, pd.DataFrame] = pd.concat(
        {1: s, 2: s2}  # type:ignore[arg-type]
    )
    # error: Argument 1 to "concat" has incompatible type "Dict[int, Series]"; expected
    # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]"
    rs2a: Union[pd.Series, pd.DataFrame] = pd.concat(
        {1: s, 2: s2}, axis=1  # type:ignore[arg-type]
    )
    # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], Series]";
    # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]"
    rs3: Union[pd.Series, pd.DataFrame] = pd.concat(
        {1: s, None: s2}  # type:ignore[arg-type]
    )
    # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int], Series]";
    # expected "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]"
    rs3a: Union[pd.Series, pd.DataFrame] = pd.concat(
        {1: s, None: s2}, axis=1  # type:ignore[arg-type]
    )

    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    df2 = pd.DataFrame(data={"col1": [10, 20], "col2": [30, 40]})

    pd.concat([df, df2])
    pd.concat([df, df2], axis=1)
    pd.concat([df, df2], keys=["first", "second"], sort=True)
    pd.concat([df, df2], keys=["first", "second"], names=["source", "row"])

    # error: Incompatible types in assignment (expression has type "Union[DataFrame,
    # Series]", variable has type "DataFrame")
    # error: Argument 1 to "concat" has incompatible type "Dict[str, DataFrame]";
    # expected "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]"
    result: pd.DataFrame = pd.concat(  # type: ignore[assignment]
        {
            "a": pd.DataFrame([1, 2, 3]),
            "b": pd.DataFrame([4, 5, 6]),
        },  # type:ignore[arg-type]
        axis=1,
    )
    # error: Argument 1 to "concat" has incompatible type "Dict[str, Series]"; expected
    # "Union[Iterable[NDFrame], Mapping[Hashable, NDFrame]]"
    result2: Union[pd.DataFrame, pd.Series] = pd.concat(
        {
            "a": pd.Series([1, 2, 3]),
            "b": pd.Series([4, 5, 6]),
        },  # type:ignore[arg-type]
        axis=1,
    )

    # error: Argument 1 to "concat" has incompatible type "Dict[str, DataFrame]";
    # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]"
    rdf1: pd.DataFrame = pd.concat({"a": df, "b": df2})  # type:ignore[arg-type]
    # error: Argument 1 to "concat" has incompatible type "Dict[int, DataFrame]";
    # expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]"
    rdf2: pd.DataFrame = pd.concat({1: df, 2: df2})  # type:ignore[arg-type]
    # error: Argument 1 to "concat" has incompatible type "Dict[Optional[int],
    # DataFrame]"; expected "Union[Iterable[DataFrame], Mapping[Hashable, DataFrame]]"
    rdf3: pd.DataFrame = pd.concat({1: df, None: df2})  # type:ignore[arg-type]

    rdf4: pd.DataFrame = pd.concat(list(map(lambda x: s2, ["some_value", 3])), axis=1)
def test_types_json_normalize() -> None:
    """Type-check pd.json_normalize with list-of-dicts and single-dict inputs."""
    data1: List[Dict[str, Any]] = [
        {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
        {"name": {"given": "More", "family": "Regner"}},
        {"id": 2, "name": "Faye Raker"},
    ]
    df1: pd.DataFrame = pd.json_normalize(data=data1)
    df2: pd.DataFrame = pd.json_normalize(data=data1, max_level=0, sep=";")
    df3: pd.DataFrame = pd.json_normalize(
        data=data1, meta_prefix="id", record_prefix="name", errors="raise"
    )
    df4: pd.DataFrame = pd.json_normalize(data=data1, record_path=None, meta="id")
    data2: Dict[str, Any] = {"name": {"given": "More", "family": "Regner"}}
    df5: pd.DataFrame = pd.json_normalize(data=data2)


def test_types_read_csv() -> None:
    """Type-check pd.read_csv: DataFrame result normally, TextFileReader with iterator/chunksize."""
    df = pd.DataFrame(data={"col1": [1, 2], "col2": [3, 4]})
    # error: Incompatible types in assignment (expression has type "Optional[str]",
    # variable has type "str")
    csv_df: str = df.to_csv()  # type: ignore[assignment]

    with tempfile.NamedTemporaryFile() as file:
        df.to_csv(file.name)
        df2: pd.DataFrame = pd.read_csv(file.name)
        df3: pd.DataFrame = pd.read_csv(file.name, sep="a", squeeze=False)
        df4: pd.DataFrame = pd.read_csv(
            file.name,
            header=None,
            prefix="b",
            mangle_dupe_cols=True,
            keep_default_na=False,
        )
        df5: pd.DataFrame = pd.read_csv(
            file.name, engine="python", true_values=[0, 1, 3], na_filter=False
        )
        df6: pd.DataFrame = pd.read_csv(
            file.name,
            skiprows=lambda x: x in [0, 2],
            skip_blank_lines=True,
            dayfirst=False,
        )
        df7: pd.DataFrame = pd.read_csv(file.name, nrows=2)
        tfr1: TextFileReader = pd.read_csv(
            file.name, nrows=2, iterator=True, chunksize=3
        )
        tfr2: TextFileReader = pd.read_csv(file.name, nrows=2, chunksize=1)
        tfr3: TextFileReader = pd.read_csv(
            file.name, nrows=2, iterator=False, chunksize=1
        )
        tfr4: TextFileReader = pd.read_csv(file.name, nrows=2, iterator=True)
# --- pandas/tests/typing/valid/test_series.py (new file in this diff) ---
# flake8: noqa: F841
# TODO: many functions need return types annotations for pyright
# to run with reportGeneralTypeIssues = true

from pathlib import Path
import tempfile
from typing import List

import numpy as np

from pandas._typing import Scalar

import pandas as pd
from pandas.core.window import ExponentialMovingWindow
from pandas.util import _test_decorators as td


def test_types_init() -> None:
    """Type-check the Series constructor overloads (scalar, tuple, ndarray, dict)."""
    pd.Series(1)
    pd.Series((1, 2, 3))
    pd.Series(np.array([1, 2, 3]))
    pd.Series(data=[1, 2, 3, 4], name="series")
    pd.Series(data=[1, 2, 3, 4], dtype=np.int8)
    pd.Series(data={"row1": [1, 2], "row2": [3, 4]})
    pd.Series(data=[1, 2, 3, 4], index=[4, 3, 2, 1], copy=True)


def test_types_any() -> None:
    """Type-check Series.any; the stub returns Union[Series, bool], hence the ignores."""
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # bool]", variable has type "bool")
    res1: bool = pd.Series([False, False]).any()  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # bool]", variable has type "bool")
    res2: bool = pd.Series([False, False]).any(  # type: ignore[assignment]
        bool_only=False
    )
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # bool]", variable has type "bool")
    res3: bool = pd.Series([np.nan]).any(skipna=False)  # type: ignore[assignment]


def test_types_all() -> None:
    """Type-check Series.all; mirrors test_types_any for the same Union return."""
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # bool]", variable has type "bool")
    res1: bool = pd.Series([False, False]).all()  # type: ignore[assignment]
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # bool]", variable has type "bool")
    res2: bool = pd.Series([False, False]).all(  # type: ignore[assignment]
        bool_only=False
    )
    # error: Incompatible types in assignment (expression has type "Union[Series,
    # bool]", variable has type "bool")
    res3: bool = pd.Series([np.nan]).all(skipna=False)  # type: ignore[assignment]
def test_types_csv() -> None:
    """Type-check Series.to_csv round trips via str, file name and Path."""
    ser = pd.Series(data=[1, 2, 3])
    # error: Incompatible types in assignment (expression has type "Optional[str]",
    # variable has type "str")
    csv_df: str = ser.to_csv()  # type: ignore[assignment]

    with tempfile.NamedTemporaryFile() as file:
        ser.to_csv(file.name)
        s2: pd.DataFrame = pd.read_csv(file.name)

    with tempfile.NamedTemporaryFile() as file:
        ser.to_csv(Path(file.name))
        s3: pd.DataFrame = pd.read_csv(Path(file.name))

    # The errors= keyword was added in 1.1.0
    # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html
    with tempfile.NamedTemporaryFile() as file:
        ser.to_csv(file.name, errors="replace")
        s4: pd.DataFrame = pd.read_csv(file.name)


def test_types_copy() -> None:
    """Series.copy() is annotated to return a Series."""
    original = pd.Series(data=[1, 2, 3, 4])
    duplicate: pd.Series = original.copy()


def test_types_select() -> None:
    """Scalar and slice __getitem__ on a label-indexed Series."""
    ser = pd.Series(data={"row1": 1, "row2": 2})
    ser[0]
    ser[1:]


def test_types_iloc_iat() -> None:
    """loc/iat access on label-indexed and position-indexed Series."""
    labeled = pd.Series(data={"row1": 1, "row2": 2})
    numbered = pd.Series(data=[1, 2])
    labeled.loc["row1"]
    labeled.iat[0]
    numbered.loc[0]
    numbered.iat[0]


def test_types_loc_at() -> None:
    """loc/at access on label-indexed and position-indexed Series."""
    labeled = pd.Series(data={"row1": 1, "row2": 2})
    numbered = pd.Series(data=[1, 2])
    labeled.loc["row1"]
    labeled.at["row1"]
    numbered.loc[1]
    numbered.at[1]


def test_types_boolean_indexing() -> None:
    """Mask-based selection on a Series."""
    ser = pd.Series([0, 1, 2])
    ser[ser > 1]
    ser[ser]


def test_types_head_tail() -> None:
    """head/tail with an explicit row count."""
    ser = pd.Series([0, 1, 2])
    ser.head(1)
    ser.tail(1)
def test_types_nlargest_nsmallest() -> None:
    """nlargest/nsmallest: keep= accepts 'first', 'last' and 'all'."""
    ser = pd.Series([0, 1, 2])
    ser.nlargest(1)
    ser.nlargest(1, "first")
    ser.nsmallest(1, "last")
    ser.nsmallest(1, "all")


def test_types_filter() -> None:
    """filter() by explicit items, regex and substring match."""
    ser = pd.Series(data=[1, 2, 3, 4], index=["cow", "coal", "coalesce", ""])
    ser.filter(items=["cow"])
    ser.filter(regex="co.*")
    ser.filter(like="al")


def test_types_setting() -> None:
    """Item, boolean-mask and full-slice assignment."""
    ser = pd.Series([0, 1, 2])
    ser[3] = 4
    ser[ser == 1] = 5
    ser[:] = 3


def test_types_drop() -> None:
    """drop() returns a Series, or None when inplace=True."""
    ser = pd.Series([0, 1, 2])
    dropped_one: pd.Series = ser.drop(0)
    dropped_many: pd.Series = ser.drop([0, 1])
    dropped_axis: pd.Series = ser.drop(0, axis=0)
    # error: Incompatible types in assignment (expression has type "Series", variable
    # has type "None")
    res4: None = ser.drop(  # type: ignore[assignment]
        [0, 1], inplace=True, errors="raise"
    )
    # error: Incompatible types in assignment (expression has type "Series", variable
    # has type "None")
    res5: None = ser.drop(  # type: ignore[assignment]
        [0, 1], inplace=True, errors="ignore"
    )


def test_types_drop_multilevel() -> None:
    """drop() with level= on a MultiIndex-backed Series."""
    index = pd.MultiIndex(
        levels=[["top", "bottom"], ["first", "second", "third"]],
        codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
    )
    ser = pd.Series(data=[1, 2, 3, 4, 5, 6], index=index)
    remaining: pd.Series = ser.drop(labels="first", level=1)


def test_types_dropna() -> None:
    """dropna() returns a Series; inplace=True returns None."""
    ser = pd.Series([1, np.nan, np.nan])
    cleaned: pd.Series = ser.dropna()
    nothing: None = ser.dropna(axis=0, inplace=True)


def test_types_fillna() -> None:
    """fillna() with value, method=, axis= and limit= combinations."""
    ser = pd.Series([1, np.nan, np.nan, 3])
    res: pd.Series = ser.fillna(0)
    res2: pd.Series = ser.fillna(0, axis="index")
    res3: pd.Series = ser.fillna(method="backfill", axis=0)
    res4: None = ser.fillna(method="bfill", inplace=True)
    res5: pd.Series = ser.fillna(method="pad")
    res6: pd.Series = ser.fillna(method="ffill", limit=1)
s.sort_index(ascending=False, inplace=True) + res3: pd.Series = s.sort_index(kind="mergesort") + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_index_with_key() -> None: + s = pd.Series([1, 2, 3], index=["a", "B", "c"]) + res: pd.Series = s.sort_index(key=lambda k: k.str.lower()) + + +def test_types_sort_values() -> None: + s = pd.Series([4, 2, 1, 3]) + res: pd.Series = s.sort_values(0) + res2: pd.Series = s.sort_values(ascending=False) + res3: None = s.sort_values(inplace=True, kind="quicksort") + res4: pd.Series = s.sort_values(na_position="last") + res5: pd.Series = s.sort_values(ignore_index=True) + + +# This was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_sort_values_with_key() -> None: + s = pd.Series([1, 2, 3], index=[2, 3, 1]) + res: pd.Series = s.sort_values(key=lambda k: -k) + + +def test_types_shift() -> None: + s = pd.Series([1, 2, 3]) + s.shift() + s.shift(axis=0, periods=1) + s.shift(-1, fill_value=0) + + +def test_types_rank() -> None: + s = pd.Series([1, 1, 2, 5, 6, np.nan, "million"]) + s.rank() + s.rank(axis=0, na_option="bottom") + s.rank(method="min", pct=True) + s.rank(method="dense", ascending=True) + s.rank(method="first", numeric_only=True) + + +def test_types_mean() -> None: + s = pd.Series([1, 2, 3, np.nan]) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f1: float = s.mean() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = s.mean(axis=0, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f2: float = s.mean(skipna=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has 
type "float") + f3: float = s.mean(numeric_only=False) # type: ignore[assignment] + + +def test_types_median() -> None: + s = pd.Series([1, 2, 3, np.nan]) + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f1: float = s.median() # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "Series") + s1: pd.Series = s.median(axis=0, level=0) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f2: float = s.median(skipna=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Union[Series, + # float]", variable has type "float") + f3: float = s.median(numeric_only=False) # type: ignore[assignment] + + +def test_types_sum() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.sum() + s.sum(axis=0, level=0) + s.sum(skipna=False) + s.sum(numeric_only=False) + s.sum(min_count=4) + + +def test_types_cumsum() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.cumsum() + s.cumsum(axis=0) + s.cumsum(skipna=False) + + +def test_types_min() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.min() + s.min(axis=0) + s.min(level=0) + s.min(skipna=False) + + +def test_types_max() -> None: + s = pd.Series([1, 2, 3, np.nan]) + s.max() + s.max(axis=0) + s.max(level=0) + s.max(skipna=False) + + +def test_types_quantile() -> None: + s = pd.Series([1, 2, 3, 10]) + s.quantile([0.25, 0.5]) + s.quantile(0.75) + s.quantile() + s.quantile(interpolation="nearest") + + +def test_types_clip() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.clip(lower=0, upper=5) + s.clip(lower=0, upper=5, inplace=True) + + +def test_types_abs() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.abs() + + +def test_types_var() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.var() + s.var(axis=0, ddof=1) + s.var(skipna=True, numeric_only=False) + + 
+def test_types_std() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.std() + s.std(axis=0, ddof=1) + s.std(skipna=True, numeric_only=False) + + +def test_types_idxmin() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.idxmin() + s.idxmin(axis=0) + + +def test_types_idxmax() -> None: + s = pd.Series([-10, 2, 3, 10]) + s.idxmax() + s.idxmax(axis=0) + + +def test_types_value_counts() -> None: + s = pd.Series([1, 2]) + s.value_counts() + + +def test_types_unique() -> None: + s = pd.Series([-10, 2, 2, 3, 10, 10]) + s.unique() + + +def test_types_apply() -> None: + s = pd.Series([-10, 2, 2, 3, 10, 10]) + s.apply(lambda x: x ** 2) + s.apply(np.exp) + s.apply(str) + + +def test_types_element_wise_arithmetic() -> None: + s = pd.Series([0, 1, -10]) + s2 = pd.Series([7, -5, 10]) + + s + s2 + s.add(s2, fill_value=0) + + s - s2 + s.sub(s2, fill_value=0) + + s * s2 + s.mul(s2, fill_value=0) + + s / s2 + # error: Unexpected keyword argument "fill_value" + s.div(s2, fill_value=0) # type: ignore[call-arg] + + s // s2 + s.floordiv(s2, fill_value=0) + + s % s2 + s.mod(s2, fill_value=0) + + +def test_types_groupby() -> None: + s = pd.Series([4, 2, 1, 8], index=["a", "b", "a", "b"]) + s.groupby(["a", "b", "a", "b"]) + s.groupby(level=0) + s.groupby(s > 2) + + +# This added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_group_by_with_dropna_keyword() -> None: + s = pd.Series([1, 2, 3, 3], index=["col1", "col2", "col3", np.nan]) + s.groupby(level=0, dropna=True).sum() + s.groupby(level=0, dropna=False).sum() + s.groupby(level=0).sum() + + +def test_types_plot() -> None: + s = pd.Series([0, 1, 1, 0, -10]) + s.plot.hist() + + +def test_types_window() -> None: + s = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s.expanding() + s.expanding(axis=0, center=True) + + s.rolling(2) + s.rolling(2, axis=0, center=True) + + +def test_types_cov() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s2 = pd.Series([0, 2, 12, -4, 7, 9, 2]) + s1.cov(s2) + s1.cov(s2, min_periods=1) + # 
ddof param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s1.cov(s2, ddof=2) + + +def test_update() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s1.update(pd.Series([0, 2, 12])) + # Series.update() accepting objects that can be coerced to a + # Series was added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s1.update([1, 2, -4, 3]) + s1.update([1, "b", "c", "d"]) + s1.update({1: 9, 3: 4}) + + +# error: Untyped decorator makes function "test_to_markdown" untyped +@td.skip_if_no("tabulate") # type: ignore[misc] +def test_to_markdown() -> None: + s = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s.to_markdown() + s.to_markdown(buf=None, mode="wt") + # index param was added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.to_markdown(index=False) + + +# compare() method added in 1.1.0 https://pandas.pydata.org/docs/whatsnew/v1.1.0.html +def test_types_compare() -> None: + s1 = pd.Series([0, 1, 1, 0, 5, 1, -10]) + s2 = pd.Series([0, 2, 12, -4, 7, 9, 2]) + s1.compare(s2) + s2.compare(s1, align_axis="columns", keep_shape=True, keep_equal=True) + + +def test_types_agg() -> None: + s = pd.Series([1, 2, 3], index=["col1", "col2", "col3"]) + s.agg("min") + s.agg(x=max, y="min", z=np.mean) + s.agg("mean", axis=0) + + +def test_types_describe() -> None: + s = pd.Series([1, 2, 3, np.datetime64("2000-01-01")]) + s.describe() + s.describe(percentiles=[0.5], include="all") + s.describe(exclude=np.number) + # datetime_is_numeric param added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.describe(datetime_is_numeric=True) + + +def test_types_resample() -> None: + s = pd.Series(range(9), index=pd.date_range("1/1/2000", periods=9, freq="T")) + s.resample("3T").sum() + # origin and offset params added in 1.1.0 + # https://pandas.pydata.org/docs/whatsnew/v1.1.0.html + s.resample("20min", origin="epoch", offset=pd.Timedelta(value=2, unit="minutes")) + + +# set_flags() method added in 1.2.0 
# set_flags() method added in 1.2.0 https://pandas.pydata.org/docs/whatsnew/v1.2.0.html
def test_types_set_flags() -> None:
    """set_flags(allows_duplicate_labels=...) returns a Series."""
    pd.Series([1, 2], index=["a", "b"]).set_flags(allows_duplicate_labels=False)
    pd.Series([3, 4], index=["a", "a"]).set_flags(allows_duplicate_labels=True)
    pd.Series([5, 2], index=["a", "a"])


def test_types_getitem() -> None:
    """__getitem__ by label, by position and by slice."""
    mapping = pd.Series({"key": [0, 1, 2, 3]})
    key: List[int] = mapping["key"]
    numbered = pd.Series([0, 1, 2, 3])
    value: int = numbered[0]
    s3: pd.Series = mapping[:2]


def test_types_eq() -> None:
    """== against a scalar and against another Series yields a Series."""
    left = pd.Series([1, 2, 3])
    res1: pd.Series = left == 1
    right = pd.Series([1, 2, 4])
    res2: pd.Series = left == right


def test_types_rename_axis() -> None:
    """rename_axis() with a scalar name."""
    renamed: pd.Series = pd.Series([1, 2, 3]).rename_axis("A")


def test_types_values() -> None:
    """.values is annotated as ndarray for several dtypes."""
    n1: np.ndarray = pd.Series([1, 2, 3]).values
    n2: np.ndarray = pd.Series(list("aabc")).values
    n3: np.ndarray = pd.Series(list("aabc")).astype("category").values
    n4: np.ndarray = pd.Series(
        pd.date_range("20130101", periods=3, tz="US/Eastern")
    ).values


def test_types_rename() -> None:
    """rename() with scalar, hashable sequence, None, callable and mapping."""
    # Scalar
    s1 = pd.Series([1, 2, 3]).rename("A")
    # Hashable Sequence
    s2 = pd.Series([1, 2, 3]).rename(("A", "B"))
    # Optional
    s3 = pd.Series([1, 2, 3]).rename(None)

    # Functions
    def add1(x: int) -> int:
        return x + 1

    s4 = pd.Series([1, 2, 3]).rename(add1)

    # Dictionary
    s5 = pd.Series([1, 2, 3]).rename({1: 10})
    # inplace
    # error: Incompatible types in assignment (expression has type "Optional[Series]",
    # variable has type "None")
    s6: None = pd.Series([1, 2, 3]).rename(  # type: ignore[assignment]
        "A", inplace=True
    )


def test_types_ne() -> None:
    """!= between Series yields a boolean Series."""
    first = pd.Series([1, 2, 3])
    second = pd.Series([1, 2, 4])
    unequal: pd.Series = first != second
types in assignment (expression has type "Optional[Series]", + # variable has type "None") + s3: None = s1.bfill(inplace=True) # type: ignore[assignment] + + +def test_types_ewm() -> None: + s1 = pd.Series([1, 2, 3]) + w1: ExponentialMovingWindow = s1.ewm( + com=0.3, min_periods=0, adjust=False, ignore_na=True, axis=0 + ) + w2: ExponentialMovingWindow = s1.ewm(alpha=0.4) + w3: ExponentialMovingWindow = s1.ewm(span=1.6) + w4: ExponentialMovingWindow = s1.ewm(halflife=0.7) + + +def test_types_ffill() -> None: + s1 = pd.Series([1, 2, 3]) + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "Series") + s2: pd.Series = s1.ffill(inplace=False) # type: ignore[assignment] + # error: Incompatible types in assignment (expression has type "Optional[Series]", + # variable has type "None") + s3: None = s1.ffill(inplace=True) # type: ignore[assignment] + + +def test_types_as_type() -> None: + s1 = pd.Series([1, 2, 8, 9]) + s2: pd.Series = s1.astype("int32") + + +def test_types_dot() -> None: + s1 = pd.Series([0, 1, 2, 3]) + s2 = pd.Series([-1, 2, -3, 4]) + df1 = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]]) + n1 = np.array([[0, 1], [1, 2], [-1, -1], [2, 0]]) + sc1: Scalar = s1.dot(s2) + sc2: Scalar = s1 @ s2 + s3: pd.Series = s1.dot(df1) + s4: pd.Series = s1 @ df1 + n2: np.ndarray = s1.dot(n1) + n3: np.ndarray = s1 @ n1 diff --git a/pandas/tests/typing/valid/test_testing.py b/pandas/tests/typing/valid/test_testing.py new file mode 100644 index 0000000000000..a98d8faee5098 --- /dev/null +++ b/pandas/tests/typing/valid/test_testing.py @@ -0,0 +1,21 @@ +# pyright: reportGeneralTypeIssues = true + +import pandas as pd +import pandas._testing as tm + + +def test_types_assert_series_equal() -> None: + s1 = pd.Series([0, 1, 1, 0]) + s2 = pd.Series([0, 1, 1, 0]) + tm.assert_series_equal(left=s1, right=s2) + tm.assert_series_equal( + s1, + s2, + check_freq=False, + check_categorical=True, + check_flags=True, + 
def test_types_init() -> None:
    """pd.Timestamp construction: string, date, datetime, epoch and parts."""
    from_iso: pd.Timestamp = pd.Timestamp("2021-03-01T12")
    from_date: pd.Timestamp = pd.Timestamp(dt.date(2021, 3, 15))
    from_datetime: pd.Timestamp = pd.Timestamp(dt.datetime(2021, 3, 10, 12))
    from_timestamp: pd.Timestamp = pd.Timestamp(pd.Timestamp("2021-03-01T12"))
    from_epoch: pd.Timestamp = pd.Timestamp(1515590000.1, unit="s")
    from_epoch_tz: pd.Timestamp = pd.Timestamp(1515590000.1, unit="s", tz="US/Pacific")
    from_nanos: pd.Timestamp = pd.Timestamp(1515590000100000000)  # plain integer (nanosecond)
    from_positional: pd.Timestamp = pd.Timestamp(2021, 3, 10, 12)
    from_keywords: pd.Timestamp = pd.Timestamp(year=2021, month=3, day=10, hour=12)
    from_keywords_tz: pd.Timestamp = pd.Timestamp(
        year=2021, month=3, day=10, hour=12, tz="US/Pacific"
    )
has type "timedelta", variable + # has type "Timedelta") + tsr: pd.Timedelta = ts - ts2 # type: ignore[assignment] + tsr2: pd.Timestamp = ts + delta + + +def test_types_comparison() -> None: + # Incompatible types in assignment (expression has type "datetime", variable has + # type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts: pd.Timestamp = pd.to_datetime("2021-03-01") # type: ignore[assignment,arg-type] + # Incompatible types in assignment (expression has type "datetime", variable has + # type "Timestamp") + # error: Argument 1 to "to_datetime" has incompatible type "str"; expected + # "datetime" + ts2: pd.Timestamp = pd.to_datetime( # type: ignore[assignment] + "2021-01-01" # type: ignore[arg-type] + ) + + tsr: bool = ts < ts2 + tsr2: bool = ts > ts2 + + +def test_types_pydatetime() -> None: + ts: pd.Timestamp = pd.Timestamp("2021-03-01T12") + + datet: dt.datetime = ts.to_pydatetime() + datet2: dt.datetime = ts.to_pydatetime(False) + datet3: dt.datetime = ts.to_pydatetime(warn=True) diff --git a/pyproject.toml b/pyproject.toml index c3ed07defa60d..c960279d2b192 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,6 +57,7 @@ markers = [ "arm_slow: mark a test as slow for arm64 architecture", "arraymanager: mark a test to run with ArrayManager enabled", ] +norecursedirs = ["pandas/tests/typing/invalid"] [tool.mypy] # Import discovery @@ -116,6 +117,10 @@ module = [ ] check_untyped_defs = false +[[tool.mypy.overrides]] +module = ["pandas.tests.typing"] +check_untyped_defs = true + [[tool.mypy.overrides]] module = [ "pandas.tests.apply.test_series_apply", @@ -150,8 +155,30 @@ skip = "pandas/__init__.py" [tool.pyright] pythonVersion = "3.8" typeCheckingMode = "basic" -include = ["pandas", "typings"] -exclude = ["pandas/tests", "pandas/io/clipboard", "pandas/util/version"] +include = ["pandas", "typings", "pandas/tests/typing/valid"] +exclude = [ + "pandas/io/clipboard", + "pandas/util/version", + # 
ignore everything in /pandas/tests except typing + "pandas/tests/*.py", + "pandas/tests/a*", + "pandas/tests/b*", + "pandas/tests/c*", + "pandas/tests/d*", + "pandas/tests/e*", + "pandas/tests/f*", + "pandas/tests/g*", + "pandas/tests/i*", + "pandas/tests/l*", + "pandas/tests/p*", + "pandas/tests/r*", + "pandas/tests/s*", + "pandas/tests/to*", + "pandas/tests/ts*", + "pandas/tests/u*", + "pandas/tests/w*", +] reportGeneralTypeIssues = false reportConstantRedefinition = false reportFunctionMemberAccess = false