From 8bcdfccedcffca3a5f1580d09df28d13a445f3b2 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Mon, 26 Sep 2022 17:27:58 +0100 Subject: [PATCH 1/3] ENH: Add from_dummies --- pandas-stubs/__init__.pyi | 1 + pandas-stubs/core/reshape/api.pyi | 5 ++- pandas-stubs/core/reshape/encoding.pyi | 31 ++++++++++++++ pandas-stubs/core/reshape/reshape.pyi | 19 +++------ tests/test_utility.py | 57 ++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 14 deletions(-) create mode 100644 pandas-stubs/core/reshape/encoding.pyi diff --git a/pandas-stubs/__init__.pyi b/pandas-stubs/__init__.pyi index ec2213aa0..d497523a1 100644 --- a/pandas-stubs/__init__.pyi +++ b/pandas-stubs/__init__.pyi @@ -80,6 +80,7 @@ from .core.reshape.api import ( concat as concat, crosstab as crosstab, cut as cut, + from_dummies as from_dummies, get_dummies as get_dummies, lreshape as lreshape, melt as melt, diff --git a/pandas-stubs/core/reshape/api.pyi b/pandas-stubs/core/reshape/api.pyi index 4b92ec1d8..dfef82744 100644 --- a/pandas-stubs/core/reshape/api.pyi +++ b/pandas-stubs/core/reshape/api.pyi @@ -1,4 +1,8 @@ from pandas.core.reshape.concat import concat as concat +from pandas.core.reshape.encoding import ( + from_dummies as from_dummies, + get_dummies as get_dummies, +) from pandas.core.reshape.melt import ( lreshape as lreshape, melt as melt, @@ -14,7 +18,6 @@ from pandas.core.reshape.pivot import ( pivot as pivot, pivot_table as pivot_table, ) -from pandas.core.reshape.reshape import get_dummies as get_dummies from pandas.core.reshape.tile import ( cut as cut, qcut as qcut, diff --git a/pandas-stubs/core/reshape/encoding.pyi b/pandas-stubs/core/reshape/encoding.pyi new file mode 100644 index 000000000..132fdac34 --- /dev/null +++ b/pandas-stubs/core/reshape/encoding.pyi @@ -0,0 +1,31 @@ +from typing import ( + Hashable, + Iterable, +) + +from pandas import ( + DataFrame, + Series, +) + +from pandas._typing import ( + ArrayLike, + Dtype, + HashableT, +) + +def get_dummies( + data: 
ArrayLike | DataFrame | Series, + prefix: str | Iterable[str] | dict[str, str] | None = ..., + prefix_sep: str = ..., + dummy_na: bool = ..., + columns: list[HashableT] | None = ..., + sparse: bool = ..., + drop_first: bool = ..., + dtype: Dtype | None = ..., +) -> DataFrame: ... +def from_dummies( + data: DataFrame, + sep: str | None = ..., + default_category: Hashable | dict[str, Hashable] | None = ..., +) -> DataFrame: ... diff --git a/pandas-stubs/core/reshape/reshape.pyi b/pandas-stubs/core/reshape/reshape.pyi index f912c1cd1..4797ef9f5 100644 --- a/pandas-stubs/core/reshape/reshape.pyi +++ b/pandas-stubs/core/reshape/reshape.pyi @@ -1,16 +1,9 @@ import numpy as np -from pandas.core.frame import DataFrame +from pandas import ( + DataFrame, + Series, +) -def unstack(obj, level, fill_value=...): ... -def stack(frame, level: int = ..., dropna: bool = ...): ... +def unstack(obj: Series | DataFrame, level, fill_value: object | None = ...): ... +def stack(frame: DataFrame, level: int = ..., dropna: bool = ...): ... def stack_multiple(frame, level, dropna: bool = ...): ... -def get_dummies( - data, - prefix=..., - prefix_sep=..., - dummy_na=..., - columns=..., - sparse=..., - drop_first=..., - dtype=..., -) -> DataFrame: ... 
diff --git a/tests/test_utility.py b/tests/test_utility.py index 80df143c9..045adf465 100644 --- a/tests/test_utility.py +++ b/tests/test_utility.py @@ -9,3 +9,60 @@ def test_show_version(): with pytest.warns(UserWarning, match="Setuptools is replacing distutils"): check(assert_type(pd.show_versions(True), None), type(None)) check(assert_type(pd.show_versions(False), None), type(None)) + + +def test_dummies(): + df = pd.DataFrame( + pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), columns=["A"] + ) + dummies = pd.get_dummies(df) + check(assert_type(dummies, pd.DataFrame), pd.DataFrame) + check(assert_type(pd.from_dummies(dummies), pd.DataFrame), pd.DataFrame) + + +def test_get_dummies_args(): + df = pd.DataFrame( + { + "A": pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), + "B": pd.Series([1, 2, 1, 2, 3, 1, 1]), + } + ) + check( + assert_type( + pd.get_dummies(df, prefix="foo", prefix_sep="-", sparse=True), pd.DataFrame + ), + pd.DataFrame, + ) + check( + assert_type( + pd.get_dummies( + df, prefix=["foo"], dummy_na=True, drop_first=True, dtype="bool" + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + pd.get_dummies(df, prefix={"A": "foo", "B": "baz"}, columns=["A", "B"]), + pd.DataFrame, + ), + pd.DataFrame, + ) + + +def test_from_dummies_args(): + df = pd.DataFrame( + { + "A": pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), + } + ) + dummies = pd.get_dummies(df, drop_first=True) + + check( + assert_type( + pd.from_dummies(dummies, sep="_", default_category="a"), + pd.DataFrame, + ), + pd.DataFrame, + ) From d5c35d5f2801476e85de8744697075e3c568b944 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 27 Sep 2022 17:27:40 +0100 Subject: [PATCH 2/3] TYP: Improve typing accuracy --- pandas-stubs/core/reshape/encoding.pyi | 2 +- pandas-stubs/core/reshape/reshape.pyi | 9 --------- tests/test_utility.py | 9 +++++++++ 3 files changed, 10 insertions(+), 10 deletions(-) delete mode 100644 
pandas-stubs/core/reshape/reshape.pyi diff --git a/pandas-stubs/core/reshape/encoding.pyi b/pandas-stubs/core/reshape/encoding.pyi index 132fdac34..02db853dd 100644 --- a/pandas-stubs/core/reshape/encoding.pyi +++ b/pandas-stubs/core/reshape/encoding.pyi @@ -16,7 +16,7 @@ from pandas._typing import ( def get_dummies( data: ArrayLike | DataFrame | Series, - prefix: str | Iterable[str] | dict[str, str] | None = ..., + prefix: str | Iterable[str] | dict[Hashable, str] | None = ..., prefix_sep: str = ..., dummy_na: bool = ..., columns: list[HashableT] | None = ..., diff --git a/pandas-stubs/core/reshape/reshape.pyi b/pandas-stubs/core/reshape/reshape.pyi deleted file mode 100644 index 4797ef9f5..000000000 --- a/pandas-stubs/core/reshape/reshape.pyi +++ /dev/null @@ -1,9 +0,0 @@ -import numpy as np -from pandas import ( - DataFrame, - Series, -) - -def unstack(obj: Series | DataFrame, level, fill_value: object | None = ...): ... -def stack(frame: DataFrame, level: int = ..., dropna: bool = ...): ... -def stack_multiple(frame, level, dropna: bool = ...): ... 
diff --git a/tests/test_utility.py b/tests/test_utility.py index 045adf465..9a27f822c 100644 --- a/tests/test_utility.py +++ b/tests/test_utility.py @@ -19,6 +19,15 @@ def test_dummies(): check(assert_type(dummies, pd.DataFrame), pd.DataFrame) check(assert_type(pd.from_dummies(dummies), pd.DataFrame), pd.DataFrame) + df2 = pd.DataFrame( + pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), + columns=[("A",)], + ) + check( + assert_type(pd.get_dummies(df2, prefix={("A",): "bar"}), pd.DataFrame), + pd.DataFrame, + ) + def test_get_dummies_args(): df = pd.DataFrame( From c2d5e01f1d6fa9a9fb27c4d4c1ac3d38d42e8826 Mon Sep 17 00:00:00 2001 From: Kevin Sheppard Date: Tue, 27 Sep 2022 17:30:55 +0100 Subject: [PATCH 3/3] BUG: Use HashableT for non-covariant dict --- pandas-stubs/core/reshape/encoding.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas-stubs/core/reshape/encoding.pyi b/pandas-stubs/core/reshape/encoding.pyi index 02db853dd..a8c545606 100644 --- a/pandas-stubs/core/reshape/encoding.pyi +++ b/pandas-stubs/core/reshape/encoding.pyi @@ -16,7 +16,7 @@ from pandas._typing import ( def get_dummies( data: ArrayLike | DataFrame | Series, - prefix: str | Iterable[str] | dict[Hashable, str] | None = ..., + prefix: str | Iterable[str] | dict[HashableT, str] | None = ..., prefix_sep: str = ..., dummy_na: bool = ..., columns: list[HashableT] | None = ...,