diff --git a/pandas-stubs/__init__.pyi b/pandas-stubs/__init__.pyi index ec2213aa0..d497523a1 100644 --- a/pandas-stubs/__init__.pyi +++ b/pandas-stubs/__init__.pyi @@ -80,6 +80,7 @@ from .core.reshape.api import ( concat as concat, crosstab as crosstab, cut as cut, + from_dummies as from_dummies, get_dummies as get_dummies, lreshape as lreshape, melt as melt, diff --git a/pandas-stubs/core/reshape/api.pyi b/pandas-stubs/core/reshape/api.pyi index 4b92ec1d8..dfef82744 100644 --- a/pandas-stubs/core/reshape/api.pyi +++ b/pandas-stubs/core/reshape/api.pyi @@ -1,4 +1,8 @@ from pandas.core.reshape.concat import concat as concat +from pandas.core.reshape.encoding import ( + from_dummies as from_dummies, + get_dummies as get_dummies, +) from pandas.core.reshape.melt import ( lreshape as lreshape, melt as melt, @@ -14,7 +18,6 @@ from pandas.core.reshape.pivot import ( pivot as pivot, pivot_table as pivot_table, ) -from pandas.core.reshape.reshape import get_dummies as get_dummies from pandas.core.reshape.tile import ( cut as cut, qcut as qcut, diff --git a/pandas-stubs/core/reshape/encoding.pyi b/pandas-stubs/core/reshape/encoding.pyi new file mode 100644 index 000000000..a8c545606 --- /dev/null +++ b/pandas-stubs/core/reshape/encoding.pyi @@ -0,0 +1,31 @@ +from typing import ( + Hashable, + Iterable, +) + +from pandas import ( + DataFrame, + Series, +) + +from pandas._typing import ( + ArrayLike, + Dtype, + HashableT, +) + +def get_dummies( + data: ArrayLike | DataFrame | Series, + prefix: str | Iterable[str] | dict[HashableT, str] | None = ..., + prefix_sep: str = ..., + dummy_na: bool = ..., + columns: list[HashableT] | None = ..., + sparse: bool = ..., + drop_first: bool = ..., + dtype: Dtype | None = ..., +) -> DataFrame: ... +def from_dummies( + data: DataFrame, + sep: str | None = ..., + default_category: Hashable | dict[str, Hashable] | None = ..., +) -> DataFrame: ... diff --git a/pandas-stubs/core/reshape/reshape.pyi b/pandas-stubs/core/reshape/reshape.pyi deleted file mode 100644 index f912c1cd1..000000000 --- a/pandas-stubs/core/reshape/reshape.pyi +++ /dev/null @@ -1,16 +0,0 @@ -import numpy as np -from pandas.core.frame import DataFrame - -def unstack(obj, level, fill_value=...): ... -def stack(frame, level: int = ..., dropna: bool = ...): ... -def stack_multiple(frame, level, dropna: bool = ...): ... -def get_dummies( - data, - prefix=..., - prefix_sep=..., - dummy_na=..., - columns=..., - sparse=..., - drop_first=..., - dtype=..., -) -> DataFrame: ... diff --git a/tests/test_utility.py b/tests/test_utility.py index 80df143c9..9a27f822c 100644 --- a/tests/test_utility.py +++ b/tests/test_utility.py @@ -9,3 +9,69 @@ def test_show_version(): with pytest.warns(UserWarning, match="Setuptools is replacing distutils"): check(assert_type(pd.show_versions(True), None), type(None)) check(assert_type(pd.show_versions(False), None), type(None)) + + +def test_dummies(): + df = pd.DataFrame( + pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), columns=["A"] + ) + dummies = pd.get_dummies(df) + check(assert_type(dummies, pd.DataFrame), pd.DataFrame) + check(assert_type(pd.from_dummies(dummies), pd.DataFrame), pd.DataFrame) + + df2 = pd.DataFrame( + pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), + columns=[("A",)], + ) + check( + assert_type(pd.get_dummies(df2, prefix={("A",): "bar"}), pd.DataFrame), + pd.DataFrame, + ) + + +def test_get_dummies_args(): + df = pd.DataFrame( + { + "A": pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), + "B": pd.Series([1, 2, 1, 2, 3, 1, 1]), + } + ) + check( + assert_type( + pd.get_dummies(df, prefix="foo", prefix_sep="-", sparse=True), pd.DataFrame + ), + pd.DataFrame, + ) + check( + assert_type( + pd.get_dummies( + df, prefix=["foo"], dummy_na=True, drop_first=True, dtype="bool" + ), + pd.DataFrame, + ), + pd.DataFrame, + ) + check( + assert_type( + pd.get_dummies(df, prefix={"A": "foo", "B": "baz"}, columns=["A", "B"]), + pd.DataFrame, + ), + pd.DataFrame, + ) + + +def test_from_dummies_args(): + df = pd.DataFrame( + { + "A": pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), + } + ) + dummies = pd.get_dummies(df, drop_first=True) + + check( + assert_type( + pd.from_dummies(dummies, sep="_", default_category="a"), + pd.DataFrame, + ), + pd.DataFrame, + )