Skip to content

Commit 8bcdfcc

Browse files
Kevin Sheppard authored and bashtage committed
ENH: Add from_dummies
1 parent 7b84785 commit 8bcdfcc

File tree

5 files changed: +99 additions, -14 deletions

pandas-stubs/__init__.pyi

+1
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ from .core.reshape.api import (
8080
concat as concat,
8181
crosstab as crosstab,
8282
cut as cut,
83+
from_dummies as from_dummies,
8384
get_dummies as get_dummies,
8485
lreshape as lreshape,
8586
melt as melt,

pandas-stubs/core/reshape/api.pyi

+4 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,8 @@
11
from pandas.core.reshape.concat import concat as concat
2+
from pandas.core.reshape.encoding import (
3+
from_dummies as from_dummies,
4+
get_dummies as get_dummies,
5+
)
26
from pandas.core.reshape.melt import (
37
lreshape as lreshape,
48
melt as melt,
@@ -14,7 +18,6 @@ from pandas.core.reshape.pivot import (
1418
pivot as pivot,
1519
pivot_table as pivot_table,
1620
)
17-
from pandas.core.reshape.reshape import get_dummies as get_dummies
1821
from pandas.core.reshape.tile import (
1922
cut as cut,
2023
qcut as qcut,

pandas-stubs/core/reshape/encoding.pyi

+31
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
from typing import (
2+
Hashable,
3+
Iterable,
4+
)
5+
6+
from pandas import (
7+
DataFrame,
8+
Series,
9+
)
10+
11+
from pandas._typing import (
12+
ArrayLike,
13+
Dtype,
14+
HashableT,
15+
)
16+
17+
def get_dummies(
18+
data: ArrayLike | DataFrame | Series,
19+
prefix: str | Iterable[str] | dict[str, str] | None = ...,
20+
prefix_sep: str = ...,
21+
dummy_na: bool = ...,
22+
columns: list[HashableT] | None = ...,
23+
sparse: bool = ...,
24+
drop_first: bool = ...,
25+
dtype: Dtype | None = ...,
26+
) -> DataFrame: ...
27+
def from_dummies(
28+
data: DataFrame,
29+
sep: str | None = ...,
30+
default_category: Hashable | dict[str, Hashable] | None = ...,
31+
) -> DataFrame: ...

pandas-stubs/core/reshape/reshape.pyi

+6 -13
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,9 @@
11
import numpy as np
2-
from pandas.core.frame import DataFrame
2+
from pandas import (
3+
DataFrame,
4+
Series,
5+
)
36

4-
def unstack(obj, level, fill_value=...): ...
5-
def stack(frame, level: int = ..., dropna: bool = ...): ...
7+
def unstack(obj: Series | DataFrame, level, fill_value: object | None = ...): ...
8+
def stack(frame: DataFrame, level: int = ..., dropna: bool = ...): ...
69
def stack_multiple(frame, level, dropna: bool = ...): ...
7-
def get_dummies(
8-
data,
9-
prefix=...,
10-
prefix_sep=...,
11-
dummy_na=...,
12-
columns=...,
13-
sparse=...,
14-
drop_first=...,
15-
dtype=...,
16-
) -> DataFrame: ...

tests/test_utility.py

+57
Original file line number | Diff line number | Diff line change
@@ -9,3 +9,60 @@ def test_show_version():
99
with pytest.warns(UserWarning, match="Setuptools is replacing distutils"):
1010
check(assert_type(pd.show_versions(True), None), type(None))
1111
check(assert_type(pd.show_versions(False), None), type(None))
12+
13+
14+
def test_dummies():
15+
df = pd.DataFrame(
16+
pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"), columns=["A"]
17+
)
18+
dummies = pd.get_dummies(df)
19+
check(assert_type(dummies, pd.DataFrame), pd.DataFrame)
20+
check(assert_type(pd.from_dummies(dummies), pd.DataFrame), pd.DataFrame)
21+
22+
23+
def test_get_dummies_args():
24+
df = pd.DataFrame(
25+
{
26+
"A": pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"),
27+
"B": pd.Series([1, 2, 1, 2, 3, 1, 1]),
28+
}
29+
)
30+
check(
31+
assert_type(
32+
pd.get_dummies(df, prefix="foo", prefix_sep="-", sparse=True), pd.DataFrame
33+
),
34+
pd.DataFrame,
35+
)
36+
check(
37+
assert_type(
38+
pd.get_dummies(
39+
df, prefix=["foo"], dummy_na=True, drop_first=True, dtype="bool"
40+
),
41+
pd.DataFrame,
42+
),
43+
pd.DataFrame,
44+
)
45+
check(
46+
assert_type(
47+
pd.get_dummies(df, prefix={"A": "foo", "B": "baz"}, columns=["A", "B"]),
48+
pd.DataFrame,
49+
),
50+
pd.DataFrame,
51+
)
52+
53+
54+
def test_from_dummies_args():
55+
df = pd.DataFrame(
56+
{
57+
"A": pd.Series(["a", "b", "a", "b", "c", "a", "a"], dtype="category"),
58+
}
59+
)
60+
dummies = pd.get_dummies(df, drop_first=True)
61+
62+
check(
63+
assert_type(
64+
pd.from_dummies(dummies, sep="_", default_category="a"),
65+
pd.DataFrame,
66+
),
67+
pd.DataFrame,
68+
)

0 commit comments

Comments
 (0)