Skip to content

Commit 579ec33

Browse files
authored
get_dummies to select string dtype in addition to object and categorical (#45516)
1 parent eae37b0 commit 579ec33

File tree

3 files changed

+19
-2
lines changed

3 files changed

+19
-2
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -309,6 +309,7 @@ Groupby/resample/rolling
309309
Reshaping
310310
^^^^^^^^^
311311
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
312+
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
312313
-
313314

314315
Sparse

pandas/core/reshape/reshape.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -838,7 +838,7 @@ def get_dummies(
838838
columns : list-like, default None
839839
Column names in the DataFrame to be encoded.
840840
If `columns` is None then all the columns with
841-
`object` or `category` dtype will be converted.
841+
`object`, `string`, or `category` dtype will be converted.
842842
sparse : bool, default False
843843
Whether the dummy-encoded columns should be backed by
844844
a :class:`SparseArray` (True) or a regular NumPy array (False).
@@ -919,7 +919,7 @@ def get_dummies(
919919
"""
920920
from pandas.core.reshape.concat import concat
921921

922-
dtypes_to_encode = ["object", "category"]
922+
dtypes_to_encode = ["object", "string", "category"]
923923

924924
if isinstance(data, DataFrame):
925925
# determine columns being encoded

pandas/tests/reshape/test_get_dummies.py

+16
Original file line number | Diff line number | Diff line change
@@ -196,6 +196,22 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
196196

197197
tm.assert_frame_equal(result, expected)
198198

199+
def test_dataframe_dummies_string_dtype(self, df):
200+
# GH44965
201+
df = df[["A", "B"]]
202+
df = df.astype({"A": "object", "B": "string"})
203+
result = get_dummies(df)
204+
expected = DataFrame(
205+
{
206+
"A_a": [1, 0, 1],
207+
"A_b": [0, 1, 0],
208+
"B_b": [1, 1, 0],
209+
"B_c": [0, 0, 1],
210+
},
211+
dtype=np.uint8,
212+
)
213+
tm.assert_frame_equal(result, expected)
214+
199215
def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
200216
result = get_dummies(df, sparse=sparse, dtype=dtype)
201217
if sparse:

0 commit comments

Comments
 (0)