Skip to content

Commit 7ab6a10

Browse files
authored
BUG: convert_dtypes not converting with pd.NA and object dtype (#48857)
* BUG: convert_dtypes not converting with pd.NA and object dtype * Add doc * Fix typo
1 parent 5a7c7e8 commit 7ab6a10

File tree

4 files changed

+38
-0
lines changed

4 files changed

+38
-0
lines changed

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,7 @@ Conversion
188188
^^^^^^^^^^
189189
- Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`)
190190
- Bug in :meth:`DataFrame.eval` incorrectly raising an ``AttributeError`` when there are negative values in function call (:issue:`46471`)
191+
- Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`)
191192
- Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`)
192193
-
193194

pandas/core/dtypes/cast.py

+16
Original file line numberDiff line numberDiff line change
@@ -1083,6 +1083,7 @@ def convert_dtypes(
10831083
convert_integer: bool = True,
10841084
convert_boolean: bool = True,
10851085
convert_floating: bool = True,
1086+
infer_objects: bool = False,
10861087
) -> DtypeObj:
10871088
"""
10881089
Convert objects to best possible type, and optionally,
@@ -1101,6 +1102,9 @@ def convert_dtypes(
11011102
Whether, if possible, conversion can be done to floating extension types.
11021103
If `convert_integer` is also True, preference will be given to integer
11031104
dtypes if the floats can be faithfully cast to integers.
1105+
infer_objects : bool, default False
1106+
Whether to also infer objects to float/int if possible. Only applies if the
1107+
object array contains pd.NA.
11041108
11051109
Returns
11061110
-------
@@ -1139,6 +1143,12 @@ def convert_dtypes(
11391143
inferred_dtype = target_int_dtype
11401144
else:
11411145
inferred_dtype = input_array.dtype
1146+
elif (
1147+
infer_objects
1148+
and is_object_dtype(input_array.dtype)
1149+
and inferred_dtype == "integer"
1150+
):
1151+
inferred_dtype = target_int_dtype
11421152

11431153
if convert_floating:
11441154
if not is_integer_dtype(input_array.dtype) and is_numeric_dtype(
@@ -1160,6 +1170,12 @@ def convert_dtypes(
11601170
inferred_dtype = inferred_float_dtype
11611171
else:
11621172
inferred_dtype = inferred_float_dtype
1173+
elif (
1174+
infer_objects
1175+
and is_object_dtype(input_array.dtype)
1176+
and inferred_dtype == "mixed-integer-float"
1177+
):
1178+
inferred_dtype = pandas_dtype("Float64")
11631179

11641180
if convert_boolean:
11651181
if is_bool_dtype(input_array.dtype):

pandas/core/series.py

+1
Original file line numberDiff line numberDiff line change
@@ -5704,6 +5704,7 @@ def _convert_dtypes(
57045704
convert_integer,
57055705
convert_boolean,
57065706
convert_floating,
5707+
infer_objects,
57075708
)
57085709
result = input_series.astype(inferred_dtype)
57095710
else:

pandas/tests/series/methods/test_convert_dtypes.py

+20
Original file line numberDiff line numberDiff line change
@@ -229,3 +229,23 @@ def test_convert_byte_string_dtype(self):
229229
result = df.convert_dtypes()
230230
expected = df
231231
tm.assert_frame_equal(result, expected)
232+
233+
@pytest.mark.parametrize(
234+
"infer_objects, dtype", [(True, "Int64"), (False, "object")]
235+
)
236+
def test_convert_dtype_object_with_na(self, infer_objects, dtype):
237+
# GH#48791
238+
ser = pd.Series([1, pd.NA])
239+
result = ser.convert_dtypes(infer_objects=infer_objects)
240+
expected = pd.Series([1, pd.NA], dtype=dtype)
241+
tm.assert_series_equal(result, expected)
242+
243+
@pytest.mark.parametrize(
244+
"infer_objects, dtype", [(True, "Float64"), (False, "object")]
245+
)
246+
def test_convert_dtype_object_with_na_float(self, infer_objects, dtype):
247+
# GH#48791
248+
ser = pd.Series([1.5, pd.NA])
249+
result = ser.convert_dtypes(infer_objects=infer_objects)
250+
expected = pd.Series([1.5, pd.NA], dtype=dtype)
251+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)