From c3d0be1106869a830ddb1bff70a50a94116bf174 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 29 Sep 2022 15:24:08 +0200 Subject: [PATCH 1/3] BUG: convert_dtypes not converting with pd.NA and object dtype --- doc/source/whatsnew/v1.6.0.rst | 1 + pandas/core/dtypes/cast.py | 13 ++++++++++++ pandas/core/series.py | 1 + .../series/methods/test_convert_dtypes.py | 20 +++++++++++++++++++ 4 files changed, 35 insertions(+) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 9f793532e5e6b..a25fcaaff46bf 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -186,6 +186,7 @@ Conversion ^^^^^^^^^^ - Bug in constructing :class:`Series` with ``int64`` dtype from a string list raising instead of casting (:issue:`44923`) - Bug in :meth:`DataFrame.eval` incorrectly raising an ``AttributeError`` when there are negative values in function call (:issue:`46471`) +- Bug in :meth:`Series.convert_dtypes` not converting dtype to nullable dtype when :class:`Series` contains ``NA`` and has dtype ``object`` (:issue:`48791`) - Bug where any :class:`ExtensionDtype` subclass with ``kind="M"`` would be interpreted as a timezone type (:issue:`34986`) - diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 75a0db3233130..509edf25bb284 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1083,6 +1083,7 @@ def convert_dtypes( convert_integer: bool = True, convert_boolean: bool = True, convert_floating: bool = True, + infer_objects: bool = False, ) -> DtypeObj: """ Convert objects to best possible type, and optionally, @@ -1139,6 +1140,12 @@ def convert_dtypes( inferred_dtype = target_int_dtype else: inferred_dtype = input_array.dtype + elif ( + infer_objects + and is_object_dtype(input_array.dtype) + and inferred_dtype == "integer" + ): + inferred_dtype = target_int_dtype if convert_floating: if not is_integer_dtype(input_array.dtype) and is_numeric_dtype( @@ -1160,6 +1167,12 @@ def convert_dtypes( inferred_dtype = inferred_float_dtype else: inferred_dtype = inferred_float_dtype + elif ( + infer_objects + and is_object_dtype(input_array.dtype) + and inferred_dtype == "mixed-integer-float" + ): + inferred_dtype = pandas_dtype("Float64") if convert_boolean: if is_bool_dtype(input_array.dtype): diff --git a/pandas/core/series.py b/pandas/core/series.py index 25ce6ea739d0e..211649c7b08d4 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5704,6 +5704,7 @@ def _convert_dtypes( convert_integer, convert_boolean, convert_floating, + infer_objects, ) result = input_series.astype(inferred_dtype) else: diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index 25cbcf2a84490..0fd508b08f1db 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -229,3 +229,23 @@ def test_convert_byte_string_dtype(self): result = df.convert_dtypes() expected = df tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize( + "infer_objects, dtype", [(True, "Int64"), (False, "object")] + ) + def test_convert_dtype_object_with_na(self, infer_objects, dtype): + # GH#48791 + ser = pd.Series([1, pd.NA]) + result = ser.convert_dtypes(infer_objects=infer_objects) + expected = pd.Series([1, pd.NA], dtype=dtype) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "infer_objects, dtype", [(True, "Float64"), (False, "object")] + ) + def test_convert_dtype_object_with_na_float(self, infer_objects, dtype): + # GH#48791 + ser = pd.Series([1.5, pd.NA]) + result = ser.convert_dtypes(infer_objects=infer_objects) + expected = pd.Series([1.5, pd.NA], dtype=dtype) + tm.assert_series_equal(result, expected) From f251d0ebaa7925276456e9770843b383336faa79 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 29 Sep 2022 21:29:34 +0200 Subject: [PATCH 2/3] Add doc --- pandas/core/dtypes/cast.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 509edf25bb284..da5927cf06326 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1102,6 +1102,9 @@ def convert_dtypes( Whether, if possible, conversion can be done to floating extension types. If `convert_integer` is also True, preference will be give to integer dtypes if the floats can be faithfully casted to integers. + infer_objects : bool, defaults False + Whether to also infert objects to float/int if possible. Is only hit if the + object array contains pd.NA. Returns ------- From 6872e1c983dc54a7f3a4a0c98ab997d0b242232c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 29 Sep 2022 21:51:32 +0200 Subject: [PATCH 3/3] Fix typo --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index da5927cf06326..db74ebb1f7ccc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1103,7 +1103,7 @@ def convert_dtypes( If `convert_integer` is also True, preference will be give to integer dtypes if the floats can be faithfully casted to integers. infer_objects : bool, defaults False - Whether to also infert objects to float/int if possible. Is only hit if the + Whether to also infer objects to float/int if possible. Is only hit if the object array contains pd.NA. Returns