pandas-dev · mroeschke · Dec 14, 2022 · Dec 2, 2022 · Dec 5, 2022 · Dec 6, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -30,25 +30,31 @@ sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (
 
 .. _whatsnew_200.enhancements.io_use_nullable_dtypes_and_nullable_backend:
 
-Configuration option, ``io.nullable_backend``, to return pyarrow-backed dtypes from IO functions
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Configuration option, ``mode.nullable_backend``, to return pyarrow-backed dtypes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)
 
 * :func:`read_csv`
 * :func:`read_excel`
 * :func:`read_sql`
 
-Additionally a new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
+Additionally, a new global configuration option, ``mode.nullable_backend``, can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
 to select the nullable dtypes implementation.
 
 * :func:`read_csv` (with ``engine="pyarrow"``)
 * :func:`read_excel`
 * :func:`read_parquet`
 * :func:`read_orc`
 
-By default, ``io.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
-be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`).
+
+The following methods also use the ``mode.nullable_backend`` option.
+
+* :meth:`DataFrame.convert_dtypes`
+* :meth:`Series.convert_dtypes`
+
+By default, ``mode.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
+be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`, :issue:`49997`).
 
 .. ipython:: python
 
@@ -57,12 +63,12 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
         1,2.5,True,a,,,,,
         3,4.5,False,b,6,7.5,True,a,
     """)
-    with pd.option_context("io.nullable_backend", "pandas"):
+    with pd.option_context("mode.nullable_backend", "pandas"):
         df = pd.read_csv(data, use_nullable_dtypes=True)
     df.dtypes
 
     data.seek(0)
-    with pd.option_context("io.nullable_backend", "pyarrow"):
+    with pd.option_context("mode.nullable_backend", "pyarrow"):
         df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
     df_pyarrow.dtypes
 

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -539,13 +539,25 @@ def use_inf_as_na_cb(key) -> None:
     The default storage for StringDtype.
 """
 
+nullable_backend_doc = """
+: string
+    The nullable dtype implementation to return.
+    Available options: 'pandas', 'pyarrow', the default is 'pandas'.
+"""
+
 with cf.config_prefix("mode"):
     cf.register_option(
         "string_storage",
         "python",
         string_storage_doc,
         validator=is_one_of_factory(["python", "pyarrow"]),
     )
+    cf.register_option(
+        "nullable_backend",
+        "pandas",
+        nullable_backend_doc,
+        validator=is_one_of_factory(["pandas", "pyarrow"]),
+    )
 
 # Set up the io.excel specific reader configuration.
 reader_engine_doc = """
@@ -673,20 +685,6 @@ def use_inf_as_na_cb(key) -> None:
         validator=is_one_of_factory(["auto", "sqlalchemy"]),
     )
 
-io_nullable_backend_doc = """
-: string
-    The nullable dtype implementation to return when ``use_nullable_dtypes=True``.
-    Available options: 'pandas', 'pyarrow', the default is 'pandas'.
-"""
-
-with cf.config_prefix("io.nullable_backend"):
-    cf.register_option(
-        "io_nullable_backend",
-        "pandas",
-        io_nullable_backend_doc,
-        validator=is_one_of_factory(["pandas", "pyarrow"]),
-    )
-
 # --------
 # Plotting
 # ---------

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -9,6 +9,7 @@
 from typing import (
     TYPE_CHECKING,
     Any,
+    Literal,
     Sized,
     TypeVar,
     cast,
@@ -70,10 +71,12 @@
     pandas_dtype as pandas_dtype_func,
 )
 from pandas.core.dtypes.dtypes import (
+    BaseMaskedDtype,
     CategoricalDtype,
     DatetimeTZDtype,
     ExtensionDtype,
     IntervalDtype,
+    PandasExtensionDtype,
     PeriodDtype,
 )
 from pandas.core.dtypes.generic import (
@@ -958,6 +961,7 @@ def convert_dtypes(
     convert_boolean: bool = True,
     convert_floating: bool = True,
     infer_objects: bool = False,
+    nullable_backend: Literal["pandas", "pyarrow"] = "pandas",
 ) -> DtypeObj:
     """
     Convert objects to best possible type, and optionally,
@@ -979,6 +983,11 @@ def convert_dtypes(
     infer_objects : bool, defaults False
         Whether to also infer objects to float/int if possible. Is only hit if the
         object array contains pd.NA.
+    nullable_backend : str, default "pandas"
+        Nullable dtype implementation to use.
+
+        * "pandas" returns numpy-backed nullable types
+        * "pyarrow" returns pyarrow-backed nullable types using ``ArrowDtype``
 
     Returns
     -------
@@ -997,9 +1006,9 @@ def convert_dtypes(
 
         if is_string_dtype(inferred_dtype):
             if not convert_string or inferred_dtype == "bytes":
-                return input_array.dtype
+                inferred_dtype = input_array.dtype
             else:
-                return pandas_dtype_func("string")
+                inferred_dtype = pandas_dtype_func("string")
 
         if convert_integer:
             target_int_dtype = pandas_dtype_func("Int64")
@@ -1020,7 +1029,7 @@ def convert_dtypes(
             elif (
                 infer_objects
                 and is_object_dtype(input_array.dtype)
-                and inferred_dtype == "integer"
+                and (isinstance(inferred_dtype, str) and inferred_dtype == "integer")
             ):
                 inferred_dtype = target_int_dtype
 
@@ -1047,7 +1056,10 @@ def convert_dtypes(
             elif (
                 infer_objects
                 and is_object_dtype(input_array.dtype)
-                and inferred_dtype == "mixed-integer-float"
+                and (
+                    isinstance(inferred_dtype, str)
+                    and inferred_dtype == "mixed-integer-float"
+                )
             ):
                 inferred_dtype = pandas_dtype_func("Float64")
 
@@ -1062,7 +1074,27 @@ def convert_dtypes(
             inferred_dtype = input_array.dtype
 
     else:
-        return input_array.dtype
+        inferred_dtype = input_array.dtype
+
+    if nullable_backend == "pyarrow":
+        from pandas.core.arrays.arrow.array import to_pyarrow_type
+        from pandas.core.arrays.arrow.dtype import ArrowDtype
+        from pandas.core.arrays.string_ import StringDtype
+
+        if isinstance(inferred_dtype, PandasExtensionDtype):
+            base_dtype = inferred_dtype.base
+        elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
+            base_dtype = inferred_dtype.numpy_dtype
+        elif isinstance(inferred_dtype, StringDtype):
+            base_dtype = np.dtype(str)
+        else:
+            # error: Incompatible types in assignment (expression has type
+            # "Union[str, Any, dtype[Any], ExtensionDtype]",
+            # variable has type "Union[dtype[Any], ExtensionDtype, None]")
+            base_dtype = inferred_dtype  # type: ignore[assignment]
+        pa_type = to_pyarrow_type(base_dtype)
+        if pa_type is not None:
+            inferred_dtype = ArrowDtype(pa_type)
 
     # error: Incompatible return value type (got "Union[str, Union[dtype[Any],
     # ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -6433,6 +6433,13 @@ def convert_dtypes(
         In the future, as new dtypes are added that support ``pd.NA``, the results
         of this method will change to support those new dtypes.
 
+        .. versionadded:: 2.0
+            The nullable dtype implementation can be configured by calling
+            ``pd.set_option("mode.nullable_backend", "pandas")`` to use
+            numpy-backed nullable dtypes or
+            ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
+            pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
+
         Examples
         --------
         >>> df = pd.DataFrame(

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -5410,13 +5410,15 @@ def _convert_dtypes(
                 input_series = input_series.copy()
 
         if convert_string or convert_integer or convert_boolean or convert_floating:
+            nullable_backend = get_option("mode.nullable_backend")
             inferred_dtype = convert_dtypes(
                 input_series._values,
                 convert_string,
                 convert_integer,
                 convert_boolean,
                 convert_floating,
                 infer_objects,
+                nullable_backend,
             )
             result = input_series.astype(inferred_dtype)
         else:

diff --git a/pandas/io/orc.py b/pandas/io/orc.py
@@ -58,16 +58,17 @@ def read_orc(
         If True, use dtypes that use ``pd.NA`` as missing value indicator
         for the resulting DataFrame.
 
-        The nullable dtype implementation can be configured by setting the global
-        ``io.nullable_backend`` configuration option to ``"pandas"`` to use
-        numpy-backed nullable dtypes or ``"pyarrow"`` to use pyarrow-backed
-        nullable dtypes (using ``pd.ArrowDtype``).
+        The nullable dtype implementation can be configured by calling
+        ``pd.set_option("mode.nullable_backend", "pandas")`` to use
+        numpy-backed nullable dtypes or
+        ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
+        pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
 
         .. versionadded:: 2.0.0
 
         .. note
 
-            Currently only ``io.nullable_backend`` set to ``"pyarrow"`` is supported.
+            Currently only ``mode.nullable_backend`` set to ``"pyarrow"`` is supported.
 
     **kwargs
         Any additional kwargs are passed to pyarrow.
@@ -89,10 +90,10 @@ def read_orc(
         orc_file = orc.ORCFile(handles.handle)
         pa_table = orc_file.read(columns=columns, **kwargs)
     if use_nullable_dtypes:
-        nullable_backend = get_option("io.nullable_backend")
+        nullable_backend = get_option("mode.nullable_backend")
         if nullable_backend != "pyarrow":
             raise NotImplementedError(
-                f"io.nullable_backend set to {nullable_backend} is not implemented."
+                f"mode.nullable_backend set to {nullable_backend} is not implemented."
             )
         df = DataFrame(
             {

diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py
@@ -222,7 +222,7 @@ def read(
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
 
-        nullable_backend = get_option("io.nullable_backend")
+        nullable_backend = get_option("mode.nullable_backend")
         to_pandas_kwargs = {}
         if use_nullable_dtypes:
             import pandas as pd
@@ -508,10 +508,11 @@ def read_parquet(
 
         .. versionadded:: 1.2.0
 
-        The nullable dtype implementation can be configured by setting the global
-        ``io.nullable_backend`` configuration option to ``"pandas"`` to use
-        numpy-backed nullable dtypes or ``"pyarrow"`` to use pyarrow-backed
-        nullable dtypes (using ``pd.ArrowDtype``).
+        The nullable dtype implementation can be configured by calling
+        ``pd.set_option("mode.nullable_backend", "pandas")`` to use
+        numpy-backed nullable dtypes or
+        ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
+        pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
 
         .. versionadded:: 2.0.0
 

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -151,7 +151,7 @@ def read(self) -> DataFrame:
         )
         if (
             self.kwds["use_nullable_dtypes"]
-            and get_option("io.nullable_backend") == "pyarrow"
+            and get_option("mode.nullable_backend") == "pyarrow"
         ):
             frame = DataFrame(
                 {

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
@@ -713,7 +713,7 @@ def _infer_types(
         use_nullable_dtypes: Literal[True] | Literal[False] = (
             self.use_nullable_dtypes and no_dtype_specified
         )
-        nullable_backend = get_option("io.nullable_backend")
+        nullable_backend = get_option("mode.nullable_backend")
         result: ArrayLike
 
         if try_num_bool and is_object_dtype(values.dtype):

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -398,10 +398,11 @@
     set to True, nullable dtypes are used for all dtypes that have a nullable
     implementation, even if no nulls are present.
 
-    The nullable dtype implementation can be configured by setting the global
-    ``io.nullable_backend`` configuration option to ``"pandas"`` to use
-    numpy-backed nullable dtypes or ``"pyarrow"`` to use pyarrow-backed
-    nullable dtypes (using ``pd.ArrowDtype``).
+    The nullable dtype implementation can be configured by calling
+    ``pd.set_option("mode.nullable_backend", "pandas")`` to use
+    numpy-backed nullable dtypes or
+    ``pd.set_option("mode.nullable_backend", "pyarrow")`` to use
+    pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
 
     .. versionadded:: 2.0
 
@@ -560,11 +561,11 @@ def _read(
             )
     elif (
         kwds.get("use_nullable_dtypes", False)
-        and get_option("io.nullable_backend") == "pyarrow"
+        and get_option("mode.nullable_backend") == "pyarrow"
     ):
         raise NotImplementedError(
             f"use_nullable_dtypes=True and engine={kwds['engine']} with "
-            "io.nullable_backend set to 'pyarrow' is not implemented."
+            "mode.nullable_backend set to 'pyarrow' is not implemented."
         )
     else:
         chunksize = validate_integer("chunksize", chunksize, 1)