From d5c82b65f0568fe7423ffe7127c38f6b22c45f82 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 29 Nov 2022 16:15:30 -0800
Subject: [PATCH 1/3] ENH: Add io.nullable_backend=pyarrow support to read_excel

---
 doc/source/whatsnew/v2.0.0.rst        | 10 +++++----
 pandas/io/parsers/base_parser.py      | 15 +++++++++++++
 pandas/tests/io/excel/test_readers.py | 32 +++++++++++++++++++++++----
 3 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 9c65f10eb1d4c..46103265a09b7 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -28,13 +28,16 @@ The available extras, found in the :ref:`installation guide
 ArrayLike:
diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py
index bff4c98fe2842..822e24b224052 100644
--- a/pandas/tests/io/excel/test_readers.py
+++ b/pandas/tests/io/excel/test_readers.py
@@ -536,7 +536,11 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)
 
-    def test_use_nullable_dtypes(self, read_ext):
+    @pytest.mark.parametrize(
+        "nullable_backend",
+        ["pandas", pytest.param("pyarrow", marks=td.skip_if_no("pyarrow"))],
+    )
+    def test_use_nullable_dtypes(self, read_ext, nullable_backend):
         # GH#36712
         if read_ext in (".xlsb", ".xls"):
             pytest.skip(f"No engine for filetype: '{read_ext}'")
@@ -557,10 +561,30 @@ def test_use_nullable_dtypes(self, read_ext):
         )
         with tm.ensure_clean(read_ext) as file_path:
             df.to_excel(file_path, "test", index=False)
-            result = pd.read_excel(
-                file_path, sheet_name="test", use_nullable_dtypes=True
+            with pd.option_context("io.nullable_backend", nullable_backend):
+                result = pd.read_excel(
+                    file_path, sheet_name="test", use_nullable_dtypes=True
+                )
+        if nullable_backend == "pyarrow":
+            import pyarrow as pa
+
+            from pandas.arrays import ArrowExtensionArray
+
+            expected = DataFrame(
+                {
+                    col: ArrowExtensionArray(pa.array(df[col], from_pandas=True))
+                    for col in df.columns
+                }
             )
-        tm.assert_frame_equal(result, df)
+            # pyarrow by default infers timestamp resolution as us, not ns
+            expected["i"] = ArrowExtensionArray(
+                expected["i"].array._data.cast(pa.timestamp(unit="us"))
+            )
+            # pyarrow supports a null type, so don't have to default to Int64
+            expected["j"] = ArrowExtensionArray(pa.array([None, None]))
+        else:
+            expected = df
+        tm.assert_frame_equal(result, expected)
 
     def test_use_nullabla_dtypes_and_dtype(self, read_ext):
         # GH#36712

From b5d87aacc8dba05ad963294f9fc7d4b7041b4f6d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 1 Dec 2022 13:19:35 -0800
Subject: [PATCH 2/3] Address review for whatsnew

---
 doc/source/whatsnew/v2.0.0.rst | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index c52c5d14e5902..8b91b17786541 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -33,10 +33,20 @@ sql-other, html, xml, plot, output_formatting, clipboard, compression, test]`` (
 Configuration option, ``io.nullable_backend``, to return pyarrow-backed dtypes from IO functions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The ``use_nullable_dtypes`` keyword argument has been expanded to :func:`read_csv` and :func:`read_excel` to enable automatic conversion to nullable dtypes (:issue:`36712`)
+The ``use_nullable_dtypes`` keyword argument has been expanded to the following functions to enable automatic conversion to nullable dtypes (:issue:`36712`)
 
-Additionally a new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in :func:`read_parquet`, :func:`read_orc`, :func:`read_excel` and :func:`read_csv` (with ``engine="pyarrow"``)
-to select the nullable dtypes implementation. By default, ``io.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
+* :func:`read_csv`
+* :func:`read_excel`
+
+Additionally a new global configuration, ``io.nullable_backend`` can now be used in conjunction with the parameter ``use_nullable_dtypes=True`` in the following functions
+to select the nullable dtypes implementation.
+
+* :func:`read_csv` (with ``engine="pyarrow"``)
+* :func:`read_excel`
+* :func:`read_parquet`
+* :func:`read_orc`
+
+By default, ``io.nullable_backend`` is set to ``"pandas"`` to return existing, numpy-backed nullable dtypes, but it can also
 be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (:issue:`48957`).
 
 .. ipython:: python
@@ -46,10 +56,14 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
     1,2.5,True,a,,,,,
     3,4.5,False,b,6,7.5,True,a,
     """)
-    with pd.option_context("io.nullable_backend", "pyarrow"):
-        df = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
+    with pd.option_context("io.nullable_backend", "pandas"):
+        df = pd.read_csv(data, use_nullable_dtypes=True)
     df.dtypes
 
+    with pd.option_context("io.nullable_backend", "pyarrow"):
+        df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
+    df_pyarrow.dtypes
+
 .. _whatsnew_200.enhancements.other:
 
 Other enhancements

From a1069493aafe925c28d6ddf05e9db2ca16e279bb Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 1 Dec 2022 15:27:12 -0800
Subject: [PATCH 3/3] Seek StringIO

---
 doc/source/whatsnew/v2.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index 8b63e9a57a109..0c3d85cbcb620 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -60,6 +60,7 @@ be set to ``"pyarrow"`` to return pyarrow-backed, nullable :class:`ArrowDtype` (
         df = pd.read_csv(data, use_nullable_dtypes=True)
     df.dtypes
 
+    data.seek(0)
     with pd.option_context("io.nullable_backend", "pyarrow"):
        df_pyarrow = pd.read_csv(data, use_nullable_dtypes=True, engine="pyarrow")
     df_pyarrow.dtypes
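
For reference, a minimal usage sketch of the behavior this series enables, assuming a pandas development build that contains these commits (so ``io.nullable_backend`` and ``use_nullable_dtypes`` are available) plus ``openpyxl`` and ``pyarrow`` installed; the path ``demo.xlsx`` and the sample frame are illustrative only::

    # Sketch only: exercises the io.nullable_backend option added by this series.
    import pandas as pd

    df = pd.DataFrame(
        {"a": [1, None, 3], "b": [1.5, None, 2.5], "c": ["x", None, "z"]}
    )
    df.to_excel("demo.xlsx", index=False)  # needs an Excel writer such as openpyxl

    # Default backend ("pandas"): numpy-backed nullable extension dtypes.
    with pd.option_context("io.nullable_backend", "pandas"):
        result_pandas = pd.read_excel("demo.xlsx", use_nullable_dtypes=True)

    # "pyarrow" backend: columns come back as pyarrow-backed ArrowDtype.
    with pd.option_context("io.nullable_backend", "pyarrow"):
        result_pyarrow = pd.read_excel("demo.xlsx", use_nullable_dtypes=True)

    print(result_pandas.dtypes)
    print(result_pyarrow.dtypes)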