From 2097d9f60d38de9ce620db3d5f47f274ea6abbc3 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Wed, 25 Jan 2023 21:20:57 -0500 Subject: [PATCH 1/3] ENH: Add dtype_backend support to read_sql --- doc/source/whatsnew/v2.0.0.rst | 3 + pandas/io/sql.py | 31 +++++++++ pandas/tests/io/test_sql.py | 112 ++++++++++++++++++++++----------- 3 files changed, 108 insertions(+), 38 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index c28c9fdad1804..ca98d41070fb3 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -69,6 +69,9 @@ to select the nullable dtypes implementation. * :func:`read_html` * :func:`read_xml` * :func:`read_json` +* :func:`read_sql` +* :func:`read_sql_query` +* :func:`read_sql_table` * :func:`read_parquet` * :func:`read_orc` * :func:`read_feather` diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 8ba208aa84286..aba6df57b0da5 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -58,6 +58,7 @@ DataFrame, Series, ) +from pandas.core.arrays import ArrowExtensionArray from pandas.core.base import PandasObject import pandas.core.common as com from pandas.core.internals.construction import convert_object_array @@ -155,6 +156,12 @@ def _convert_arrays_to_dataframe( coerce_float=coerce_float, use_nullable_dtypes=use_nullable_dtypes, ) + dtype_backend = get_option("mode.dtype_backend") + if dtype_backend == "pyarrow": + pa = import_optional_dependency("pyarrow") + arrays = [ + ArrowExtensionArray(pa.array(arr, from_pandas=True)) for arr in arrays + ] if arrays: return DataFrame(dict(zip(columns, arrays))) else: @@ -303,6 +310,12 @@ def read_sql_table( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. 
+ The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. versionadded:: 2.0 Returns @@ -438,6 +451,12 @@ def read_sql_query( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. versionadded:: 2.0 Returns @@ -568,6 +587,12 @@ def read_sql( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. versionadded:: 2.0 dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or @@ -1609,6 +1634,12 @@ def read_table( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. 
versionadded:: 2.0 Returns diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a5bcfa8845785..9483ad1e23c7e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2293,59 +2293,71 @@ def test_get_engine_auto_error_message(self): @pytest.mark.parametrize("option", [True, False]) @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"]) - def test_read_sql_nullable_dtypes(self, string_storage, func, option): + @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) + def test_read_sql_nullable_dtypes( + self, string_storage, func, option, dtype_backend + ): # GH#50048 table = "test" df = self.nullable_data() df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): - if option: - with pd.option_context("mode.nullable_dtypes", True): - result = getattr(pd, func)(f"Select * from {table}", self.conn) - else: - result = getattr(pd, func)( - f"Select * from {table}", self.conn, use_nullable_dtypes=True - ) - expected = self.nullable_expected(string_storage) + with pd.option_context("mode.dtype_backend", dtype_backend): + if option: + with pd.option_context("mode.nullable_dtypes", True): + result = getattr(pd, func)(f"Select * from {table}", self.conn) + else: + result = getattr(pd, func)( + f"Select * from {table}", self.conn, use_nullable_dtypes=True + ) + expected = self.nullable_expected(string_storage, dtype_backend) tm.assert_frame_equal(result, expected) with pd.option_context("mode.string_storage", string_storage): - iterator = getattr(pd, func)( - f"Select * from {table}", - self.conn, - use_nullable_dtypes=True, - chunksize=3, - ) - expected = self.nullable_expected(string_storage) - for result in iterator: - tm.assert_frame_equal(result, expected) + with pd.option_context("mode.dtype_backend", dtype_backend): + iterator = getattr(pd, func)( + f"Select * from {table}", + self.conn, + use_nullable_dtypes=True, + chunksize=3, + ) + 
expected = self.nullable_expected(string_storage, dtype_backend) + for result in iterator: + tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("option", [True, False]) @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) - def test_read_sql_nullable_dtypes_table(self, string_storage, func, option): + @pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"]) + def test_read_sql_nullable_dtypes_table( + self, string_storage, func, option, dtype_backend + ): # GH#50048 table = "test" df = self.nullable_data() df.to_sql(table, self.conn, index=False, if_exists="replace") with pd.option_context("mode.string_storage", string_storage): - if option: - with pd.option_context("mode.nullable_dtypes", True): - result = getattr(pd, func)(table, self.conn) - else: - result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True) - expected = self.nullable_expected(string_storage) + with pd.option_context("mode.dtype_backend", dtype_backend): + if option: + with pd.option_context("mode.nullable_dtypes", True): + result = getattr(pd, func)(table, self.conn) + else: + result = getattr(pd, func)( + table, self.conn, use_nullable_dtypes=True + ) + expected = self.nullable_expected(string_storage, dtype_backend) tm.assert_frame_equal(result, expected) with pd.option_context("mode.string_storage", string_storage): - iterator = getattr(pd, func)( - table, - self.conn, - use_nullable_dtypes=True, - chunksize=3, - ) - expected = self.nullable_expected(string_storage) + with pd.option_context("mode.dtype_backend", dtype_backend): + iterator = getattr(pd, func)( + table, + self.conn, + use_nullable_dtypes=True, + chunksize=3, + ) + expected = self.nullable_expected(string_storage, dtype_backend) for result in iterator: tm.assert_frame_equal(result, expected) @@ -2363,7 +2375,7 @@ def nullable_data(self) -> DataFrame: } ) - def nullable_expected(self, storage) -> DataFrame: + def nullable_expected(self, storage, dtype_backend) -> DataFrame: 
string_array: StringArray | ArrowStringArray string_array_na: StringArray | ArrowStringArray @@ -2376,7 +2388,7 @@ def nullable_expected(self, storage) -> DataFrame: string_array = ArrowStringArray(pa.array(["a", "b", "c"])) string_array_na = ArrowStringArray(pa.array(["a", "b", None])) - return DataFrame( + df = DataFrame( { "a": Series([1, np.nan, 3], dtype="Int64"), "b": Series([1, 2, 3], dtype="Int64"), @@ -2388,6 +2400,18 @@ def nullable_expected(self, storage) -> DataFrame: "h": string_array_na, } ) + if dtype_backend == "pyarrow": + pa = pytest.importorskip("pyarrow") + + from pandas.arrays import ArrowExtensionArray + + df = DataFrame( + { + col: ArrowExtensionArray(pa.array(df[col], from_pandas=True)) + for col in df.columns + } + ) + return df def test_chunksize_empty_dtypes(self): # GH#50245 @@ -2511,8 +2535,14 @@ class Test(BaseModel): assert list(df.columns) == ["id", "string_column"] - def nullable_expected(self, storage) -> DataFrame: - return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"}) + def nullable_expected(self, storage, dtype_backend) -> DataFrame: + df = super().nullable_expected(storage, dtype_backend) + if dtype_backend == "pandas": + df = df.astype({"e": "Int64", "f": "Int64"}) + else: + df = df.astype({"e": "int64[pyarrow]", "f": "int64[pyarrow]"}) + + return df @pytest.mark.parametrize("func", ["read_sql", "read_sql_table"]) def test_read_sql_nullable_dtypes_table(self, string_storage, func): @@ -2546,8 +2576,14 @@ def setup_driver(cls): def test_default_type_conversion(self): pass - def nullable_expected(self, storage) -> DataFrame: - return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"}) + def nullable_expected(self, storage, dtype_backend) -> DataFrame: + df = super().nullable_expected(storage, dtype_backend) + if dtype_backend == "pandas": + df = df.astype({"e": "Int64", "f": "Int64"}) + else: + df = df.astype({"e": "int64[pyarrow]", "f": "int64[pyarrow]"}) + + return df 
@pytest.mark.db From 45a1cfe683e06eabba5cc18666f7ce2b57afb785 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 26 Jan 2023 18:20:48 -0500 Subject: [PATCH 2/3] Fix indentation --- pandas/tests/io/test_sql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 9483ad1e23c7e..e467d4d2bc9a8 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2357,9 +2357,9 @@ def test_read_sql_nullable_dtypes_table( use_nullable_dtypes=True, chunksize=3, ) - expected = self.nullable_expected(string_storage, dtype_backend) - for result in iterator: - tm.assert_frame_equal(result, expected) + expected = self.nullable_expected(string_storage, dtype_backend) + for result in iterator: + tm.assert_frame_equal(result, expected) def nullable_data(self) -> DataFrame: return DataFrame( From 9aaefea38d71a22672a6f317370fcfdb49fd6463 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 30 Jan 2023 21:03:26 +0100 Subject: [PATCH 3/3] Update doc --- pandas/io/sql.py | 48 ++++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index aba6df57b0da5..dc929de9c2888 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -310,11 +310,13 @@ def read_sql_table( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. - The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.dtype_backend", "pandas")`` to use - numpy-backed nullable dtypes or - ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use - pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + ..
note:: + + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0 @@ -451,11 +453,13 @@ def read_sql_query( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. - The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.dtype_backend", "pandas")`` to use - numpy-backed nullable dtypes or - ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use - pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. note:: + + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0 @@ -587,11 +591,13 @@ def read_sql( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. - The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.dtype_backend", "pandas")`` to use - numpy-backed nullable dtypes or - ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use - pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. note:: + + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. 
versionadded:: 2.0 dtype : Type name or dict of columns @@ -1634,11 +1640,13 @@ def read_table( set to True, nullable dtypes are used for all dtypes that have a nullable implementation, even if no nulls are present. - The nullable dtype implementation can be configured by calling - ``pd.set_option("mode.dtype_backend", "pandas")`` to use - numpy-backed nullable dtypes or - ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use - pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + .. note:: + + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). .. versionadded:: 2.0