Skip to content

ENH: Add dtype_backend support to read_sql #50985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ to select the nullable dtypes implementation.
* :func:`read_html`
* :func:`read_xml`
* :func:`read_json`
* :func:`read_sql`
* :func:`read_sql_query`
* :func:`read_sql_table`
* :func:`read_parquet`
* :func:`read_orc`
* :func:`read_feather`
Expand Down
39 changes: 39 additions & 0 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
DataFrame,
Series,
)
from pandas.core.arrays import ArrowExtensionArray
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.internals.construction import convert_object_array
Expand Down Expand Up @@ -155,6 +156,12 @@ def _convert_arrays_to_dataframe(
coerce_float=coerce_float,
use_nullable_dtypes=use_nullable_dtypes,
)
dtype_backend = get_option("mode.dtype_backend")
if dtype_backend == "pyarrow":
pa = import_optional_dependency("pyarrow")
arrays = [
ArrowExtensionArray(pa.array(arr, from_pandas=True)) for arr in arrays
]
if arrays:
return DataFrame(dict(zip(columns, arrays)))
else:
Expand Down Expand Up @@ -303,6 +310,14 @@ def read_sql_table(
set to True, nullable dtypes are used for all dtypes that have a nullable
implementation, even if no nulls are present.

.. note::

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0

Returns
Expand Down Expand Up @@ -438,6 +453,14 @@ def read_sql_query(
set to True, nullable dtypes are used for all dtypes that have a nullable
implementation, even if no nulls are present.

.. note::

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0

Returns
Expand Down Expand Up @@ -568,6 +591,14 @@ def read_sql(
set to True, nullable dtypes are used for all dtypes that have a nullable
implementation, even if no nulls are present.

.. note::

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0
dtype : Type name or dict of columns
Data type for data or columns. E.g. np.float64 or
Expand Down Expand Up @@ -1609,6 +1640,14 @@ def read_table(
set to True, nullable dtypes are used for all dtypes that have a nullable
implementation, even if no nulls are present.

.. note::

The nullable dtype implementation can be configured by calling
``pd.set_option("mode.dtype_backend", "pandas")`` to use
numpy-backed nullable dtypes or
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).

.. versionadded:: 2.0

Returns
Expand Down
116 changes: 76 additions & 40 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2293,61 +2293,73 @@ def test_get_engine_auto_error_message(self):

@pytest.mark.parametrize("option", [True, False])
@pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
def test_read_sql_nullable_dtypes(self, string_storage, func, option):
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_read_sql_nullable_dtypes(
self, string_storage, func, option, dtype_backend
):
# GH#50048
table = "test"
df = self.nullable_data()
df.to_sql(table, self.conn, index=False, if_exists="replace")

with pd.option_context("mode.string_storage", string_storage):
if option:
with pd.option_context("mode.nullable_dtypes", True):
result = getattr(pd, func)(f"Select * from {table}", self.conn)
else:
result = getattr(pd, func)(
f"Select * from {table}", self.conn, use_nullable_dtypes=True
)
expected = self.nullable_expected(string_storage)
with pd.option_context("mode.dtype_backend", dtype_backend):
if option:
with pd.option_context("mode.nullable_dtypes", True):
result = getattr(pd, func)(f"Select * from {table}", self.conn)
else:
result = getattr(pd, func)(
f"Select * from {table}", self.conn, use_nullable_dtypes=True
)
expected = self.nullable_expected(string_storage, dtype_backend)
tm.assert_frame_equal(result, expected)

with pd.option_context("mode.string_storage", string_storage):
iterator = getattr(pd, func)(
f"Select * from {table}",
self.conn,
use_nullable_dtypes=True,
chunksize=3,
)
expected = self.nullable_expected(string_storage)
for result in iterator:
tm.assert_frame_equal(result, expected)
with pd.option_context("mode.dtype_backend", dtype_backend):
iterator = getattr(pd, func)(
f"Select * from {table}",
self.conn,
use_nullable_dtypes=True,
chunksize=3,
)
expected = self.nullable_expected(string_storage, dtype_backend)
for result in iterator:
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize("option", [True, False])
@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
def test_read_sql_nullable_dtypes_table(self, string_storage, func, option):
@pytest.mark.parametrize("dtype_backend", ["pandas", "pyarrow"])
def test_read_sql_nullable_dtypes_table(
self, string_storage, func, option, dtype_backend
):
# GH#50048
table = "test"
df = self.nullable_data()
df.to_sql(table, self.conn, index=False, if_exists="replace")

with pd.option_context("mode.string_storage", string_storage):
if option:
with pd.option_context("mode.nullable_dtypes", True):
result = getattr(pd, func)(table, self.conn)
else:
result = getattr(pd, func)(table, self.conn, use_nullable_dtypes=True)
expected = self.nullable_expected(string_storage)
with pd.option_context("mode.dtype_backend", dtype_backend):
if option:
with pd.option_context("mode.nullable_dtypes", True):
result = getattr(pd, func)(table, self.conn)
else:
result = getattr(pd, func)(
table, self.conn, use_nullable_dtypes=True
)
expected = self.nullable_expected(string_storage, dtype_backend)
tm.assert_frame_equal(result, expected)

with pd.option_context("mode.string_storage", string_storage):
iterator = getattr(pd, func)(
table,
self.conn,
use_nullable_dtypes=True,
chunksize=3,
)
expected = self.nullable_expected(string_storage)
for result in iterator:
tm.assert_frame_equal(result, expected)
with pd.option_context("mode.dtype_backend", dtype_backend):
iterator = getattr(pd, func)(
table,
self.conn,
use_nullable_dtypes=True,
chunksize=3,
)
expected = self.nullable_expected(string_storage, dtype_backend)
for result in iterator:
tm.assert_frame_equal(result, expected)

def nullable_data(self) -> DataFrame:
return DataFrame(
Expand All @@ -2363,7 +2375,7 @@ def nullable_data(self) -> DataFrame:
}
)

def nullable_expected(self, storage) -> DataFrame:
def nullable_expected(self, storage, dtype_backend) -> DataFrame:

string_array: StringArray | ArrowStringArray
string_array_na: StringArray | ArrowStringArray
Expand All @@ -2376,7 +2388,7 @@ def nullable_expected(self, storage) -> DataFrame:
string_array = ArrowStringArray(pa.array(["a", "b", "c"]))
string_array_na = ArrowStringArray(pa.array(["a", "b", None]))

return DataFrame(
df = DataFrame(
{
"a": Series([1, np.nan, 3], dtype="Int64"),
"b": Series([1, 2, 3], dtype="Int64"),
Expand All @@ -2388,6 +2400,18 @@ def nullable_expected(self, storage) -> DataFrame:
"h": string_array_na,
}
)
if dtype_backend == "pyarrow":
pa = pytest.importorskip("pyarrow")

from pandas.arrays import ArrowExtensionArray

df = DataFrame(
{
col: ArrowExtensionArray(pa.array(df[col], from_pandas=True))
for col in df.columns
}
)
return df

def test_chunksize_empty_dtypes(self):
# GH#50245
Expand Down Expand Up @@ -2511,8 +2535,14 @@ class Test(BaseModel):

assert list(df.columns) == ["id", "string_column"]

def nullable_expected(self, storage) -> DataFrame:
return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"})
def nullable_expected(self, storage, dtype_backend) -> DataFrame:
    """Expected frame for nullable-dtype round-trips on this backend.

    Columns "e" and "f" come back as integers for this driver, so the
    base-class expectation is cast to integer dtypes matching the
    selected ``dtype_backend`` ("pandas" → masked Int64, otherwise
    pyarrow-backed int64).
    """
    int_casts = (
        {"e": "Int64", "f": "Int64"}
        if dtype_backend == "pandas"
        else {"e": "int64[pyarrow]", "f": "int64[pyarrow]"}
    )
    return super().nullable_expected(storage, dtype_backend).astype(int_casts)

@pytest.mark.parametrize("func", ["read_sql", "read_sql_table"])
def test_read_sql_nullable_dtypes_table(self, string_storage, func):
Expand Down Expand Up @@ -2546,8 +2576,14 @@ def setup_driver(cls):
def test_default_type_conversion(self):
pass

def nullable_expected(self, storage) -> DataFrame:
return super().nullable_expected(storage).astype({"e": "Int64", "f": "Int64"})
def nullable_expected(self, storage, dtype_backend) -> DataFrame:
    """Expected frame for nullable-dtype round-trips on this backend.

    Overrides the base expectation: columns "e" and "f" round-trip as
    integers here, cast to the dtype family implied by ``dtype_backend``.
    """
    base = super().nullable_expected(storage, dtype_backend)
    if dtype_backend == "pandas":
        return base.astype({"e": "Int64", "f": "Int64"})
    return base.astype({"e": "int64[pyarrow]", "f": "int64[pyarrow]"})


@pytest.mark.db
Expand Down