ENH: Add dtype to read sql to be consistent with read_sql_query (pandas-dev#50797)

phofl · web-flow · commit 69a8150ee703 · 2023-01-20T10:56:46.000-08:00
* ENH: Add dtype to read sql to be consistent with read_sql_query

* Add gh ref

* Fix docstring

* Add test
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -165,6 +165,7 @@ Other enhancements
 - Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
 - Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
 - Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
+- Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -470,6 +470,7 @@ def read_sql(
     columns: list[str] = ...,
     chunksize: None = ...,
     use_nullable_dtypes: bool = ...,
+    dtype: DtypeArg | None = None,
 ) -> DataFrame:
     ...
 
@@ -485,6 +486,7 @@ def read_sql(
     columns: list[str] = ...,
     chunksize: int = ...,
     use_nullable_dtypes: bool = ...,
+    dtype: DtypeArg | None = None,
 ) -> Iterator[DataFrame]:
     ...
 
@@ -499,6 +501,7 @@ def read_sql(
     columns: list[str] | None = None,
     chunksize: int | None = None,
     use_nullable_dtypes: bool = False,
+    dtype: DtypeArg | None = None,
 ) -> DataFrame | Iterator[DataFrame]:
     """
     Read SQL query or database table into a DataFrame.
@@ -552,6 +555,12 @@ def read_sql(
         implementation, even if no nulls are present.
 
         .. versionadded:: 2.0
+    dtype : Type name or dict of columns
+        Data type for data or columns. E.g. np.float64 or
+        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
+        The argument is ignored if a table is passed instead of a query.
+
+        .. versionadded:: 2.0
 
     Returns
     -------
@@ -632,6 +641,7 @@ def read_sql(
                 parse_dates=parse_dates,
                 chunksize=chunksize,
                 use_nullable_dtypes=use_nullable_dtypes,
+                dtype=dtype,
             )
 
         try:
@@ -659,6 +669,7 @@ def read_sql(
                 parse_dates=parse_dates,
                 chunksize=chunksize,
                 use_nullable_dtypes=use_nullable_dtypes,
+                dtype=dtype,
             )
 
 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -2394,6 +2394,30 @@ def test_chunksize_empty_dtypes(self):
         ):
             tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize("use_nullable_dtypes", [True, False])
+    @pytest.mark.parametrize("func", ["read_sql", "read_sql_query"])
+    def test_read_sql_dtype(self, func, use_nullable_dtypes):
+        # GH#50797
+        table = "test"
+        df = DataFrame({"a": [1, 2, 3], "b": 5})
+        df.to_sql(table, self.conn, index=False, if_exists="replace")
+
+        result = getattr(pd, func)(
+            f"Select * from {table}",
+            self.conn,
+            dtype={"a": np.float64},
+            use_nullable_dtypes=use_nullable_dtypes,
+        )
+        expected = DataFrame(
+            {
+                "a": Series([1, 2, 3], dtype=np.float64),
+                "b": Series(
+                    [5, 5, 5], dtype="int64" if not use_nullable_dtypes else "Int64"
+                ),
+            }
+        )
+        tm.assert_frame_equal(result, expected)
+
 
 class TestSQLiteAlchemy(_TestSQLAlchemy):
     """

Original file line number	Diff line number	Diff line change
`@@ -165,6 +165,7 @@ Other enhancements`
`165`	`165`	- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
`166`	`166`	- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
`167`	`167`	- Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
	`168`	+- Added new argument ``dtype`` to :func:`read_sql` to be consistent with :func:`read_sql_query` (:issue:`50797`)
`168`	`169`	`-`
`169`	`170`
`170`	`171`	`.. ---------------------------------------------------------------------------`