From 511d77ad03e9284ca1626aaa0156bf4e02ba8206 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Thu, 15 Dec 2022 23:29:30 +0100 Subject: [PATCH] BUG: sql ignores dtype when chunksize is set and result is empty --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/io/sql.py | 6 +++++- pandas/tests/io/test_sql.py | 15 +++++++++++++++ 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 5c635f2d9d3be..e6ee469ce9a62 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -867,6 +867,7 @@ I/O - Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`) - Improved error message in :func:`read_excel` by including the offending sheet name when an exception is raised while reading a file (:issue:`48706`) - Bug when a pickling a subset PyArrow-backed data that would serialize the entire data instead of the subset (:issue:`42600`) +- Bug in :func:`read_sql_query` ignoring ``dtype`` argument when ``chunksize`` is specified and result is empty (:issue:`50245`) - Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`) - Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`) - Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2c98ff61cbef6..1304e5d59063f 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1597,6 +1597,7 @@ def _query_iterator( index_col=index_col, coerce_float=coerce_float, parse_dates=parse_dates, + dtype=dtype, use_nullable_dtypes=use_nullable_dtypes, ) break @@ -2162,9 +2163,12 @@ def _query_iterator( if not data: cursor.close() if not has_read_data: - yield DataFrame.from_records( + result = DataFrame.from_records( [], columns=columns, coerce_float=coerce_float ) + if dtype: + result = result.astype(dtype) + yield result break has_read_data = True diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 490b425ee52bf..4c43d5d1ef3d7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2364,6 +2364,21 @@ def nullable_expected(self, storage) -> DataFrame: } ) + def test_chunksize_empty_dtypes(self): + # GH#50245 + dtypes = {"a": "int64", "b": "object"} + df = DataFrame(columns=["a", "b"]).astype(dtypes) + expected = df.copy() + df.to_sql("test", self.conn, index=False, if_exists="replace") + + for result in read_sql_query( + "SELECT * FROM test", + self.conn, + dtype=dtypes, + chunksize=1, + ): + tm.assert_frame_equal(result, expected) + class TestSQLiteAlchemy(_TestSQLAlchemy): """