From 33bdac606d5c90d33184d5ddfedeba5ed3feabde Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 13 Nov 2020 22:09:11 +0100 Subject: [PATCH 01/14] BUG: Allow custom error values in parse_dates argument of read_sql (GH35185) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/sql.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 09cb024cbd95c..e19d2024f9c17 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -517,6 +517,7 @@ I/O - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) +- Allow custom error values in parse_dates argument of :func:`read_sql` (:issue:`GH35185`) Plotting ^^^^^^^^ diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 51888e5021d80..b842e2632ffc3 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -79,7 +79,7 @@ def _process_parse_dates_argument(parse_dates): def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): - return to_datetime(col, errors="ignore", **format) + return to_datetime(col, **{"errors": "ignore", **format}) else: # Allow passing of formatting string for integers # GH17855 From bfdc0166a092faa77d0e200980d8f11ab6c99411 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 14 Nov 2020 00:07:20 +0100 Subject: [PATCH 02/14] Ignore call overload for one time exception --- pandas/io/sql.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index b842e2632ffc3..d5309eba3b727 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -79,7 +79,9 @@ def _process_parse_dates_argument(parse_dates): def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): - return to_datetime(col, **{"errors": "ignore", **format}) + return to_datetime( + col, **{"errors": "ignore", **format} # type: ignore[call-overload] + ) else: # Allow passing of formatting string for integers # GH17855 From e451c0e1049ceebb3fee66b3245a95bf49738c8e Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 14 Nov 2020 20:02:59 +0100 Subject: [PATCH 03/14] Add tests for custom dateparsing error for read_sql --- pandas/tests/io/test_sql.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 19eb64be1be29..565f166253bea 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -741,6 +741,16 @@ def test_date_parsing(self): Timestamp("2010-12-12"), ] + def test_custom_dateparsing_error(self): + sql.read_sql( + "SELECT * FROM types_test_data", + con=self.conn, + parse_dates={ + "DateCol": {"errors": "coerce"}, + "IntDateCol": {"errors": "ignore"}, + }, + ) + def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From a7637d723422edcda6bcf94ced6c75edf466afdc Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 14 Nov 2020 20:14:40 +0100 Subject: [PATCH 04/14] Generalize test for all sql read functions --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/tests/io/test_sql.py | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 80caf8184d68f..2dc02436d0b38 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -565,7 +565,7 @@ I/O - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) -- Allow custom error values in parse_dates argument of :func:`read_sql` (:issue:`GH35185`) +- Allow custom error values in parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`GH35185`) Plotting ^^^^^^^^ diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 565f166253bea..12723e13c2af0 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -741,9 +741,18 @@ def test_date_parsing(self): Timestamp("2010-12-12"), ] - def test_custom_dateparsing_error(self): - sql.read_sql( - "SELECT * FROM types_test_data", + @pytest.mark.parametrize( + "read_sql, text", + [ + (sql.read_sql, "SELECT * FROM types_test_data"), + (sql.read_sql, "types_test_data"), + (sql.read_sql_query, "SELECT * FROM types_test_data"), + (sql.read_sql_table, "types_test_data"), + ], + ) + def test_custom_dateparsing_error(self, read_sql, text): + read_sql( + text, con=self.conn, parse_dates={ "DateCol": {"errors": "coerce"}, From 7d8a5b0b879f98e533a76c3532951d4b9ee0ec02 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 14 Nov 2020 22:09:48 +0100 Subject: [PATCH 05/14] Add conditional mode for tests --- pandas/tests/io/test_sql.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 12723e13c2af0..978337aa9e6ee 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -742,23 +742,28 @@ def test_date_parsing(self): ] @pytest.mark.parametrize( - "read_sql, text", + "read_sql, text, mode", [ - (sql.read_sql, "SELECT * FROM types_test_data"), - (sql.read_sql, "types_test_data"), - (sql.read_sql_query, "SELECT * FROM types_test_data"), - (sql.read_sql_table, "types_test_data"), + (sql.read_sql, "SELECT * FROM types_test_data", ("sqlalchemy", "fallback")), + (sql.read_sql, "types_test_data", ("sqlalchemy")), + ( + sql.read_sql_query, + "SELECT * FROM types_test_data", + ("sqlalchemy", "fallback"), + ), + (sql.read_sql_table, "types_test_data", ("sqlalchemy")), ], ) - def test_custom_dateparsing_error(self, read_sql, text): - read_sql( - text, - con=self.conn, - parse_dates={ - "DateCol": {"errors": "coerce"}, - "IntDateCol": {"errors": "ignore"}, - }, - ) + def test_custom_dateparsing_error(self, read_sql, text, mode): + if self.mode in mode: + read_sql( + text, + con=self.conn, + parse_dates={ + "DateCol": {"errors": "coerce"}, + "IntDateCol": {"errors": "ignore"}, + }, + ) def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From 1d3d25ae91e88af542c567fa7eea03dfd5fccc5f Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 14 Nov 2020 22:12:31 +0100 Subject: [PATCH 06/14] Typo --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 975ffba1a907f..a9f446554593a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -624,7 +624,7 @@ I/O - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`) - :meth:`to_excel` and :meth:`to_markdown` support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`) - Bug in :meth:`read_fw` was not skipping blank lines (even with ``skip_blank_lines=True``) (:issue:`37758`) -- Allow custom error values in parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`GH35185`) +- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`GH35185`) Plotting ^^^^^^^^ From c4597b6bedd0a06155b214aac2bf866344fdca63 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sun, 15 Nov 2020 19:36:21 +0100 Subject: [PATCH 07/14] Updated test --- pandas/tests/io/test_sql.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 978337aa9e6ee..4e49e757be82d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -741,6 +741,7 @@ def test_date_parsing(self): Timestamp("2010-12-12"), ] + @pytest.mark.parametrize("error", ["ignore", "raise", "coerce"]) @pytest.mark.parametrize( "read_sql, text, mode", [ @@ -754,16 +755,20 @@ def test_date_parsing(self): (sql.read_sql_table, "types_test_data", ("sqlalchemy")), ], ) - def test_custom_dateparsing_error(self, read_sql, text, mode): + def test_custom_dateparsing_error(self, read_sql, text, mode, error): if self.mode in mode: - read_sql( + df = read_sql( text, con=self.conn, parse_dates={ - "DateCol": {"errors": "coerce"}, - "IntDateCol": {"errors": "ignore"}, + "DateCol": {"errors": error}, }, ) + assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + Timestamp(2000, 1, 3, 0, 0, 0), + Timestamp(2000, 1, 4, 0, 0, 0), + ] def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From 7673ed8398caf8d76693e45f7a1d7daef708db88 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Tue, 1 Dec 2020 23:51:30 +0100 Subject: [PATCH 08/14] Update to_datetime call in _handle_date_column --- pandas/io/sql.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 50b5a69852c91..32f7aa1da5d72 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -79,9 +79,12 @@ def _process_parse_dates_argument(parse_dates): def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): - return to_datetime( - col, **{"errors": "ignore", **format} # type: ignore[call-overload] - ) + # GH35185 Allow custom error values in parse_dates argument of + # read_sql like functions. + # Format can take on custom to_datetime argument values such as + # {"errors": "coerce"} or {"dayfirst": True} + error = format.pop("errors", None) or "ignore" + return to_datetime(col, errors=error, **format) else: # Allow passing of formatting string for integers # GH17855 From 42fd3887a8742337a6b349373f250afa3f88df11 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Thu, 10 Dec 2020 23:11:28 +0100 Subject: [PATCH 09/14] Move whatsnew message to v1.3.0 --- doc/source/whatsnew/v1.2.0.rst | 1 - doc/source/whatsnew/v1.3.0.rst | 3 ++- pandas/tests/io/test_sql.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4fb4edc64d4c7..af9219bc25931 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -745,7 +745,6 @@ I/O - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other :meth:``read_*`` functions (:issue:`37909`) - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`) - Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`) -- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`GH35185`) Period ^^^^^^ diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 90f611c55e710..5fad03e92e455 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -146,7 +146,8 @@ MultiIndex I/O ^^^ -- +- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`GH35185`) + - Period diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 28b7a0b93eb19..b14e8ff61196d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -764,6 +764,7 @@ def test_custom_dateparsing_error(self, read_sql, text, mode, error): "DateCol": {"errors": error}, }, ) + assert issubclass(df.DateCol.dtype.type, np.datetime64) assert df.DateCol.tolist() == [ Timestamp(2000, 1, 3, 0, 0, 0), From 99d4bb120aa337d5ba15c4719307d2ae4d787524 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 11 Dec 2020 20:47:57 +0100 Subject: [PATCH 10/14] Update test --- pandas/tests/io/test_sql.py | 62 +++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index b14e8ff61196d..d62e7b08a859a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -369,6 +369,54 @@ def _load_test3_data(self): self.test_frame3 = DataFrame(data, columns=columns) + def _load_types_test_data(self, data): + def _filter_to_flavor(flavor, df): + flavor_dtypes = { + "sqlite": { + "TextCol": "str", + "DateCol": "str", + "IntDateCol": "int", + "IntDateOnlyCol": "int", + "FloatCol": "float", + "IntCol": "int", + "BoolCol": "int", + "IntColWithNull": "float", + "BoolColWithNull": "float", + }, + "mysql": { + "TextCol": "str", + "DateCol": "str", + "IntDateCol": "int", + "IntDateOnlyCol": "int", + "FloatCol": "float", + "IntCol": "int", + "BoolCol": "bool", + "IntColWithNull": "float", + "BoolColWithNull": "float", + }, + "postgresql": { + "TextCol": "str", + "DateCol": "str", + "DateColWithTz": "str", + "IntDateCol": "int", + "IntDateOnlyCol": "int", + "FloatCol": "float", + "IntCol": "int", + "BoolCol": "bool", + "IntColWithNull": "float", + "BoolColWithNull": "float", + }, + } + + dtypes = flavor_dtypes[flavor] + return df[dtypes.keys()].astype(dtypes) + + df = DataFrame(data) + self.types_test = { + flavor: _filter_to_flavor(flavor, df) + for flavor in ("sqlite", "mysql", "postgresql") + } + def _load_raw_sql(self): self.drop_table("types_test_data") self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor]) @@ -405,6 +453,8 @@ def _load_raw_sql(self): ins["query"], [d[field] for field in ins["fields"]] ) + self._load_types_test_data(data) + def _count_rows(self, table_name): result = ( self._get_exec() @@ -757,7 +807,11 @@ def test_date_parsing(self): ) def test_custom_dateparsing_error(self, read_sql, text, mode, error): if self.mode in mode: - df = read_sql( + expected = self.types_test[self.flavor].astype( + {"DateCol": "datetime64[ns]"} + ) + + result = read_sql( text, con=self.conn, parse_dates={ @@ -765,11 +819,7 @@ def test_custom_dateparsing_error(self, read_sql, text, mode, error): }, ) - assert issubclass(df.DateCol.dtype.type, np.datetime64) - assert df.DateCol.tolist() == [ - Timestamp(2000, 1, 3, 0, 0, 0), - Timestamp(2000, 1, 4, 0, 0, 0), - ] + tm.assert_frame_equal(result, expected) def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From 0077f6b334b69ba7a54c058c8a9dcb62c41c0ae2 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Fri, 11 Dec 2020 23:10:21 +0100 Subject: [PATCH 11/14] Explicit cast to int64 --- pandas/tests/io/test_sql.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d62e7b08a859a..3dd578d3787c8 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -375,21 +375,21 @@ def _filter_to_flavor(flavor, df): "sqlite": { "TextCol": "str", "DateCol": "str", - "IntDateCol": "int", - "IntDateOnlyCol": "int", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", "FloatCol": "float", - "IntCol": "int", - "BoolCol": "int", + "IntCol": "int64", + "BoolCol": "int64", "IntColWithNull": "float", "BoolColWithNull": "float", }, "mysql": { "TextCol": "str", "DateCol": "str", - "IntDateCol": "int", - "IntDateOnlyCol": "int", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", "FloatCol": "float", - "IntCol": "int", + "IntCol": "int64", "BoolCol": "bool", "IntColWithNull": "float", "BoolColWithNull": "float", @@ -398,10 +398,10 @@ def _filter_to_flavor(flavor, df): "TextCol": "str", "DateCol": "str", "DateColWithTz": "str", - "IntDateCol": "int", - "IntDateOnlyCol": "int", + "IntDateCol": "int64", + "IntDateOnlyCol": "int64", "FloatCol": "float", - "IntCol": "int", + "IntCol": "int64", "BoolCol": "bool", "IntColWithNull": "float", "BoolColWithNull": "float", @@ -819,7 +819,7 @@ def test_custom_dateparsing_error(self, read_sql, text, mode, error): }, ) - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_dtype=False) def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From ddcf6467ccc7de1942f99b69b2a99099d6e72a02 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Sat, 12 Dec 2020 00:04:12 +0100 Subject: [PATCH 12/14] Remove accidental check_dtype=False --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 3dd578d3787c8..497039de99196 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -819,7 +819,7 @@ def test_custom_dateparsing_error(self, read_sql, text, mode, error): }, ) - tm.assert_frame_equal(result, expected, check_dtype=False) + tm.assert_frame_equal(result, expected) def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From 664e97a3369913eea0e6ab7a80ebd3813fe97823 Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Mon, 14 Dec 2020 00:25:47 +0100 Subject: [PATCH 13/14] Fix wrong reference in whatsnew --- doc/source/whatsnew/v1.3.0.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5ab36865835f0..eaa7aee1bcfef 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -220,9 +220,8 @@ I/O - Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`) - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`) - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) -- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`GH35185`) +- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) -- Period ^^^^^^ From 29a7c267b742fbdd7abfafadef9d58c87a7e39fb Mon Sep 17 00:00:00 2001 From: Avinash Pancham Date: Mon, 14 Dec 2020 00:27:30 +0100 Subject: [PATCH 14/14] Add hyphen --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index eaa7aee1bcfef..7c2a1199bdf0e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -221,7 +221,7 @@ I/O - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`) - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`) - Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`) - +- Period ^^^^^^