BUG: Allow custom error values in parse_dates argument of read_sql like functions (GH35185) (#37823)

avinashpancham · jreback · web-flow · commit 7ffbf1a5ab6b · 2020-12-13T17:41:43.000-08:00
* BUG: Allow custom error values in parse_dates argument of read_sql (GH35185)

* Ignore call overload for one time exception

* Add tests for custom dateparsing error for read_sql

* Generalize test for all sql read functions

* Add conditional mode for tests

* Typo

* Updated test

* Update to_datetime call in _handle_date_column

* Move whatsnew message to v1.3.0

* Update test

* Explicit cast to int64

* Remove accidental check_dtype=False

* Fix wrong reference in whatsnew

* Add hyphen

Co-authored-by: Jeff Reback <jeff@reback.net>
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -221,6 +221,7 @@ I/O
 - Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
 - Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
 - Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)
+- Allow custom error values for the ``parse_dates`` argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
 -
 
 Period
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -79,7 +79,12 @@ def _process_parse_dates_argument(parse_dates):
 
 def _handle_date_column(col, utc=None, format=None):
     if isinstance(format, dict):
-        return to_datetime(col, errors="ignore", **format)
+        # GH35185 Allow custom error values in parse_dates argument of
+        # read_sql like functions.
+        # Format can take on custom to_datetime argument values such as
+        # {"errors": "coerce"} or {"dayfirst": True}
+        error = format.pop("errors", None) or "ignore"
+        return to_datetime(col, errors=error, **format)
     else:
         # Allow passing of formatting string for integers
         # GH17855
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -369,6 +369,54 @@ def _load_test3_data(self):
 
         self.test_frame3 = DataFrame(data, columns=columns)
 
+    def _load_types_test_data(self, data):
+        """Build ``self.types_test``: one typed DataFrame per SQL flavor.
+
+        ``data`` is turned into a single DataFrame; for each flavor only the
+        columns listed for that flavor are kept (in the listed order) and
+        cast to the listed dtypes.
+        """
+        # Column -> dtype per flavor.  Key order is significant: it fixes the
+        # column order of the resulting frame.
+        dtypes_by_flavor = {
+            "sqlite": {
+                "TextCol": "str",
+                "DateCol": "str",
+                "IntDateCol": "int64",
+                "IntDateOnlyCol": "int64",
+                "FloatCol": "float",
+                "IntCol": "int64",
+                "BoolCol": "int64",
+                "IntColWithNull": "float",
+                "BoolColWithNull": "float",
+            },
+            "mysql": {
+                "TextCol": "str",
+                "DateCol": "str",
+                "IntDateCol": "int64",
+                "IntDateOnlyCol": "int64",
+                "FloatCol": "float",
+                "IntCol": "int64",
+                "BoolCol": "bool",
+                "IntColWithNull": "float",
+                "BoolColWithNull": "float",
+            },
+            "postgresql": {
+                "TextCol": "str",
+                "DateCol": "str",
+                "DateColWithTz": "str",
+                "IntDateCol": "int64",
+                "IntDateOnlyCol": "int64",
+                "FloatCol": "float",
+                "IntCol": "int64",
+                "BoolCol": "bool",
+                "IntColWithNull": "float",
+                "BoolColWithNull": "float",
+            },
+        }
+
+        frame = DataFrame(data)
+        self.types_test = {
+            flavor: frame[dtypes.keys()].astype(dtypes)
+            for flavor, dtypes in dtypes_by_flavor.items()
+        }
+
+
     def _load_raw_sql(self):
         self.drop_table("types_test_data")
         self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor])
@@ -405,6 +453,8 @@ def _load_raw_sql(self):
                 ins["query"], [d[field] for field in ins["fields"]]
             )
 
+        self._load_types_test_data(data)
+
     def _count_rows(self, table_name):
         result = (
             self._get_exec()
@@ -741,6 +791,36 @@ def test_date_parsing(self):
             Timestamp("2010-12-12"),
         ]
 
+    @pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
+    @pytest.mark.parametrize(
+        "read_sql, text, mode",
+        [
+            (sql.read_sql, "SELECT * FROM types_test_data", ("sqlalchemy", "fallback")),
+            # NOTE: one-element tuples need the trailing comma; without it
+            # ``("sqlalchemy")`` is just a string and ``in`` becomes a
+            # substring test.
+            (sql.read_sql, "types_test_data", ("sqlalchemy",)),
+            (
+                sql.read_sql_query,
+                "SELECT * FROM types_test_data",
+                ("sqlalchemy", "fallback"),
+            ),
+            (sql.read_sql_table, "types_test_data", ("sqlalchemy",)),
+        ],
+    )
+    def test_custom_dateparsing_error(self, read_sql, text, mode, error):
+        """Check that a custom ``errors`` value in ``parse_dates`` is accepted.
+
+        GH35185: for each reader/query combination and each ``errors`` value,
+        the result must equal the flavor's expected frame with ``DateCol``
+        cast to ``datetime64[ns]``.  ``mode`` lists the connection modes the
+        combination applies to; other modes do nothing.
+        """
+        if self.mode not in mode:
+            # This reader/text combination is not exercised in this mode.
+            return
+
+        expected = self.types_test[self.flavor].astype({"DateCol": "datetime64[ns]"})
+
+        result = read_sql(
+            text,
+            con=self.conn,
+            parse_dates={"DateCol": {"errors": error}},
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+
     def test_date_and_index(self):
         # Test case where same column appears in parse_date and index_col
 

Original file line number	Diff line number	Diff line change
`@@ -221,6 +221,7 @@ I/O`
`221`	`221`	- Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
`222`	`222`	- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
`223`	`223`	- Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)
	`224`	+- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
`224`	`225`	`-`
`225`	`226`
`226`	`227`	`Period`