Skip to content

Commit 7ffbf1a

Browse files
BUG: Allow custom error values in parse_dates argument of read_sql like functions (GH35185) (#37823)
* BUG: Allow custom error values in parse_dates argument of read_sql (GH35185)
* Ignore call overload for one time exception
* Add tests for custom dateparsing error for read_sql
* Generalize test for all sql read functions
* Add conditional mode for tests
* Typo
* Updated test
* Update to_datetime call in _handle_date_column
* Move whatsnew message to v1.3.0
* Update test
* Explicit cast to int64
* Remove accidental check_dtype=False
* Fix wrong reference in whatsnew
* Add hyphen

Co-authored-by: Jeff Reback <[email protected]>
1 parent ef75719 commit 7ffbf1a

File tree

3 files changed

+87
-1
lines changed

3 files changed

+87
-1
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ I/O
221221
- Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
222222
- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
223223
- Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)
224+
- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
224225
-
225226

226227
Period

pandas/io/sql.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,12 @@ def _process_parse_dates_argument(parse_dates):
7979

8080
def _handle_date_column(col, utc=None, format=None):
8181
if isinstance(format, dict):
82-
return to_datetime(col, errors="ignore", **format)
82+
# GH35185 Allow custom error values in parse_dates argument of
83+
# read_sql like functions.
84+
# Format can take on custom to_datetime argument values such as
85+
# {"errors": "coerce"} or {"dayfirst": True}
86+
error = format.pop("errors", None) or "ignore"
87+
return to_datetime(col, errors=error, **format)
8388
else:
8489
# Allow passing of formatting string for integers
8590
# GH17855

pandas/tests/io/test_sql.py

+80
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,54 @@ def _load_test3_data(self):
369369

370370
self.test_frame3 = DataFrame(data, columns=columns)
371371

372+
def _load_types_test_data(self, data):
    """
    Build one ``types_test_data`` frame per SQL flavor.

    ``data`` is passed to ``DataFrame`` unchanged. For every flavor the
    frame is narrowed to the columns listed for that flavor (in the listed
    order) and cast to the listed dtypes. The result is stored on
    ``self.types_test`` as a dict keyed by flavor name.
    """
    # Columns to keep and the dtype each one is cast to, per flavor.
    # Built once per call rather than once per flavor.
    flavor_dtypes = {
        "sqlite": {
            "TextCol": "str",
            "DateCol": "str",
            "IntDateCol": "int64",
            "IntDateOnlyCol": "int64",
            "FloatCol": "float",
            "IntCol": "int64",
            "BoolCol": "int64",
            "IntColWithNull": "float",
            "BoolColWithNull": "float",
        },
        "mysql": {
            "TextCol": "str",
            "DateCol": "str",
            "IntDateCol": "int64",
            "IntDateOnlyCol": "int64",
            "FloatCol": "float",
            "IntCol": "int64",
            "BoolCol": "bool",
            "IntColWithNull": "float",
            "BoolColWithNull": "float",
        },
        "postgresql": {
            "TextCol": "str",
            "DateCol": "str",
            "DateColWithTz": "str",
            "IntDateCol": "int64",
            "IntDateOnlyCol": "int64",
            "FloatCol": "float",
            "IntCol": "int64",
            "BoolCol": "bool",
            "IntColWithNull": "float",
            "BoolColWithNull": "float",
        },
    }

    df = DataFrame(data)
    # Index with a real list of labels (not a dict view) so the column
    # selection does not depend on how pandas coerces other iterables.
    self.types_test = {
        flavor: df[list(dtypes)].astype(dtypes)
        for flavor, dtypes in flavor_dtypes.items()
    }
419+
372420
def _load_raw_sql(self):
373421
self.drop_table("types_test_data")
374422
self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor])
@@ -405,6 +453,8 @@ def _load_raw_sql(self):
405453
ins["query"], [d[field] for field in ins["fields"]]
406454
)
407455

456+
self._load_types_test_data(data)
457+
408458
def _count_rows(self, table_name):
409459
result = (
410460
self._get_exec()
@@ -741,6 +791,36 @@ def test_date_parsing(self):
741791
Timestamp("2010-12-12"),
742792
]
743793

794+
@pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
@pytest.mark.parametrize(
    "read_sql, text, mode",
    [
        (sql.read_sql, "SELECT * FROM types_test_data", ("sqlalchemy", "fallback")),
        # One-element tuples need the trailing comma; a bare ("sqlalchemy")
        # is just a string and would turn the membership check below into
        # a substring test.
        (sql.read_sql, "types_test_data", ("sqlalchemy",)),
        (
            sql.read_sql_query,
            "SELECT * FROM types_test_data",
            ("sqlalchemy", "fallback"),
        ),
        (sql.read_sql_table, "types_test_data", ("sqlalchemy",)),
    ],
)
def test_custom_dateparsing_error(self, read_sql, text, mode, error):
    # GH35185: an ``errors`` value supplied through the ``parse_dates``
    # dict must be accepted by every read_sql-like function.
    #
    # ``mode`` lists the connection modes in which this (reader, text)
    # combination is exercised; other modes are skipped explicitly instead
    # of passing without asserting anything.
    if self.mode not in mode:
        pytest.skip(f"{read_sql.__name__}({text!r}) is not run in {self.mode} mode")

    expected = self.types_test[self.flavor].astype({"DateCol": "datetime64[ns]"})

    result = read_sql(
        text,
        con=self.conn,
        parse_dates={
            "DateCol": {"errors": error},
        },
    )

    tm.assert_frame_equal(result, expected)
823+
744824
def test_date_and_index(self):
745825
# Test case where same column appears in parse_date and index_col
746826

0 commit comments

Comments
 (0)