Skip to content

BUG: Allow custom error values in parse_dates argument of read_sql like functions (GH35185) #37823

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Dec 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
33bdac6
BUG: Allow custom error values in parse_dates argument of read_sql (G…
avinashpancham Nov 13, 2020
bfdc016
Ignore call overload for one time exception
avinashpancham Nov 13, 2020
ff90c17
Merge remote-tracking branch 'upstream/master' into GH35185
avinashpancham Nov 14, 2020
e451c0e
Add tests for custom dateparsing error for read_sql
avinashpancham Nov 14, 2020
a7637d7
Generalize test for all sql read functions
avinashpancham Nov 14, 2020
7d8a5b0
Add conditional mode for tests
avinashpancham Nov 14, 2020
c2b4b2e
Merge remote-tracking branch 'upstream/master' into GH35185
avinashpancham Nov 14, 2020
1d3d25a
Typo
avinashpancham Nov 14, 2020
c4597b6
Updated test
avinashpancham Nov 15, 2020
2054360
Merge remote-tracking branch 'upstream/master' into GH35185
avinashpancham Dec 1, 2020
7673ed8
Update to_datetime call in _handle_date_column
avinashpancham Dec 1, 2020
03d05d7
Merge remote-tracking branch 'upstream/master' into GH35185
avinashpancham Dec 2, 2020
ada00a1
Merge remote-tracking branch 'upstream/master' into GH35185
avinashpancham Dec 10, 2020
42fd388
Move whatsnew message to v1.3.0
avinashpancham Dec 10, 2020
99d4bb1
Update test
avinashpancham Dec 11, 2020
0077f6b
Explicit cast to int64
avinashpancham Dec 11, 2020
ddcf646
Remove accidental check_dtype=False
avinashpancham Dec 11, 2020
a2b6635
Merge branch 'master' into GH35185
jreback Dec 13, 2020
37af90a
Merge remote-tracking branch 'upstream/master' into GH35185
avinashpancham Dec 13, 2020
664e97a
Fix wrong reference in whatsnew
avinashpancham Dec 13, 2020
29a7c26
Add hyphen
avinashpancham Dec 13, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,7 @@ I/O
- Bug in :func:`read_csv` raising ``IndexError`` with multiple header columns and ``index_col`` specified when file has no data rows (:issue:`38292`)
- Bug in :func:`read_csv` not accepting ``usecols`` with different length than ``names`` for ``engine="python"`` (:issue:`16469`)
- Bug in :func:`read_csv` raising ``TypeError`` when ``names`` and ``parse_dates`` is specified for ``engine="c"`` (:issue:`33699`)
- Allow custom error values for the ``parse_dates`` argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
-

Period
Expand Down
7 changes: 6 additions & 1 deletion pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,12 @@ def _process_parse_dates_argument(parse_dates):

def _handle_date_column(col, utc=None, format=None):
if isinstance(format, dict):
return to_datetime(col, errors="ignore", **format)
# GH35185 Allow custom error values in parse_dates argument of
# read_sql like functions.
# Format can take on custom to_datetime argument values such as
# {"errors": "coerce"} or {"dayfirst": True}
error = format.pop("errors", None) or "ignore"
return to_datetime(col, errors=error, **format)
else:
# Allow passing of formatting string for integers
# GH17855
Expand Down
80 changes: 80 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,54 @@ def _load_test3_data(self):

self.test_frame3 = DataFrame(data, columns=columns)

def _load_types_test_data(self, data):
def _filter_to_flavor(flavor, df):
flavor_dtypes = {
"sqlite": {
"TextCol": "str",
"DateCol": "str",
"IntDateCol": "int64",
"IntDateOnlyCol": "int64",
"FloatCol": "float",
"IntCol": "int64",
"BoolCol": "int64",
"IntColWithNull": "float",
"BoolColWithNull": "float",
},
"mysql": {
"TextCol": "str",
"DateCol": "str",
"IntDateCol": "int64",
"IntDateOnlyCol": "int64",
"FloatCol": "float",
"IntCol": "int64",
"BoolCol": "bool",
"IntColWithNull": "float",
"BoolColWithNull": "float",
},
"postgresql": {
"TextCol": "str",
"DateCol": "str",
"DateColWithTz": "str",
"IntDateCol": "int64",
"IntDateOnlyCol": "int64",
"FloatCol": "float",
"IntCol": "int64",
"BoolCol": "bool",
"IntColWithNull": "float",
"BoolColWithNull": "float",
},
}

dtypes = flavor_dtypes[flavor]
return df[dtypes.keys()].astype(dtypes)

df = DataFrame(data)
self.types_test = {
flavor: _filter_to_flavor(flavor, df)
for flavor in ("sqlite", "mysql", "postgresql")
}

def _load_raw_sql(self):
self.drop_table("types_test_data")
self._get_exec().execute(SQL_STRINGS["create_test_types"][self.flavor])
Expand Down Expand Up @@ -405,6 +453,8 @@ def _load_raw_sql(self):
ins["query"], [d[field] for field in ins["fields"]]
)

self._load_types_test_data(data)

def _count_rows(self, table_name):
result = (
self._get_exec()
Expand Down Expand Up @@ -741,6 +791,36 @@ def test_date_parsing(self):
Timestamp("2010-12-12"),
]

@pytest.mark.parametrize("error", ["ignore", "raise", "coerce"])
@pytest.mark.parametrize(
"read_sql, text, mode",
[
(sql.read_sql, "SELECT * FROM types_test_data", ("sqlalchemy", "fallback")),
(sql.read_sql, "types_test_data", ("sqlalchemy")),
(
sql.read_sql_query,
"SELECT * FROM types_test_data",
("sqlalchemy", "fallback"),
),
(sql.read_sql_table, "types_test_data", ("sqlalchemy")),
],
)
def test_custom_dateparsing_error(self, read_sql, text, mode, error):
if self.mode in mode:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you make this test assert the results? Otherwise it's not actually testing anything.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

expected = self.types_test[self.flavor].astype(
{"DateCol": "datetime64[ns]"}
)

result = read_sql(
text,
con=self.conn,
parse_dates={
"DateCol": {"errors": error},
},
)

tm.assert_frame_equal(result, expected)

def test_date_and_index(self):
# Test case where same column appears in parse_date and index_col

Expand Down