Skip to content

CLN: read_sql date parsing #24500

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 31, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 34 additions & 36 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,17 @@ def _convert_params(sql, params):
return args


def _process_parse_dates_argument(parse_dates):
"""Process parse_dates argument for read_sql functions"""
# handle non-list entries for parse_dates gracefully
if parse_dates is True or parse_dates is None or parse_dates is False:
parse_dates = []

elif not hasattr(parse_dates, '__iter__'):
parse_dates = [parse_dates]
return parse_dates


def _handle_date_column(col, utc=None, format=None):
if isinstance(format, dict):
return to_datetime(col, errors='ignore', **format)
Expand All @@ -96,8 +107,7 @@ def _handle_date_column(col, utc=None, format=None):
elif is_datetime64tz_dtype(col):
# coerce to UTC timezone
# GH11216
return (to_datetime(col, errors='coerce')
.astype('datetime64[ns, UTC]'))
return to_datetime(col, utc=True)
else:
return to_datetime(col, errors='coerce', format=format, utc=utc)

Expand All @@ -107,27 +117,18 @@ def _parse_date_columns(data_frame, parse_dates):
Force non-datetime columns to be read as such.
Supports both string formatted and integer timestamp columns.
"""
# handle non-list entries for parse_dates gracefully
if parse_dates is True or parse_dates is None or parse_dates is False:
parse_dates = []

if not hasattr(parse_dates, '__iter__'):
parse_dates = [parse_dates]

for col_name in parse_dates:
df_col = data_frame[col_name]
try:
fmt = parse_dates[col_name]
except TypeError:
fmt = None
data_frame[col_name] = _handle_date_column(df_col, format=fmt)
parse_dates = _process_parse_dates_argument(parse_dates)

# we want to coerce datetime64_tz dtypes for now
# we want to coerce datetime64_tz dtypes for now to UTC
# we could in theory do a 'nice' conversion from a FixedOffset tz
# GH11216
for col_name, df_col in data_frame.iteritems():
if is_datetime64tz_dtype(df_col):
data_frame[col_name] = _handle_date_column(df_col)
if is_datetime64tz_dtype(df_col) or col_name in parse_dates:
try:
fmt = parse_dates[col_name]
except TypeError:
fmt = None
data_frame[col_name] = _handle_date_column(df_col, format=fmt)

return data_frame

Expand All @@ -139,7 +140,7 @@ def _wrap_result(data, columns, index_col=None, coerce_float=True,
frame = DataFrame.from_records(data, columns=columns,
coerce_float=coerce_float)

_parse_date_columns(frame, parse_dates)
frame = _parse_date_columns(frame, parse_dates)

if index_col is not None:
frame.set_index(index_col, inplace=True)
Expand Down Expand Up @@ -818,17 +819,24 @@ def _harmonize_columns(self, parse_dates=None):
Datetimes should already be converted to np.datetime64 if supported,
but here we also force conversion if required.
"""
# handle non-list entries for parse_dates gracefully
if parse_dates is True or parse_dates is None or parse_dates is False:
parse_dates = []

if not hasattr(parse_dates, '__iter__'):
parse_dates = [parse_dates]
parse_dates = _process_parse_dates_argument(parse_dates)

for sql_col in self.table.columns:
col_name = sql_col.name
try:
df_col = self.frame[col_name]

# Handle date parsing upfront; don't try to convert columns
# twice
if col_name in parse_dates:
try:
fmt = parse_dates[col_name]
except TypeError:
fmt = None
self.frame[col_name] = _handle_date_column(
df_col, format=fmt)
continue

# the type the dataframe column should have
col_type = self._get_dtype(sql_col.type)

Expand All @@ -846,16 +854,6 @@ def _harmonize_columns(self, parse_dates=None):
if col_type is np.dtype('int64') or col_type is bool:
self.frame[col_name] = df_col.astype(
col_type, copy=False)

# Handle date parsing
if col_name in parse_dates:
try:
fmt = parse_dates[col_name]
except TypeError:
fmt = None
self.frame[col_name] = _handle_date_column(
df_col, format=fmt)

except KeyError:
pass # this column not in results

Expand Down
9 changes: 3 additions & 6 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,8 @@

from datetime import datetime, date, time

from pandas.core.dtypes.common import (
is_object_dtype, is_datetime64_dtype,
is_datetime64tz_dtype)
from pandas.core.dtypes.common import (is_datetime64_dtype,
is_datetime64tz_dtype)
from pandas import DataFrame, Series, Index, MultiIndex, isna, concat
from pandas import date_range, to_datetime, to_timedelta, Timestamp
import pandas.compat as compat
Expand Down Expand Up @@ -1356,9 +1355,7 @@ def check(col):
# even with the same versions of psycopg2 & sqlalchemy, possibly a
# PostgreSQL server version difference
col = df.DateColWithTz
assert (is_object_dtype(col.dtype) or
is_datetime64_dtype(col.dtype) or
is_datetime64tz_dtype(col.dtype))
assert is_datetime64tz_dtype(col.dtype)

df = pd.read_sql_query("select * from types_test_data",
self.conn, parse_dates=['DateColWithTz'])
Expand Down