Skip to content

Commit c6e9198

Browse files
mroeschke authored and Pingviinituutti committed
CLN: read_sql date parsing (pandas-dev#24500)
1 parent 8012c07 commit c6e9198

File tree

2 files changed

+37
-42
lines changed

2 files changed

+37
-42
lines changed

pandas/io/sql.py

+34-36
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,17 @@ def _convert_params(sql, params):
8282
return args
8383

8484

85+
def _process_parse_dates_argument(parse_dates):
86+
"""Process parse_dates argument for read_sql functions"""
87+
# handle non-list entries for parse_dates gracefully
88+
if parse_dates is True or parse_dates is None or parse_dates is False:
89+
parse_dates = []
90+
91+
elif not hasattr(parse_dates, '__iter__'):
92+
parse_dates = [parse_dates]
93+
return parse_dates
94+
95+
8596
def _handle_date_column(col, utc=None, format=None):
8697
if isinstance(format, dict):
8798
return to_datetime(col, errors='ignore', **format)
@@ -96,8 +107,7 @@ def _handle_date_column(col, utc=None, format=None):
96107
elif is_datetime64tz_dtype(col):
97108
# coerce to UTC timezone
98109
# GH11216
99-
return (to_datetime(col, errors='coerce')
100-
.astype('datetime64[ns, UTC]'))
110+
return to_datetime(col, utc=True)
101111
else:
102112
return to_datetime(col, errors='coerce', format=format, utc=utc)
103113

@@ -107,27 +117,18 @@ def _parse_date_columns(data_frame, parse_dates):
107117
Force non-datetime columns to be read as such.
108118
Supports both string formatted and integer timestamp columns.
109119
"""
110-
# handle non-list entries for parse_dates gracefully
111-
if parse_dates is True or parse_dates is None or parse_dates is False:
112-
parse_dates = []
113-
114-
if not hasattr(parse_dates, '__iter__'):
115-
parse_dates = [parse_dates]
116-
117-
for col_name in parse_dates:
118-
df_col = data_frame[col_name]
119-
try:
120-
fmt = parse_dates[col_name]
121-
except TypeError:
122-
fmt = None
123-
data_frame[col_name] = _handle_date_column(df_col, format=fmt)
120+
parse_dates = _process_parse_dates_argument(parse_dates)
124121

125-
# we want to coerce datetime64_tz dtypes for now
122+
# we want to coerce datetime64_tz dtypes for now to UTC
126123
# we could in theory do a 'nice' conversion from a FixedOffset tz
127124
# GH11216
128125
for col_name, df_col in data_frame.iteritems():
129-
if is_datetime64tz_dtype(df_col):
130-
data_frame[col_name] = _handle_date_column(df_col)
126+
if is_datetime64tz_dtype(df_col) or col_name in parse_dates:
127+
try:
128+
fmt = parse_dates[col_name]
129+
except TypeError:
130+
fmt = None
131+
data_frame[col_name] = _handle_date_column(df_col, format=fmt)
131132

132133
return data_frame
133134

@@ -139,7 +140,7 @@ def _wrap_result(data, columns, index_col=None, coerce_float=True,
139140
frame = DataFrame.from_records(data, columns=columns,
140141
coerce_float=coerce_float)
141142

142-
_parse_date_columns(frame, parse_dates)
143+
frame = _parse_date_columns(frame, parse_dates)
143144

144145
if index_col is not None:
145146
frame.set_index(index_col, inplace=True)
@@ -818,17 +819,24 @@ def _harmonize_columns(self, parse_dates=None):
818819
Datetimes should already be converted to np.datetime64 if supported,
819820
but here we also force conversion if required.
820821
"""
821-
# handle non-list entries for parse_dates gracefully
822-
if parse_dates is True or parse_dates is None or parse_dates is False:
823-
parse_dates = []
824-
825-
if not hasattr(parse_dates, '__iter__'):
826-
parse_dates = [parse_dates]
822+
parse_dates = _process_parse_dates_argument(parse_dates)
827823

828824
for sql_col in self.table.columns:
829825
col_name = sql_col.name
830826
try:
831827
df_col = self.frame[col_name]
828+
829+
# Handle date parsing upfront; don't try to convert columns
830+
# twice
831+
if col_name in parse_dates:
832+
try:
833+
fmt = parse_dates[col_name]
834+
except TypeError:
835+
fmt = None
836+
self.frame[col_name] = _handle_date_column(
837+
df_col, format=fmt)
838+
continue
839+
832840
# the type the dataframe column should have
833841
col_type = self._get_dtype(sql_col.type)
834842

@@ -846,16 +854,6 @@ def _harmonize_columns(self, parse_dates=None):
846854
if col_type is np.dtype('int64') or col_type is bool:
847855
self.frame[col_name] = df_col.astype(
848856
col_type, copy=False)
849-
850-
# Handle date parsing
851-
if col_name in parse_dates:
852-
try:
853-
fmt = parse_dates[col_name]
854-
except TypeError:
855-
fmt = None
856-
self.frame[col_name] = _handle_date_column(
857-
df_col, format=fmt)
858-
859857
except KeyError:
860858
pass # this column not in results
861859

pandas/tests/io/test_sql.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,8 @@
2828

2929
from datetime import datetime, date, time
3030

31-
from pandas.core.dtypes.common import (
32-
is_object_dtype, is_datetime64_dtype,
33-
is_datetime64tz_dtype)
31+
from pandas.core.dtypes.common import (is_datetime64_dtype,
32+
is_datetime64tz_dtype)
3433
from pandas import DataFrame, Series, Index, MultiIndex, isna, concat
3534
from pandas import date_range, to_datetime, to_timedelta, Timestamp
3635
import pandas.compat as compat
@@ -1356,9 +1355,7 @@ def check(col):
13561355
# even with the same versions of psycopg2 & sqlalchemy, possibly a
13571356
# PostgreSQL server version difference
13581357
col = df.DateColWithTz
1359-
assert (is_object_dtype(col.dtype) or
1360-
is_datetime64_dtype(col.dtype) or
1361-
is_datetime64tz_dtype(col.dtype))
1358+
assert is_datetime64tz_dtype(col.dtype)
13621359

13631360
df = pd.read_sql_query("select * from types_test_data",
13641361
self.conn, parse_dates=['DateColWithTz'])

0 commit comments

Comments
 (0)