Skip to content

Commit dc3de6a

Browse files
committed
BUG: edge case when reading from postgresql with read_sql_query and datetime with timezone types and a chunksize, #11216
- When we don't specify a chunksize we get an object dtype, which is ok - We create a proper datetime64[ns, tz] type, but it's a pytz.FixedOffset(....), which ATM is not really a useful/palatable type and is mostly confusing for now. In the future we could attempt to coerce this to a nice tz, e.g. US/Eastern; not sure if this is possible - Note that this is w/o parse_dates specified
1 parent dff4534 commit dc3de6a

File tree

2 files changed

+44
-1
lines changed

2 files changed

+44
-1
lines changed

pandas/io/sql.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,14 @@ def _parse_date_columns(data_frame, parse_dates):
113113
fmt = None
114114
data_frame[col_name] = _handle_date_column(df_col, format=fmt)
115115

116+
117+
# we want to coerce datetime64_tz dtypes for now
118+
# we could in theory do a 'nice' conversion from a FixedOffset tz
119+
# GH11216
120+
for col_name, df_col in data_frame.iteritems():
121+
if com.is_datetime64tz_dtype(df_col):
122+
data_frame[col_name] = _handle_date_column(df_col)
123+
116124
return data_frame
117125

118126

@@ -366,7 +374,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
366374
----------
367375
sql : string SQL query or SQLAlchemy Selectable (select or text object)
368376
to be executed.
369-
con : SQLAlchemy connectable(engine/connection) or database string URI
377+
con : SQLAlchemy connectable(engine/connection) or database string URI
370378
or sqlite3 DBAPI2 connection
371379
Using SQLAlchemy makes it possible to use any DB supported by that
372380
library.

pandas/io/tests/test_sql.py

+35
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,15 @@
2626
import nose
2727
import warnings
2828
import numpy as np
29+
import pandas as pd
2930

3031
from datetime import datetime, date, time
3132

3233
from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat
3334
from pandas import date_range, to_datetime, to_timedelta, Timestamp
3435
import pandas.compat as compat
3536
from pandas.compat import StringIO, range, lrange, string_types
37+
from pandas.core import common as com
3638
from pandas.core.datetools import format as date_format
3739

3840
import pandas.io.sql as sql
@@ -1248,6 +1250,39 @@ def test_default_date_load(self):
12481250
self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
12491251
"DateCol loaded with incorrect type")
12501252

1253+
def test_datetime_with_timezone(self):
1254+
# edge case that converts postgresql datetime with time zone types
1255+
# to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
1256+
# but should be more natural, so coerce to datetime64[ns] for now
1257+
1258+
# GH11216
1259+
df = pd.read_sql_query("select * from types_test_data", self.conn)
1260+
if not hasattr(df,'DateColWithTz'):
1261+
raise nose.SkipTest("no column with datetime with time zone")
1262+
1263+
# this is parsed on Travis (linux), but not on macosx for some reason
1264+
# even with the same versions of psycopg2 & sqlalchemy, possibly a PostgreSQL server
1265+
# version difference
1266+
dtype = df.DateColWithTz.dtype
1267+
self.assertTrue(com.is_object_dtype(dtype) or com.is_datetime64_dtype(dtype),
1268+
"DateCol loaded with incorrect type -> {0}".format(dtype))
1269+
1270+
df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz'])
1271+
if not hasattr(df,'DateColWithTz'):
1272+
raise nose.SkipTest("no column with datetime with time zone")
1273+
1274+
dtype = df.DateColWithTz.dtype
1275+
self.assertTrue(com.is_datetime64_dtype(dtype),
1276+
"DateCol loaded with incorrect type -> {0}".format(dtype))
1277+
1278+
df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
1279+
self.conn,chunksize=1)),ignore_index=True)
1280+
dtype = df.DateColWithTz.dtype
1281+
self.assertTrue(com.is_datetime64_dtype(dtype),
1282+
"DateCol loaded with incorrect type -> {0}".format(dtype))
1283+
expected = sql.read_sql_table("types_test_data", self.conn)
1284+
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz)
1285+
12511286
def test_date_parsing(self):
12521287
# No Parsing
12531288
df = sql.read_sql_table("types_test_data", self.conn)

0 commit comments

Comments
 (0)