Skip to content

Commit 071cffd

Browse files
committed
Merge pull request #11216 from jreback/datetime_with_tz
BUG: edge case when reading from postgresql with read_sql_query and datetime with tz and chunksize
2 parents d6c7a3a + bd26dec commit 071cffd

File tree

2 files changed

+90
-28
lines changed

2 files changed

+90
-28
lines changed

pandas/io/sql.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from pandas.core.api import DataFrame, Series
1919
from pandas.core.common import isnull
2020
from pandas.core.base import PandasObject
21+
from pandas.core.dtypes import DatetimeTZDtype
2122
from pandas.tseries.tools import to_datetime
2223
from pandas.util.decorators import Appender
2324

@@ -89,6 +90,10 @@ def _handle_date_column(col, format=None):
8990
# parse dates as timestamp
9091
format = 's' if format is None else format
9192
return to_datetime(col, errors='coerce', unit=format, utc=True)
93+
elif com.is_datetime64tz_dtype(col):
94+
# coerce to UTC timezone
95+
# GH11216
96+
return to_datetime(col,errors='coerce').astype('datetime64[ns, UTC]')
9297
else:
9398
return to_datetime(col, errors='coerce', format=format, utc=True)
9499

@@ -113,6 +118,14 @@ def _parse_date_columns(data_frame, parse_dates):
113118
fmt = None
114119
data_frame[col_name] = _handle_date_column(df_col, format=fmt)
115120

121+
122+
# we want to coerce datetime64_tz dtypes for now
123+
# we could in theory do a 'nice' conversion from a FixedOffset tz
124+
# GH11216
125+
for col_name, df_col in data_frame.iteritems():
126+
if com.is_datetime64tz_dtype(df_col):
127+
data_frame[col_name] = _handle_date_column(df_col)
128+
116129
return data_frame
117130

118131

@@ -366,7 +379,7 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
366379
----------
367380
sql : string SQL query or SQLAlchemy Selectable (select or text object)
368381
to be executed.
369-
con : SQLAlchemy connectable(engine/connection) or database string URI
382+
con : SQLAlchemy connectable(engine/connection) or database string URI
370383
or sqlite3 DBAPI2 connection
371384
Using SQLAlchemy makes it possible to use any DB supported by that
372385
library.
@@ -898,11 +911,10 @@ def _harmonize_columns(self, parse_dates=None):
898911
try:
899912
df_col = self.frame[col_name]
900913
# the type the dataframe column should have
901-
col_type = self._numpy_type(sql_col.type)
914+
col_type = self._get_dtype(sql_col.type)
902915

903-
if col_type is datetime or col_type is date:
904-
if not issubclass(df_col.dtype.type, np.datetime64):
905-
self.frame[col_name] = _handle_date_column(df_col)
916+
if col_type is datetime or col_type is date or col_type is DatetimeTZDtype:
917+
self.frame[col_name] = _handle_date_column(df_col)
906918

907919
elif col_type is float:
908920
# floats support NA, can always convert!
@@ -982,20 +994,25 @@ def _sqlalchemy_type(self, col):
982994

983995
return Text
984996

985-
def _numpy_type(self, sqltype):
986-
from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date
997+
def _get_dtype(self, sqltype):
998+
from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP
987999

9881000
if isinstance(sqltype, Float):
9891001
return float
990-
if isinstance(sqltype, Integer):
1002+
elif isinstance(sqltype, Integer):
9911003
# TODO: Refine integer size.
9921004
return np.dtype('int64')
993-
if isinstance(sqltype, DateTime):
1005+
elif isinstance(sqltype, TIMESTAMP):
1006+
# we have a timezone capable type
1007+
if not sqltype.timezone:
1008+
return datetime
1009+
return DatetimeTZDtype
1010+
elif isinstance(sqltype, DateTime):
9941011
# Caution: np.datetime64 is also a subclass of np.number.
9951012
return datetime
996-
if isinstance(sqltype, Date):
1013+
elif isinstance(sqltype, Date):
9971014
return date
998-
if isinstance(sqltype, Boolean):
1015+
elif isinstance(sqltype, Boolean):
9991016
return bool
10001017
return object
10011018

pandas/io/tests/test_sql.py

+62-17
Original file line numberDiff line numberDiff line change
@@ -26,13 +26,15 @@
2626
import nose
2727
import warnings
2828
import numpy as np
29+
import pandas as pd
2930

3031
from datetime import datetime, date, time
3132

3233
from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat
3334
from pandas import date_range, to_datetime, to_timedelta, Timestamp
3435
import pandas.compat as compat
3536
from pandas.compat import StringIO, range, lrange, string_types
37+
from pandas.core import common as com
3638
from pandas.core.datetools import format as date_format
3739

3840
import pandas.io.sql as sql
@@ -1248,6 +1250,66 @@ def test_default_date_load(self):
12481250
self.assertTrue(issubclass(df.DateCol.dtype.type, np.datetime64),
12491251
"DateCol loaded with incorrect type")
12501252

1253+
def test_datetime_with_timezone(self):
1254+
# edge case that converts postgresql datetime with time zone types
1255+
# to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
1256+
# but should be more natural, so coerce to datetime64[ns] for now
1257+
1258+
def check(col):
1259+
# check that a column is either datetime64[ns]
1260+
# or datetime64[ns, UTC]
1261+
if com.is_datetime64_dtype(col.dtype):
1262+
1263+
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1264+
self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00'))
1265+
1266+
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1267+
self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00'))
1268+
1269+
elif com.is_datetime64tz_dtype(col.dtype):
1270+
self.assertTrue(str(col.dt.tz) == 'UTC')
1271+
1272+
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1273+
self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00', tz='UTC'))
1274+
1275+
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1276+
self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00', tz='UTC'))
1277+
1278+
else:
1279+
raise AssertionError("DateCol loaded with incorrect type -> {0}".format(col.dtype))
1280+
1281+
# GH11216
1282+
df = pd.read_sql_query("select * from types_test_data", self.conn)
1283+
if not hasattr(df,'DateColWithTz'):
1284+
raise nose.SkipTest("no column with datetime with time zone")
1285+
1286+
# this is parsed on Travis (linux), but not on macosx for some reason
1287+
# even with the same versions of psycopg2 & sqlalchemy, possibly a PostgreSQL server
1288+
# version difference
1289+
col = df.DateColWithTz
1290+
self.assertTrue(com.is_object_dtype(col.dtype) or com.is_datetime64_dtype(col.dtype) \
1291+
or com.is_datetime64tz_dtype(col.dtype),
1292+
"DateCol loaded with incorrect type -> {0}".format(col.dtype))
1293+
1294+
df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz'])
1295+
if not hasattr(df,'DateColWithTz'):
1296+
raise nose.SkipTest("no column with datetime with time zone")
1297+
check(df.DateColWithTz)
1298+
1299+
df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
1300+
self.conn,chunksize=1)),ignore_index=True)
1301+
col = df.DateColWithTz
1302+
self.assertTrue(com.is_datetime64tz_dtype(col.dtype),
1303+
"DateCol loaded with incorrect type -> {0}".format(col.dtype))
1304+
self.assertTrue(str(col.dt.tz) == 'UTC')
1305+
expected = sql.read_sql_table("types_test_data", self.conn)
1306+
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz.astype('datetime64[ns, UTC]'))
1307+
1308+
# xref #7139
1309+
# this might or might not be converted depending on the postgres driver
1310+
df = sql.read_sql_table("types_test_data", self.conn)
1311+
check(df.DateColWithTz)
1312+
12511313
def test_date_parsing(self):
12521314
# No Parsing
12531315
df = sql.read_sql_table("types_test_data", self.conn)
@@ -1746,23 +1808,6 @@ def test_schema_support(self):
17461808
res2 = pdsql.read_table('test_schema_other2')
17471809
tm.assert_frame_equal(res1, res2)
17481810

1749-
def test_datetime_with_time_zone(self):
1750-
1751-
# Test to see if we read the date column with timezones that
1752-
# the timezone information is converted to utc and into a
1753-
# np.datetime64 (GH #7139)
1754-
1755-
df = sql.read_sql_table("types_test_data", self.conn)
1756-
self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64),
1757-
"DateColWithTz loaded with incorrect type -> {0}".format(df.DateColWithTz.dtype))
1758-
1759-
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1760-
self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00'))
1761-
1762-
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1763-
self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00'))
1764-
1765-
17661811
class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
17671812
pass
17681813

0 commit comments

Comments
 (0)