Skip to content

Commit bd26dec

Browse files
committed
use datetime64[ns, UTC] for 'datetime with timezone' sql types
1 parent dc3de6a commit bd26dec

File tree

2 files changed: +57 additions, -38 deletions

pandas/io/sql.py

+19-10
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818
from pandas.core.api import DataFrame, Series
1919
from pandas.core.common import isnull
2020
from pandas.core.base import PandasObject
21+
from pandas.core.dtypes import DatetimeTZDtype
2122
from pandas.tseries.tools import to_datetime
2223
from pandas.util.decorators import Appender
2324

@@ -89,6 +90,10 @@ def _handle_date_column(col, format=None):
8990
# parse dates as timestamp
9091
format = 's' if format is None else format
9192
return to_datetime(col, errors='coerce', unit=format, utc=True)
93+
elif com.is_datetime64tz_dtype(col):
94+
# coerce to UTC timezone
95+
# GH11216
96+
return to_datetime(col,errors='coerce').astype('datetime64[ns, UTC]')
9297
else:
9398
return to_datetime(col, errors='coerce', format=format, utc=True)
9499

@@ -906,11 +911,10 @@ def _harmonize_columns(self, parse_dates=None):
906911
try:
907912
df_col = self.frame[col_name]
908913
# the type the dataframe column should have
909-
col_type = self._numpy_type(sql_col.type)
914+
col_type = self._get_dtype(sql_col.type)
910915

911-
if col_type is datetime or col_type is date:
912-
if not issubclass(df_col.dtype.type, np.datetime64):
913-
self.frame[col_name] = _handle_date_column(df_col)
916+
if col_type is datetime or col_type is date or col_type is DatetimeTZDtype:
917+
self.frame[col_name] = _handle_date_column(df_col)
914918

915919
elif col_type is float:
916920
# floats support NA, can always convert!
@@ -990,20 +994,25 @@ def _sqlalchemy_type(self, col):
990994

991995
return Text
992996

993-
def _numpy_type(self, sqltype):
994-
from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date
997+
def _get_dtype(self, sqltype):
998+
from sqlalchemy.types import Integer, Float, Boolean, DateTime, Date, TIMESTAMP
995999

9961000
if isinstance(sqltype, Float):
9971001
return float
998-
if isinstance(sqltype, Integer):
1002+
elif isinstance(sqltype, Integer):
9991003
# TODO: Refine integer size.
10001004
return np.dtype('int64')
1001-
if isinstance(sqltype, DateTime):
1005+
elif isinstance(sqltype, TIMESTAMP):
1006+
# we have a timezone capable type
1007+
if not sqltype.timezone:
1008+
return datetime
1009+
return DatetimeTZDtype
1010+
elif isinstance(sqltype, DateTime):
10021011
# Caution: np.datetime64 is also a subclass of np.number.
10031012
return datetime
1004-
if isinstance(sqltype, Date):
1013+
elif isinstance(sqltype, Date):
10051014
return date
1006-
if isinstance(sqltype, Boolean):
1015+
elif isinstance(sqltype, Boolean):
10071016
return bool
10081017
return object
10091018

pandas/io/tests/test_sql.py

+38-28
Original file line number | Diff line number | Diff line change
@@ -1255,6 +1255,29 @@ def test_datetime_with_timezone(self):
12551255
# to datetime64[ns,psycopg2.tz.FixedOffsetTimezone..], which is ok
12561256
# but should be more natural, so coerce to datetime64[ns] for now
12571257

1258+
def check(col):
1259+
# check that a column is either datetime64[ns]
1260+
# or datetime64[ns, UTC]
1261+
if com.is_datetime64_dtype(col.dtype):
1262+
1263+
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1264+
self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00'))
1265+
1266+
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1267+
self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00'))
1268+
1269+
elif com.is_datetime64tz_dtype(col.dtype):
1270+
self.assertTrue(str(col.dt.tz) == 'UTC')
1271+
1272+
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1273+
self.assertEqual(col[0], Timestamp('2000-01-01 08:00:00', tz='UTC'))
1274+
1275+
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1276+
self.assertEqual(col[1], Timestamp('2000-06-01 07:00:00', tz='UTC'))
1277+
1278+
else:
1279+
raise AssertionError("DateCol loaded with incorrect type -> {0}".format(col.dtype))
1280+
12581281
# GH11216
12591282
df = pd.read_sql_query("select * from types_test_data", self.conn)
12601283
if not hasattr(df,'DateColWithTz'):
@@ -1263,25 +1286,29 @@ def test_datetime_with_timezone(self):
12631286
# this is parsed on Travis (linux), but not on macosx for some reason
12641287
# even with the same versions of psycopg2 & sqlalchemy, possibly a PostgreSQL server
12651288
# version difference
1266-
dtype = df.DateColWithTz.dtype
1267-
self.assertTrue(com.is_object_dtype(dtype) or com.is_datetime64_dtype(dtype),
1268-
"DateCol loaded with incorrect type -> {0}".format(dtype))
1289+
col = df.DateColWithTz
1290+
self.assertTrue(com.is_object_dtype(col.dtype) or com.is_datetime64_dtype(col.dtype) \
1291+
or com.is_datetime64tz_dtype(col.dtype),
1292+
"DateCol loaded with incorrect type -> {0}".format(col.dtype))
12691293

12701294
df = pd.read_sql_query("select * from types_test_data", self.conn, parse_dates=['DateColWithTz'])
12711295
if not hasattr(df,'DateColWithTz'):
12721296
raise nose.SkipTest("no column with datetime with time zone")
1273-
1274-
dtype = df.DateColWithTz.dtype
1275-
self.assertTrue(com.is_datetime64_dtype(dtype),
1276-
"DateCol loaded with incorrect type -> {0}".format(dtype))
1297+
check(df.DateColWithTz)
12771298

12781299
df = pd.concat(list(pd.read_sql_query("select * from types_test_data",
12791300
self.conn,chunksize=1)),ignore_index=True)
1280-
dtype = df.DateColWithTz.dtype
1281-
self.assertTrue(com.is_datetime64_dtype(dtype),
1282-
"DateCol loaded with incorrect type -> {0}".format(dtype))
1301+
col = df.DateColWithTz
1302+
self.assertTrue(com.is_datetime64tz_dtype(col.dtype),
1303+
"DateCol loaded with incorrect type -> {0}".format(col.dtype))
1304+
self.assertTrue(str(col.dt.tz) == 'UTC')
12831305
expected = sql.read_sql_table("types_test_data", self.conn)
1284-
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz)
1306+
tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz.astype('datetime64[ns, UTC]'))
1307+
1308+
# xref #7139
1309+
# this might or might not be converted depending on the postgres driver
1310+
df = sql.read_sql_table("types_test_data", self.conn)
1311+
check(df.DateColWithTz)
12851312

12861313
def test_date_parsing(self):
12871314
# No Parsing
@@ -1781,23 +1808,6 @@ def test_schema_support(self):
17811808
res2 = pdsql.read_table('test_schema_other2')
17821809
tm.assert_frame_equal(res1, res2)
17831810

1784-
def test_datetime_with_time_zone(self):
1785-
1786-
# Test to see if we read the date column with timezones that
1787-
# the timezone information is converted to utc and into a
1788-
# np.datetime64 (GH #7139)
1789-
1790-
df = sql.read_sql_table("types_test_data", self.conn)
1791-
self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64),
1792-
"DateColWithTz loaded with incorrect type -> {0}".format(df.DateColWithTz.dtype))
1793-
1794-
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1795-
self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00'))
1796-
1797-
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1798-
self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00'))
1799-
1800-
18011811
class TestMySQLAlchemy(_TestMySQLAlchemy, _TestSQLAlchemy):
18021812
pass
18031813

0 commit comments

Comments
 (0)