Skip to content

Commit 5cc35f4

Browse files
committed
BUG: Fix error when reading postgres table with timezone pandas-dev#7139
`read_sql_table()` breaks when it reads a table with a `timestamp with time zone` column whose individual rows have different time zones. This is very common because of daylight saving time. Pandas currently does not have good support for a Series containing datetimes with different time zones (hence this bug), so this change simply converts a `timestamp with time zone` column to UTC during import, which pandas supports well.
1 parent a477202 commit 5cc35f4

File tree

3 files changed

+82
-15
lines changed

3 files changed

+82
-15
lines changed

doc/source/whatsnew/v0.16.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ Bug Fixes
5252

5353

5454
- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`)
55-
55+
- Bug in ``read_sql_table`` causing an error when reading a postgres table with a timezone-aware timestamp column (:issue:`7139`)
5656

5757

5858

pandas/io/sql.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -83,14 +83,14 @@ def _handle_date_column(col, format=None):
8383
return to_datetime(col, **format)
8484
else:
8585
if format in ['D', 's', 'ms', 'us', 'ns']:
86-
return to_datetime(col, coerce=True, unit=format)
86+
return to_datetime(col, coerce=True, unit=format, utc=True)
8787
elif (issubclass(col.dtype.type, np.floating)
8888
or issubclass(col.dtype.type, np.integer)):
8989
# parse dates as timestamp
9090
format = 's' if format is None else format
91-
return to_datetime(col, coerce=True, unit=format)
91+
return to_datetime(col, coerce=True, unit=format, utc=True)
9292
else:
93-
return to_datetime(col, coerce=True, format=format)
93+
return to_datetime(col, coerce=True, format=format, utc=True)
9494

9595

9696
def _parse_date_columns(data_frame, parse_dates):
@@ -318,6 +318,10 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
318318
-------
319319
DataFrame
320320
321+
Notes
322+
-----
323+
Any datetime values with time zone information will be converted to UTC.
324+
321325
See also
322326
--------
323327
read_sql_query : Read SQL query into a DataFrame.
@@ -390,6 +394,11 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
390394
-------
391395
DataFrame
392396
397+
Notes
398+
-----
399+
Any datetime values with time zone information parsed via the `parse_dates`
400+
parameter will be converted to UTC
401+
393402
See also
394403
--------
395404
read_sql_table : Read SQL database table into a DataFrame
@@ -451,7 +460,8 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
451460
This function is a convenience wrapper around ``read_sql_table`` and
452461
``read_sql_query`` (and for backward compatibility) and will delegate
453462
to the specific function depending on the provided input (database
454-
table name or sql query).
463+
table name or sql query). The delegated function might have more specific
464+
notes about its functionality that are not listed here.
455465
456466
See also
457467
--------

pandas/io/tests/test_sql.py

+67-10
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from datetime import datetime, date, time
3030

3131
from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat
32-
from pandas import date_range, to_datetime, to_timedelta
32+
from pandas import date_range, to_datetime, to_timedelta, Timestamp
3333
import pandas.compat as compat
3434
from pandas.compat import StringIO, range, lrange, string_types
3535
from pandas.core.datetools import format as date_format
@@ -100,6 +100,7 @@
100100
'postgresql': """CREATE TABLE types_test_data (
101101
"TextCol" TEXT,
102102
"DateCol" TIMESTAMP,
103+
"DateColWithTz" TIMESTAMP WITH TIME ZONE,
103104
"IntDateCol" INTEGER,
104105
"FloatCol" DOUBLE PRECISION,
105106
"IntCol" INTEGER,
@@ -109,18 +110,36 @@
109110
)"""
110111
},
111112
'insert_test_types': {
112-
'sqlite': """
113+
'sqlite': {
114+
'query': """
113115
INSERT INTO types_test_data
114116
VALUES(?, ?, ?, ?, ?, ?, ?, ?)
115117
""",
116-
'mysql': """
118+
'fields': (
119+
'TextCol', 'DateCol', 'IntDateCol', 'FloatCol',
120+
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
121+
)
122+
},
123+
'mysql': {
124+
'query': """
117125
INSERT INTO types_test_data
118126
VALUES("%s", %s, %s, %s, %s, %s, %s, %s)
119127
""",
120-
'postgresql': """
128+
'fields': (
129+
'TextCol', 'DateCol', 'IntDateCol', 'FloatCol',
130+
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
131+
)
132+
},
133+
'postgresql': {
134+
'query': """
121135
INSERT INTO types_test_data
122-
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
123-
"""
136+
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)
137+
""",
138+
'fields': (
139+
'TextCol', 'DateCol', 'DateColWithTz', 'IntDateCol', 'FloatCol',
140+
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
141+
)
142+
},
124143
},
125144
'read_parameters': {
126145
'sqlite': "SELECT * FROM iris WHERE Name=? AND SepalLength=?",
@@ -218,11 +237,36 @@ def _load_raw_sql(self):
218237
self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor])
219238
ins = SQL_STRINGS['insert_test_types'][self.flavor]
220239

221-
data = [(
222-
'first', '2000-01-03 00:00:00', 535852800, 10.10, 1, False, 1, False),
223-
('first', '2000-01-04 00:00:00', 1356998400, 10.10, 1, False, None, None)]
240+
data = [
241+
{
242+
'TextCol': 'first',
243+
'DateCol': '2000-01-03 00:00:00',
244+
'DateColWithTz': '2000-01-01 00:00:00-08:00',
245+
'IntDateCol': 535852800,
246+
'FloatCol': 10.10,
247+
'IntCol': 1,
248+
'BoolCol': False,
249+
'IntColWithNull': 1,
250+
'BoolColWithNull': False,
251+
},
252+
{
253+
'TextCol': 'first',
254+
'DateCol': '2000-01-04 00:00:00',
255+
'DateColWithTz': '2000-06-01 00:00:00-07:00',
256+
'IntDateCol': 1356998400,
257+
'FloatCol': 10.10,
258+
'IntCol': 1,
259+
'BoolCol': False,
260+
'IntColWithNull': None,
261+
'BoolColWithNull': None,
262+
},
263+
]
264+
224265
for d in data:
225-
self._get_exec().execute(ins, d)
266+
self._get_exec().execute(
267+
ins['query'],
268+
[d[field] for field in ins['fields']]
269+
)
226270

227271
def _count_rows(self, table_name):
228272
result = self._get_exec().execute(
@@ -1512,6 +1556,19 @@ def test_schema_support(self):
15121556
res2 = pdsql.read_table('test_schema_other2')
15131557
tm.assert_frame_equal(res1, res2)
15141558

1559+
def test_datetime_with_time_zone(self):
1560+
# Test to see if we read the date column with timezones that
1561+
# the timezone information is converted to utc and into a
1562+
# np.datetime64 (GH #7139)
1563+
df = sql.read_sql_table("types_test_data", self.conn)
1564+
self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64),
1565+
"DateColWithTz loaded with incorrect type")
1566+
1567+
# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
1568+
self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00'))
1569+
1570+
# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
1571+
self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00'))
15151572

15161573
#------------------------------------------------------------------------------
15171574
#--- Test Sqlite / MySQL fallback

0 commit comments

Comments
 (0)