BUG: Fix error when reading postgres table with timezone #7139 #7364

doc/source/whatsnew/v0.16.1.txt (2 changes: 1 addition & 1 deletion)
@@ -52,7 +52,7 @@ Bug Fixes


- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`)

- Bug in ``read_sql_table`` raising an error when reading a postgres table with timezone (:issue:`7139`)



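To illustrate the whatsnew entry above: a minimal sketch of the behavior being fixed, assuming a reachable postgres server; the connection string, the tz_demo table and its column name are made up for illustration, and the expected UTC values mirror the ones asserted in the new test further down.

import pandas as pd
from sqlalchemy import create_engine, text

# Hypothetical connection details; any postgres database will do.
engine = create_engine("postgresql://user:password@localhost:5432/testdb")

with engine.begin() as conn:
    conn.execute(text(
        'CREATE TABLE tz_demo ("DateColWithTz" TIMESTAMP WITH TIME ZONE)'))
    conn.execute(text(
        "INSERT INTO tz_demo VALUES ('2000-01-01 00:00:00-08:00'), "
        "('2000-06-01 00:00:00-07:00')"))

# Previously this raised while converting the tz-aware values; with the fix
# the column comes back converted to UTC:
# 2000-01-01 08:00:00 and 2000-06-01 07:00:00.
df = pd.read_sql_table("tz_demo", engine)
print(df["DateColWithTz"])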
pandas/io/sql.py (18 changes: 14 additions & 4 deletions)
@@ -83,14 +83,14 @@ def _handle_date_column(col, format=None):
return to_datetime(col, **format)
else:
if format in ['D', 's', 'ms', 'us', 'ns']:
return to_datetime(col, coerce=True, unit=format)
return to_datetime(col, coerce=True, unit=format, utc=True)
elif (issubclass(col.dtype.type, np.floating)
or issubclass(col.dtype.type, np.integer)):
# parse dates as timestamp
format = 's' if format is None else format
return to_datetime(col, coerce=True, unit=format)
return to_datetime(col, coerce=True, unit=format, utc=True)
else:
return to_datetime(col, coerce=True, format=format)
return to_datetime(col, coerce=True, format=format, utc=True)


def _parse_date_columns(data_frame, parse_dates):
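The heart of this hunk is the added utc=True: tz-aware datetimes carrying different UTC offsets cannot be squeezed into a single tz-naive datetime64 column, which is roughly what made read_sql_table raise on such data. A standalone sketch of the to_datetime behavior being relied on (the sample strings are made up):

import pandas as pd

# Two timestamps with different UTC offsets, as a postgres
# TIMESTAMP WITH TIME ZONE column might return them.
vals = pd.Series(["2000-01-01 00:00:00-08:00", "2000-06-01 00:00:00-07:00"])

# With utc=True both values are converted onto the UTC clock
# (08:00 and 07:00 respectively) so they fit in one datetime64 column.
print(pd.to_datetime(vals, utc=True))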
@@ -318,6 +318,10 @@ def read_sql_table(table_name, con, schema=None, index_col=None,
-------
DataFrame

Notes
-----
Any datetime values with time zone information will be converted to UTC

See also
--------
read_sql_query : Read SQL query into a DataFrame.
@@ -390,6 +394,11 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None,
-------
DataFrame

Notes
-----
Any datetime values with time zone information parsed via the `parse_dates`
parameter will be converted to UTC

See also
--------
read_sql_table : Read SQL database table into a DataFrame
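Unlike read_sql_table, read_sql_query has no database type information to work from, so a timezone-aware column only goes through the conversion above when it is named in parse_dates. A short sketch, reusing the hypothetical engine and tz_demo table from the earlier example:

df = pd.read_sql_query(
    'SELECT "DateColWithTz" FROM tz_demo', engine,
    parse_dates=["DateColWithTz"])

# The parsed column is converted to UTC, matching the read_sql_table result.
print(df["DateColWithTz"])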
@@ -451,7 +460,8 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None,
This function is a convenience wrapper around ``read_sql_table`` and
``read_sql_query`` (and for backward compatibility) and will delegate
to the specific function depending on the provided input (database
table name or sql query).
table name or sql query). The delegated function might have more specific
notes about its functionality not listed here.

See also
--------
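The delegation described in that docstring means the UTC note applies to read_sql as well, whichever path it takes; a small sketch, again assuming the hypothetical engine and tz_demo table from the first example:

# A bare table name is dispatched to read_sql_table, which can use the
# database's column types directly ...
df_table = pd.read_sql("tz_demo", engine)

# ... while a query string is dispatched to read_sql_query, where the
# tz-aware column must be named in parse_dates to be converted.
df_query = pd.read_sql("SELECT * FROM tz_demo", engine,
                       parse_dates=["DateColWithTz"])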
pandas/io/tests/test_sql.py (77 changes: 67 additions & 10 deletions)
@@ -29,7 +29,7 @@
from datetime import datetime, date, time

from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat
from pandas import date_range, to_datetime, to_timedelta
from pandas import date_range, to_datetime, to_timedelta, Timestamp
import pandas.compat as compat
from pandas.compat import StringIO, range, lrange, string_types
from pandas.core.datetools import format as date_format
@@ -100,6 +100,7 @@
'postgresql': """CREATE TABLE types_test_data (
"TextCol" TEXT,
"DateCol" TIMESTAMP,
"DateColWithTz" TIMESTAMP WITH TIME ZONE,
"IntDateCol" INTEGER,
"FloatCol" DOUBLE PRECISION,
"IntCol" INTEGER,
@@ -109,18 +110,36 @@
)"""
},
'insert_test_types': {
'sqlite': """
'sqlite': {
'query': """
INSERT INTO types_test_data
VALUES(?, ?, ?, ?, ?, ?, ?, ?)
""",
'mysql': """
'fields': (
'TextCol', 'DateCol', 'IntDateCol', 'FloatCol',
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
)
},
'mysql': {
'query': """
INSERT INTO types_test_data
VALUES("%s", %s, %s, %s, %s, %s, %s, %s)
""",
'postgresql': """
'fields': (
'TextCol', 'DateCol', 'IntDateCol', 'FloatCol',
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
)
},
'postgresql': {
'query': """
INSERT INTO types_test_data
VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
"""
VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)
""",
'fields': (
'TextCol', 'DateCol', 'DateColWithTz', 'IntDateCol', 'FloatCol',
'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
)
},
},
'read_parameters': {
'sqlite': "SELECT * FROM iris WHERE Name=? AND SepalLength=?",
@@ -218,11 +237,36 @@ def _load_raw_sql(self):
self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor])
ins = SQL_STRINGS['insert_test_types'][self.flavor]

data = [(
'first', '2000-01-03 00:00:00', 535852800, 10.10, 1, False, 1, False),
('first', '2000-01-04 00:00:00', 1356998400, 10.10, 1, False, None, None)]
data = [
{
'TextCol': 'first',
'DateCol': '2000-01-03 00:00:00',
'DateColWithTz': '2000-01-01 00:00:00-08:00',
'IntDateCol': 535852800,
'FloatCol': 10.10,
'IntCol': 1,
'BoolCol': False,
'IntColWithNull': 1,
'BoolColWithNull': False,
},
{
'TextCol': 'first',
'DateCol': '2000-01-04 00:00:00',
'DateColWithTz': '2000-06-01 00:00:00-07:00',
'IntDateCol': 1356998400,
'FloatCol': 10.10,
'IntCol': 1,
'BoolCol': False,
'IntColWithNull': None,
'BoolColWithNull': None,
},
]

for d in data:
self._get_exec().execute(ins, d)
self._get_exec().execute(
ins['query'],
[d[field] for field in ins['fields']]
)

def _count_rows(self, table_name):
result = self._get_exec().execute(
@@ -1512,6 +1556,19 @@ def test_schema_support(self):
res2 = pdsql.read_table('test_schema_other2')
tm.assert_frame_equal(res1, res2)

def test_datetime_with_time_zone(self):
# Check that a date column with time zone information is read back
# with the values converted to UTC and stored as np.datetime64
# (GH #7139)
df = sql.read_sql_table("types_test_data", self.conn)
self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64),
"DateColWithTz loaded with incorrect type")

# "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00"
self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00'))

# "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00"
self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00'))
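The expected values in the two assertions are just the UTC offsets folded back in; the same arithmetic can be checked directly with tz-aware Timestamps (a standalone illustration, not part of the test suite):

from pandas import Timestamp

# -08:00 means the local clock is 8 hours behind UTC, so the UTC value is
# 8 hours later; likewise -07:00 shifts by 7 hours.
assert (Timestamp("2000-01-01 00:00:00-08:00").tz_convert("UTC")
        == Timestamp("2000-01-01 08:00:00", tz="UTC"))
assert (Timestamp("2000-06-01 00:00:00-07:00").tz_convert("UTC")
        == Timestamp("2000-06-01 07:00:00", tz="UTC"))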

#------------------------------------------------------------------------------
#--- Test Sqlite / MySQL fallback