From 5cc35f4fd7af9fa9d3579f5e300ef400cea42d12 Mon Sep 17 00:00:00 2001 From: Dan Birken Date: Mon, 23 Mar 2015 17:35:38 -0700 Subject: [PATCH] BUG: Fix error when reading postgres table with timezone #7139 `read_sql_table()` will break if it reads a table with a `timestamp with time zone` column if individual rows within that column have different time zones. This is very common due to daylight savings time. Pandas right now does not have good support for a Series containing datetimes with different time zones (hence this bug). So this change simply converts a `timestamp with time zone` column into UTC during import, which pandas has great support for. --- doc/source/whatsnew/v0.16.1.txt | 2 +- pandas/io/sql.py | 18 ++++++-- pandas/io/tests/test_sql.py | 77 ++++++++++++++++++++++++++++----- 3 files changed, 82 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index d130879b85475..54ba2ac6586d0 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -52,7 +52,7 @@ Bug Fixes - Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) - +- Bug in ``read_sql_table`` error when reading postgres table with timezone (:issue:`7139`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 117d7b4a9ceaa..7c70b4b1df492 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -83,14 +83,14 @@ def _handle_date_column(col, format=None): return to_datetime(col, **format) else: if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, coerce=True, unit=format) + return to_datetime(col, coerce=True, unit=format, utc=True) elif (issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, coerce=True, unit=format) + return to_datetime(col, coerce=True, unit=format, utc=True) else: - return 
to_datetime(col, coerce=True, format=format) + return to_datetime(col, coerce=True, format=format, utc=True) def _parse_date_columns(data_frame, parse_dates): @@ -318,6 +318,10 @@ def read_sql_table(table_name, con, schema=None, index_col=None, ------- DataFrame + Notes + ----- + Any datetime values with time zone information will be converted to UTC + See also -------- read_sql_query : Read SQL query into a DataFrame. @@ -390,6 +394,11 @@ def read_sql_query(sql, con, index_col=None, coerce_float=True, params=None, ------- DataFrame + Notes + ----- + Any datetime values with time zone information parsed via the `parse_dates` + parameter will be converted to UTC + See also -------- read_sql_table : Read SQL database table into a DataFrame @@ -451,7 +460,8 @@ def read_sql(sql, con, index_col=None, coerce_float=True, params=None, This function is a convenience wrapper around ``read_sql_table`` and ``read_sql_query`` (and for backward compatibility) and will delegate to the specific function depending on the provided input (database - table name or sql query). + table name or sql query). The delegated function might have more specific + notes about its functionality not listed here. 
See also -------- diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 2db6f1e104770..ac266dd77c984 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -29,7 +29,7 @@ from datetime import datetime, date, time from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat -from pandas import date_range, to_datetime, to_timedelta +from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat from pandas.compat import StringIO, range, lrange, string_types from pandas.core.datetools import format as date_format @@ -100,6 +100,7 @@ 'postgresql': """CREATE TABLE types_test_data ( "TextCol" TEXT, "DateCol" TIMESTAMP, + "DateColWithTz" TIMESTAMP WITH TIME ZONE, "IntDateCol" INTEGER, "FloatCol" DOUBLE PRECISION, "IntCol" INTEGER, @@ -109,18 +110,36 @@ )""" }, 'insert_test_types': { - 'sqlite': """ + 'sqlite': { + 'query': """ INSERT INTO types_test_data VALUES(?, ?, ?, ?, ?, ?, ?, ?) """, - 'mysql': """ + 'fields': ( + 'TextCol', 'DateCol', 'IntDateCol', 'FloatCol', + 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + ) + }, + 'mysql': { + 'query': """ INSERT INTO types_test_data VALUES("%s", %s, %s, %s, %s, %s, %s, %s) """, - 'postgresql': """ + 'fields': ( + 'TextCol', 'DateCol', 'IntDateCol', 'FloatCol', + 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + ) + }, + 'postgresql': { + 'query': """ INSERT INTO types_test_data - VALUES(%s, %s, %s, %s, %s, %s, %s, %s) - """ + VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s) + """, + 'fields': ( + 'TextCol', 'DateCol', 'DateColWithTz', 'IntDateCol', 'FloatCol', + 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + ) + }, }, 'read_parameters': { 'sqlite': "SELECT * FROM iris WHERE Name=? 
AND SepalLength=?", @@ -218,11 +237,36 @@ def _load_raw_sql(self): self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor]) ins = SQL_STRINGS['insert_test_types'][self.flavor] - data = [( - 'first', '2000-01-03 00:00:00', 535852800, 10.10, 1, False, 1, False), - ('first', '2000-01-04 00:00:00', 1356998400, 10.10, 1, False, None, None)] + data = [ + { + 'TextCol': 'first', + 'DateCol': '2000-01-03 00:00:00', + 'DateColWithTz': '2000-01-01 00:00:00-08:00', + 'IntDateCol': 535852800, + 'FloatCol': 10.10, + 'IntCol': 1, + 'BoolCol': False, + 'IntColWithNull': 1, + 'BoolColWithNull': False, + }, + { + 'TextCol': 'first', + 'DateCol': '2000-01-04 00:00:00', + 'DateColWithTz': '2000-06-01 00:00:00-07:00', + 'IntDateCol': 1356998400, + 'FloatCol': 10.10, + 'IntCol': 1, + 'BoolCol': False, + 'IntColWithNull': None, + 'BoolColWithNull': None, + }, + ] + for d in data: - self._get_exec().execute(ins, d) + self._get_exec().execute( + ins['query'], + [d[field] for field in ins['fields']] + ) def _count_rows(self, table_name): result = self._get_exec().execute( @@ -1512,6 +1556,19 @@ def test_schema_support(self): res2 = pdsql.read_table('test_schema_other2') tm.assert_frame_equal(res1, res2) + def test_datetime_with_time_zone(self): + # Test to see if we read the date column with timezones that + # the timezone information is converted to utc and into a + # np.datetime64 (GH #7139) + df = sql.read_sql_table("types_test_data", self.conn) + self.assertTrue(issubclass(df.DateColWithTz.dtype.type, np.datetime64), + "DateColWithTz loaded with incorrect type") + + # "2000-01-01 00:00:00-08:00" should convert to "2000-01-01 08:00:00" + self.assertEqual(df.DateColWithTz[0], Timestamp('2000-01-01 08:00:00')) + + # "2000-06-01 00:00:00-07:00" should convert to "2000-06-01 07:00:00" + self.assertEqual(df.DateColWithTz[1], Timestamp('2000-06-01 07:00:00')) #------------------------------------------------------------------------------ #--- Test Sqlite / MySQL fallback