From 0498dd1670caba7692e724f5a574d65bd75dc695 Mon Sep 17 00:00:00 2001 From: Dror Atariah Date: Sun, 15 Oct 2017 22:18:24 +0200 Subject: [PATCH 1/5] BUG: formating integers datetimes using sql GH17855 --- pandas/io/sql.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9c6d01d236c57..296450dc77cb7 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -109,7 +109,11 @@ def _handle_date_column(col, utc=None, format=None): issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=utc) + if '%' in format: + return to_datetime( + col, errors='coerce', format=format, utc=utc) + else: + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 From 385ad78018ab5803f0cf06cef86afb7f0d59e866 Mon Sep 17 00:00:00 2001 From: Dror Atariah Date: Sun, 15 Oct 2017 22:18:24 +0200 Subject: [PATCH 2/5] BUG: formating integers datetimes using sql GH17855 --- pandas/io/sql.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 82b1bff75b114..a8158db63b49e 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -109,7 +109,11 @@ def _handle_date_column(col, utc=None, format=None): issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=utc) + if '%' in format: + return to_datetime( + col, errors='coerce', format=format, utc=utc) + else: + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 From 102ab25d82212c8a788812c057ad2a1b8cc41fd4 Mon Sep 17 00:00:00 2001 From: Dror Atariah Date: Tue, 21 Nov 2017 10:28:18 +0100 Subject: [PATCH 3/5] Improve fix as per discussion Cleaned the fix and implemented tests --- pandas/io/sql.py | 16 ++++++---------- pandas/tests/io/test_sql.py | 32 ++++++++++++++++++++++---------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index a8158db63b49e..401a9c11a774d 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -103,17 +103,13 @@ def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): return to_datetime(col, errors='ignore', **format) else: - if format in ['D', 's', 'ms', 'us', 'ns']: + # Allow passing of formatting string for integers + # GH17855 + if format is None and (issubclass(col.dtype.type, np.floating) or + issubclass(col.dtype.type, np.integer)): + format = 's' + if format in ['D', 'd', 'h', 'm', 's', 'ms', 'us', 'ns']: return to_datetime(col, errors='coerce', unit=format, utc=utc) - elif (issubclass(col.dtype.type, np.floating) or - issubclass(col.dtype.type, np.integer)): - # parse dates as timestamp - format = 's' if format is None else format - if '%' in format: - return to_datetime( - col, errors='coerce', format=format, utc=utc) - else: - return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 00a30940ca352..2c27663afd936 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -88,6 +88,7 @@ "TextCol" TEXT, "DateCol" TEXT, "IntDateCol" INTEGER, + "IntDateOnlyCol" INTEGER, "FloatCol" REAL, "IntCol" INTEGER, "BoolCol" INTEGER, @@ -98,6 +99,7 @@ `TextCol` TEXT, `DateCol` DATETIME, `IntDateCol` INTEGER, + "IntDateOnlyCol" INTEGER, `FloatCol` DOUBLE, `IntCol` INTEGER, `BoolCol` BOOLEAN, @@ -109,6 +111,7 @@ "DateCol" TIMESTAMP, "DateColWithTz" TIMESTAMP WITH TIME ZONE, "IntDateCol" INTEGER, + "IntDateOnlyCol" INTEGER, "FloatCol" DOUBLE PRECISION, "IntCol" INTEGER, "BoolCol" BOOLEAN, @@ -120,31 +123,33 @@ 'sqlite': { 'query': """ INSERT INTO types_test_data - VALUES(?, ?, ?, ?, ?, ?, ?, ?) + VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?) """, 'fields': ( - 'TextCol', 'DateCol', 'IntDateCol', 'FloatCol', - 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + 'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol', + 'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull', + 'BoolColWithNull' ) }, 'mysql': { 'query': """ INSERT INTO types_test_data - VALUES("%s", %s, %s, %s, %s, %s, %s, %s) + VALUES("%s", %s, %s, %s, %s, %s, %s, %s, %s) """, 'fields': ( - 'TextCol', 'DateCol', 'IntDateCol', 'FloatCol', - 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' + 'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol', + 'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull', + 'BoolColWithNull' ) }, 'postgresql': { 'query': """ INSERT INTO types_test_data - VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s) + VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) """, 'fields': ( 'TextCol', 'DateCol', 'DateColWithTz', - 'IntDateCol', 'FloatCol', + 'IntDateCol', 'IntDateOnlyCol', 'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull' ) }, @@ -313,13 +318,13 @@ def _load_raw_sql(self): self.drop_table('types_test_data') self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor]) ins = SQL_STRINGS['insert_test_types'][self.flavor] - data = [ { 'TextCol': 'first', 'DateCol': '2000-01-03 00:00:00', 'DateColWithTz': '2000-01-01 00:00:00-08:00', 'IntDateCol': 535852800, + 'IntDateOnlyCol': 20101010, 'FloatCol': 10.10, 'IntCol': 1, 'BoolCol': False, @@ -331,6 +336,7 @@ def _load_raw_sql(self): 'DateCol': '2000-01-04 00:00:00', 'DateColWithTz': '2000-06-01 00:00:00-07:00', 'IntDateCol': 1356998400, + 'IntDateOnlyCol': 20101212, 'FloatCol': 10.10, 'IntCol': 1, 'BoolCol': False, @@ -622,9 +628,15 @@ def test_date_parsing(self): df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) - assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, + parse_dates={'IntDateOnlyCol': '%Y%m%d'}) + assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64) + assert df.IntDateOnlyCol.tolist() == [ + pd.Timestamp('2010-10-10'), pd.Timestamp('2010-12-12') + ] + def test_date_and_index(self): # Test case where same column appears in parse_date and index_col From 5b56686687374f3e074069f3eb9f928259577c02 Mon Sep 17 00:00:00 2001 From: Dror Atariah Date: Tue, 21 Nov 2017 14:11:17 +0100 Subject: [PATCH 4/5] Improved testing Included explicit values comparison --- pandas/tests/io/test_sql.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 2c27663afd936..14bc8f813c639 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -616,25 +616,41 @@ def test_date_parsing(self): df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['DateCol']) assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + pd.Timestamp(2000, 1, 3, 0, 0, 0), + pd.Timestamp(2000, 1, 4, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'}) assert issubclass(df.DateCol.dtype.type, np.datetime64) + assert df.DateCol.tolist() == [ + pd.Timestamp(2000, 1, 3, 0, 0, 0), + pd.Timestamp(2000, 1, 4, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates=['IntDateCol']) - assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + assert df.IntDateCol.tolist() == [ + pd.Timestamp(1986, 12, 25, 0, 0, 0), + pd.Timestamp(2013, 1, 1, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'IntDateCol': 's'}) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) + assert df.IntDateCol.tolist() == [ + pd.Timestamp(1986, 12, 25, 0, 0, 0), + pd.Timestamp(2013, 1, 1, 0, 0, 0) + ] df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn, parse_dates={'IntDateOnlyCol': '%Y%m%d'}) assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64) assert df.IntDateOnlyCol.tolist() == [ - pd.Timestamp('2010-10-10'), pd.Timestamp('2010-12-12') + pd.Timestamp('2010-10-10'), + pd.Timestamp('2010-12-12') ] def test_date_and_index(self): From 987f5880c77bbed8be1a7fdf4291839d5ce7a815 Mon Sep 17 00:00:00 2001 From: Dror Atariah Date: Tue, 21 Nov 2017 14:47:54 +0100 Subject: [PATCH 5/5] Fixed typo in SQL data setting --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 14bc8f813c639..217fc8e67483d 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -99,7 +99,7 @@ `TextCol` TEXT, `DateCol` DATETIME, `IntDateCol` INTEGER, - "IntDateOnlyCol" INTEGER, + `IntDateOnlyCol` INTEGER, `FloatCol` DOUBLE, `IntCol` INTEGER, `BoolCol` BOOLEAN,