diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 82b1bff75b114..401a9c11a774d 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -103,12 +103,12 @@ def _handle_date_column(col, utc=None, format=None):
     if isinstance(format, dict):
         return to_datetime(col, errors='ignore', **format)
     else:
-        if format in ['D', 's', 'ms', 'us', 'ns']:
-            return to_datetime(col, errors='coerce', unit=format, utc=utc)
-        elif (issubclass(col.dtype.type, np.floating) or
-              issubclass(col.dtype.type, np.integer)):
-            # parse dates as timestamp
-            format = 's' if format is None else format
+        # Allow passing of formatting string for integers
+        # GH17855
+        if format is None and (issubclass(col.dtype.type, np.floating) or
+                               issubclass(col.dtype.type, np.integer)):
+            format = 's'
+        if format in ['D', 'd', 'h', 'm', 's', 'ms', 'us', 'ns']:
             return to_datetime(col, errors='coerce', unit=format, utc=utc)
         elif is_datetime64tz_dtype(col):
             # coerce to UTC timezone
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 00a30940ca352..217fc8e67483d 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -88,6 +88,7 @@
                 "TextCol" TEXT,
                 "DateCol" TEXT,
                 "IntDateCol" INTEGER,
+                "IntDateOnlyCol" INTEGER,
                 "FloatCol" REAL,
                 "IntCol" INTEGER,
                 "BoolCol" INTEGER,
@@ -98,6 +99,7 @@
                 `TextCol` TEXT,
                 `DateCol` DATETIME,
                 `IntDateCol` INTEGER,
+                `IntDateOnlyCol` INTEGER,
                 `FloatCol` DOUBLE,
                 `IntCol` INTEGER,
                 `BoolCol` BOOLEAN,
@@ -109,6 +111,7 @@
                 "DateCol" TIMESTAMP,
                 "DateColWithTz" TIMESTAMP WITH TIME ZONE,
                 "IntDateCol" INTEGER,
+                "IntDateOnlyCol" INTEGER,
                 "FloatCol" DOUBLE PRECISION,
                 "IntCol" INTEGER,
                 "BoolCol" BOOLEAN,
@@ -120,31 +123,33 @@
         'sqlite': {
             'query': """
                 INSERT INTO types_test_data
-                VALUES(?, ?, ?, ?, ?, ?, ?, ?)
+                VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)
                 """,
             'fields': (
-                'TextCol', 'DateCol', 'IntDateCol', 'FloatCol',
-                'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
+                'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol',
+                'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull',
+                'BoolColWithNull'
             )
         },
         'mysql': {
             'query': """
                 INSERT INTO types_test_data
-                VALUES("%s", %s, %s, %s, %s, %s, %s, %s)
+                VALUES("%s", %s, %s, %s, %s, %s, %s, %s, %s)
                 """,
             'fields': (
-                'TextCol', 'DateCol', 'IntDateCol', 'FloatCol',
-                'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
+                'TextCol', 'DateCol', 'IntDateCol', 'IntDateOnlyCol',
+                'FloatCol', 'IntCol', 'BoolCol', 'IntColWithNull',
+                'BoolColWithNull'
             )
         },
         'postgresql': {
             'query': """
                 INSERT INTO types_test_data
-                VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)
+                VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                 """,
             'fields': (
                 'TextCol', 'DateCol', 'DateColWithTz',
-                'IntDateCol', 'FloatCol',
+                'IntDateCol', 'IntDateOnlyCol', 'FloatCol',
                 'IntCol', 'BoolCol', 'IntColWithNull', 'BoolColWithNull'
             )
         },
@@ -313,13 +318,13 @@ def _load_raw_sql(self):
         self.drop_table('types_test_data')
         self._get_exec().execute(SQL_STRINGS['create_test_types'][self.flavor])
         ins = SQL_STRINGS['insert_test_types'][self.flavor]
-
         data = [
             {
                 'TextCol': 'first',
                 'DateCol': '2000-01-03 00:00:00',
                 'DateColWithTz': '2000-01-01 00:00:00-08:00',
                 'IntDateCol': 535852800,
+                'IntDateOnlyCol': 20101010,
                 'FloatCol': 10.10,
                 'IntCol': 1,
                 'BoolCol': False,
@@ -331,6 +336,7 @@ def _load_raw_sql(self):
                 'DateCol': '2000-01-04 00:00:00',
                 'DateColWithTz': '2000-06-01 00:00:00-07:00',
                 'IntDateCol': 1356998400,
+                'IntDateOnlyCol': 20101212,
                 'FloatCol': 10.10,
                 'IntCol': 1,
                 'BoolCol': False,
@@ -610,20 +616,42 @@ def test_date_parsing(self):
         df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                 parse_dates=['DateCol'])
         assert issubclass(df.DateCol.dtype.type, np.datetime64)
+        assert df.DateCol.tolist() == [
+            pd.Timestamp(2000, 1, 3, 0, 0, 0),
+            pd.Timestamp(2000, 1, 4, 0, 0, 0)
+        ]
 
         df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                 parse_dates={'DateCol': '%Y-%m-%d %H:%M:%S'})
         assert issubclass(df.DateCol.dtype.type, np.datetime64)
+        assert df.DateCol.tolist() == [
+            pd.Timestamp(2000, 1, 3, 0, 0, 0),
+            pd.Timestamp(2000, 1, 4, 0, 0, 0)
+        ]
 
         df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                 parse_dates=['IntDateCol'])
-        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+        assert df.IntDateCol.tolist() == [
+            pd.Timestamp(1986, 12, 25, 0, 0, 0),
+            pd.Timestamp(2013, 1, 1, 0, 0, 0)
+        ]
 
         df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
                                 parse_dates={'IntDateCol': 's'})
-        assert issubclass(df.IntDateCol.dtype.type, np.datetime64)
+        assert df.IntDateCol.tolist() == [
+            pd.Timestamp(1986, 12, 25, 0, 0, 0),
+            pd.Timestamp(2013, 1, 1, 0, 0, 0)
+        ]
+
+        df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn,
+                                parse_dates={'IntDateOnlyCol': '%Y%m%d'})
+        assert issubclass(df.IntDateOnlyCol.dtype.type, np.datetime64)
+        assert df.IntDateOnlyCol.tolist() == [
+            pd.Timestamp('2010-10-10'),
+            pd.Timestamp('2010-12-12')
+        ]
 
     def test_date_and_index(self):
         # Test case where same column appears in parse_date and index_col