ENH: add support for datetime.date/time in to_sql (GH6932) #8090

Merged
1 change: 1 addition & 0 deletions doc/source/v0.15.0.txt
@@ -427,6 +427,7 @@ Enhancements
~~~~~~~~~~~~

- Added support for a ``chunksize`` parameter to ``to_sql`` function. This allows DataFrame to be written in chunks and avoid packet-size overflow errors (:issue:`8062`)
- Added support for writing ``datetime.date`` and ``datetime.time`` object columns with ``to_sql`` (:issue:`6932`).

- Added support for bool, uint8, uint16 and uint32 datatypes in ``to_stata`` (:issue:`7097`, :issue:`7365`)

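For context, a minimal usage sketch of the behaviour this changelog entry describes, assuming an in-memory SQLite engine created with SQLAlchemy (table name and data are illustrative only, not taken from the PR):

    from datetime import date, time

    import pandas as pd
    from sqlalchemy import create_engine

    engine = create_engine('sqlite:///:memory:')

    # Object columns holding datetime.date / datetime.time values.
    df = pd.DataFrame({'d': [date(2014, 1, 1), date(2014, 1, 2)],
                       't': [time(9, 0, 0), time(9, 1, 30)]})

    # With this change the object columns are written as DATE and TIME
    # columns instead of falling through to the generic text type.
    df.to_sql('date_time_demo', engine, index=False)

    res = pd.read_sql_table('date_time_demo', engine)
    # The DATE column comes back parsed as datetime64[ns]; the TIME column
    # comes back as datetime.time objects (see the tests added below).
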
53 changes: 33 additions & 20 deletions pandas/io/sql.py
@@ -11,6 +11,7 @@
import re
import numpy as np

import pandas.lib as lib
import pandas.core.common as com
from pandas.compat import lzip, map, zip, raise_with_traceback, string_types
from pandas.core.api import DataFrame, Series
@@ -684,13 +685,14 @@ def _get_column_names_and_types(self, dtype_mapper):
if self.index is not None:
for i, idx_label in enumerate(self.index):
idx_type = dtype_mapper(
self.frame.index.get_level_values(i).dtype)
self.frame.index.get_level_values(i))
column_names_and_types.append((idx_label, idx_type))

column_names_and_types += zip(
list(map(str, self.frame.columns)),
map(dtype_mapper, self.frame.dtypes)
)
column_names_and_types += [
(str(self.frame.columns[i]),
dtype_mapper(self.frame.iloc[:,i]))
for i in range(len(self.frame.columns))
]
return column_names_and_types

def _create_table_statement(self):
@@ -756,30 +758,33 @@ def _harmonize_columns(self, parse_dates=None):
except KeyError:
pass # this column not in results

def _sqlalchemy_type(self, arr_or_dtype):
def _sqlalchemy_type(self, col):
from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
DateTime, Date, Interval)
DateTime, Date, Time, Interval)

if arr_or_dtype is date:
return Date
if com.is_datetime64_dtype(arr_or_dtype):
if com.is_datetime64_dtype(col):
Member Author: @jreback The com.is_..._dtype(arr_or_dtype) functions: are they also meant to work with a Series (despite the argument name)?

Contributor: Yep. You may, however, need to use lib.infer_dtype here: it returns a string describing what the actual data is. Beware that it often has to scan all the data, though it will fast-path if the input already has a dtype; this is really for an object dtype that, say, holds datetime.date.

Member Author: Yes, I used it a couple of lines lower, specifically for datetime.date and datetime.time.

Member Author: That is the reason I changed the function from passing the dtype to passing the column (Series): I also needed the values of the column for infer_dtype.

Contributor: Yep, you can certainly test for datetime64 first. Not sure exactly what you need; take a look at core/index.py, Index.__new__, which goes through a bunch of steps to figure out what is what. You have an advantage: you already know it's a Series, and you already have it coerced (if possible), so if it is datetime64[ns] you are done (as you are already doing).
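As a side note to this discussion, a minimal sketch of the inference in question, using the pandas.lib import added at the top of this file (illustration only, not part of the diff):

    import datetime

    import numpy as np
    import pandas.lib as lib

    dates = np.array([datetime.date(2014, 1, 1), datetime.date(2014, 1, 2)], dtype=object)
    times = np.array([datetime.time(9, 0), datetime.time(9, 30)], dtype=object)

    lib.infer_dtype(dates)  # 'date'
    lib.infer_dtype(times)  # 'time'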

try:
tz = arr_or_dtype.tzinfo
tz = col.tzinfo
return DateTime(timezone=True)
except:
return DateTime
if com.is_timedelta64_dtype(arr_or_dtype):
if com.is_timedelta64_dtype(col):
warnings.warn("the 'timedelta' type is not supported, and will be "
"written as integer values (ns frequency) to the "
"database.", UserWarning)
return BigInteger
elif com.is_float_dtype(arr_or_dtype):
elif com.is_float_dtype(col):
return Float
elif com.is_integer_dtype(arr_or_dtype):
elif com.is_integer_dtype(col):
# TODO: Refine integer size.
return BigInteger
elif com.is_bool_dtype(arr_or_dtype):
elif com.is_bool_dtype(col):
return Boolean
inferred = lib.infer_dtype(com._ensure_object(col))
if inferred == 'date':
return Date
if inferred == 'time':
return Time
return Text

def _numpy_type(self, sqltype):
@@ -908,7 +913,11 @@ def _create_sql_schema(self, frame, table_name):
},
'date': {
'mysql': 'DATE',
'sqlite': 'TIMESTAMP',
'sqlite': 'DATE',
},
'time': {
'mysql': 'TIME',
'sqlite': 'TIME',
},
'bool': {
'mysql': 'BOOLEAN',
@@ -1014,8 +1023,8 @@ def _create_table_statement(self):
create_statement = template % {'name': self.name, 'columns': columns}
return create_statement

def _sql_type_name(self, dtype):
pytype = dtype.type
def _sql_type_name(self, col):
pytype = col.dtype.type
pytype_name = "text"
if issubclass(pytype, np.floating):
pytype_name = "float"
@@ -1029,10 +1038,14 @@ def _sql_type_name(self, dtype):
elif issubclass(pytype, np.datetime64) or pytype is datetime:
# Caution: np.datetime64 is also a subclass of np.number.
pytype_name = "datetime"
elif pytype is datetime.date:
pytype_name = "date"
elif issubclass(pytype, np.bool_):
pytype_name = "bool"
elif issubclass(pytype, np.object):
pytype = lib.infer_dtype(com._ensure_object(col))
if pytype == "date":
pytype_name = "date"
elif pytype == "time":
pytype_name = "time"

return _SQL_TYPES[pytype_name][self.pd_sql.flavor]

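As a rough illustration of the fallback (non-SQLAlchemy) mapping above, a small sketch using a plain sqlite3 connection; the names and the sqlite_master query are illustrative, and datetime.time is left out because the fallback cannot bind it (see the skipped test further below):

    import sqlite3
    from datetime import date

    import pandas as pd

    conn = sqlite3.connect(':memory:')
    df = pd.DataFrame({'d': [date(2014, 1, 1), date(2014, 1, 2)]})

    # The legacy path goes through _sql_type_name above, so the object column
    # should now be declared as DATE (the 'date' entry previously mapped to
    # TIMESTAMP for sqlite).
    df.to_sql('date_demo', conn, flavor='sqlite', index=False)

    print(conn.execute(
        "SELECT sql FROM sqlite_master WHERE name = 'date_demo'").fetchone()[0])

    # Reading back with a plain query returns the dates as strings in this
    # mode, as the fallback tests below assert.
    print(pd.read_sql_query('SELECT * FROM date_demo', conn))
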
33 changes: 32 additions & 1 deletion pandas/io/tests/test_sql.py
@@ -26,7 +26,7 @@
import warnings
import numpy as np

from datetime import datetime
from datetime import datetime, date, time

from pandas import DataFrame, Series, Index, MultiIndex, isnull
from pandas import date_range, to_datetime, to_timedelta
@@ -35,6 +35,7 @@
from pandas.core.datetools import format as date_format

import pandas.io.sql as sql
from pandas.io.sql import read_sql_table, read_sql_query
import pandas.util.testing as tm


@@ -976,6 +977,21 @@ def test_datetime_NaT(self):
else:
tm.assert_frame_equal(result, df)

def test_datetime_date(self):
# test support for datetime.date
df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
df.to_sql('test_date', self.conn, index=False)
res = read_sql_table('test_date', self.conn)
# comes back as datetime64
tm.assert_series_equal(res['a'], to_datetime(df['a']))

def test_datetime_time(self):
# test support for datetime.time
df = DataFrame([time(9, 0, 0), time(9, 1, 30)], columns=["a"])
df.to_sql('test_time', self.conn, index=False)
res = read_sql_table('test_time', self.conn)
tm.assert_frame_equal(res, df)

def test_mixed_dtype_insert(self):
# see GH6509
s1 = Series(2**25 + 1,dtype=np.int32)
@@ -1269,6 +1285,21 @@ def test_roundtrip(self):
def test_execute_sql(self):
self._execute_sql()

def test_datetime_date(self):
# test support for datetime.date
df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"])
df.to_sql('test_date', self.conn, index=False, flavor=self.flavor)
res = read_sql_query('SELECT * FROM test_date', self.conn)
if self.flavor == 'sqlite':
# comes back as strings
tm.assert_frame_equal(res, df.astype(str))
elif self.flavor == 'mysql':
tm.assert_frame_equal(res, df)

def test_datetime_time(self):
# test support for datetime.time
raise nose.SkipTest("datetime.time not supported for sqlite fallback")


class TestMySQLLegacy(TestSQLiteLegacy):
"""