diff --git a/doc/source/io.rst b/doc/source/io.rst
index f2d5924edac77..2ec61f7f00bd8 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -3393,12 +3393,34 @@ the database using :func:`~pandas.DataFrame.to_sql`.
 
     data.to_sql('data', engine)
 
-With some databases, writing large DataFrames can result in errors due to packet size limitations being exceeded. This can be avoided by setting the ``chunksize`` parameter when calling ``to_sql``. For example, the following writes ``data`` to the database in batches of 1000 rows at a time:
+With some databases, writing large DataFrames can result in errors due to
+packet size limitations being exceeded. This can be avoided by setting the
+``chunksize`` parameter when calling ``to_sql``. For example, the following
+writes ``data`` to the database in batches of 1000 rows at a time:
 
 .. ipython:: python
 
     data.to_sql('data_chunked', engine, chunksize=1000)
 
+SQL data types
+""""""""""""""
+
+:func:`~pandas.DataFrame.to_sql` will try to map your data to an appropriate
+SQL data type based on the dtype of the data. When you have columns of dtype
+``object``, pandas will try to infer the data type.
+
+You can always override the default type by specifying the desired SQL type of
+any of the columns by using the ``dtype`` argument. This argument requires a
+dictionary mapping column names to SQLAlchemy types (or strings for the sqlite3
+fallback mode).
+For example, to use the SQLAlchemy ``String`` type instead of the
+default ``Text`` type for string columns:
+
+.. ipython:: python
+
+    from sqlalchemy.types import String
+    data.to_sql('data_dtype', engine, dtype={'Col_1': String})
+
 .. note::
 
     Due to the limited support for timedelta's in the different database
@@ -3413,15 +3435,6 @@ With some databases, writing large DataFrames can result in errors due to packe
     Because of this, reading the database table back in does **not** generate
     a categorical.
 
-.. note::
-
-    You can specify the SQL type of any of the columns by using the dtypes
-    parameter (a dictionary mapping column names to SQLAlchemy types). This
-    can be useful in cases where columns with NULL values are inferred by
-    Pandas to an excessively general datatype (e.g. a boolean column is is
-    inferred to be object because it has NULLs).
-
-
 Reading Tables
 ~~~~~~~~~~~~~~
 
@@ -3782,11 +3795,11 @@ is lost when exporting.
 
     *Stata* only supports string value labels, and so ``str`` is called on the
     categories when exporting data. Exporting ``Categorical`` variables with
-    non-string categories produces a warning, and can result a loss of
+    non-string categories produces a warning, and can result in a loss of
     information if the ``str`` representations of the categories are not unique.
 
 Labeled data can similarly be imported from *Stata* data files as ``Categorical``
-variables using the keyword argument ``convert_categoricals`` (``True`` by default). 
+variables using the keyword argument ``convert_categoricals`` (``True`` by default).
 The keyword argument ``order_categoricals`` (``True`` by default) determines
 whether imported ``Categorical`` variables are ordered.
 
diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt
index 419885ad4159b..8e2c7a862362a 100644
--- a/doc/source/whatsnew/v0.15.2.txt
+++ b/doc/source/whatsnew/v0.15.2.txt
@@ -96,7 +96,16 @@ API changes
 Enhancements
 ~~~~~~~~~~~~
 
-- Added the ability to specify the SQL type of columns when writing a DataFrame to a database (:issue:`8778`).
+- Added the ability to specify the SQL type of columns when writing a DataFrame
+  to a database (:issue:`8778`).
+  For example, to use the SQLAlchemy ``String`` type instead of the
+  default ``Text`` type for string columns:
+
+  .. code-block:: python
+
+     from sqlalchemy.types import String
+     data.to_sql('data_dtype', engine, dtype={'Col_1': String})
+
 - Added ability to export Categorical data to Stata (:issue:`8633`). See :ref:`here <io.stata-categorical>` for limitations of categorical variables exported to Stata data files.
 - Added ability to export Categorical data to/from HDF5 (:issue:`7621`). Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. See :ref:`here <io.hdf5-categorical>` for an example and caveats w.r.t. prior versions of pandas.
 - Added support for ``searchsorted()`` on ``Categorical`` class (:issue:`8420`).
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index d63643c53e6f4..0fc7171410152 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -954,8 +954,9 @@ def to_sql(self, name, con, flavor='sqlite', schema=None, if_exists='fail',
         chunksize : int, default None
             If not None, then rows will be written in batches of this size at a
             time.  If None, all rows will be written at once.
-        dtype : Dictionary of column name to SQLAlchemy type, default None
-            Optional datatypes for SQL columns.
+        dtype : dict of column name to SQL type, default None
+            Optionally specify the datatype for columns. The SQL type should
+            be a SQLAlchemy type, or a string for the sqlite3 fallback connection.
 
         """
         from pandas.io import sql
@@ -4128,7 +4129,7 @@ def func(self, axis=None, dtype=None, out=None, skipna=True,
 
         y = _values_from_object(self).copy()
 
-        if skipna and issubclass(y.dtype.type, 
+        if skipna and issubclass(y.dtype.type,
                                  (np.datetime64, np.timedelta64)):
             result = accum_func(y, axis)
             mask = isnull(self)
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 77527f867fad8..ea6239f080caa 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -518,8 +518,9 @@ def to_sql(frame, name, con, flavor='sqlite', schema=None, if_exists='fail',
     chunksize : int, default None
         If not None, then rows will be written in batches of this size at a
         time.  If None, all rows will be written at once.
-    dtype : dictionary of column name to SQLAchemy type, default None
-        optional datatypes for SQL columns.
+    dtype : dict of column name to SQL type, default None
+        Optionally specify the datatype for columns. The SQL type should
+        be a SQLAlchemy type, or a string for the sqlite3 fallback connection.
 
     """
     if if_exists not in ('fail', 'replace', 'append'):
@@ -1133,8 +1134,9 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
         chunksize : int, default None
             If not None, then rows will be written in batches of this size at a
             time.  If None, all rows will be written at once.
-        dtype : dictionary of column name to SQLAlchemy type, default None
-            Optional datatypes for SQL columns.
+        dtype : dict of column name to SQL type, default None
+            Optionally specify the datatype for columns. The SQL type should
+            be a SQLAlchemy type.
 
         """
         if dtype is not None:
@@ -1468,8 +1470,9 @@ def to_sql(self, frame, name, if_exists='fail', index=True,
         chunksize : int, default None
             If not None, then rows will be written in batches of this size at a
             time.  If None, all rows will be written at once.
-        dtype : dictionary of column_name to SQLite string type, default None
-            optional datatypes for SQL columns.
+        dtype : dict of column name to SQL type, default None
+            Optionally specify the datatype for columns. The SQL type should
+            be a string.
 
         """
         if dtype is not None:
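
For reviewers who want to exercise the new keyword end to end, here is a
minimal sketch (not part of the patch): it assumes an in-memory SQLite
database, and the frame, table, and column names are illustrative only.

.. code-block:: python

   import sqlite3

   import pandas as pd
   from sqlalchemy import create_engine
   from sqlalchemy.types import String

   df = pd.DataFrame({'Col_1': ['a', 'b', 'c']})

   # SQLAlchemy engine: dtype values are SQLAlchemy types, so Col_1 is
   # created as VARCHAR instead of the default TEXT.
   engine = create_engine('sqlite://')
   df.to_sql('data_dtype', engine, dtype={'Col_1': String})

   # sqlite3 fallback connection: dtype values are plain strings naming
   # SQLite column types.
   con = sqlite3.connect(':memory:')
   df.to_sql('data_dtype_fallback', con, dtype={'Col_1': 'TEXT'})
   con.close()

The two calls mirror the two docstring variants touched above: SQLAlchemy
types for engine-backed connections, plain strings for the sqlite3 fallback
mode.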