diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a893b2ba1a189..fcc551b3ac2e4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1865,33 +1865,108 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True, """ Write records stored in a DataFrame to a SQL database. + Databases supported by SQLAlchemy [1]_ are supported. Tables can be + newly created, appended to, or overwritten. + Parameters ---------- name : string - Name of SQL table - con : SQLAlchemy engine or DBAPI2 connection (legacy mode) + Name of SQL table. + con : sqlalchemy.engine.Engine or sqlite3.Connection Using SQLAlchemy makes it possible to use any DB supported by that - library. If a DBAPI2 object, only sqlite3 is supported. - schema : string, default None + library. Legacy support is provided for sqlite3.Connection objects. + schema : string, optional Specify the schema (if database flavor supports this). If None, use default schema. if_exists : {'fail', 'replace', 'append'}, default 'fail' - - fail: If table exists, do nothing. - - replace: If table exists, drop it, recreate it, and insert data. - - append: If table exists, insert data. Create if does not exist. + How to behave if the table already exists. + + * fail: Raise a ValueError. + * replace: Drop the table before inserting new values. + * append: Insert new values to the existing table. + index : boolean, default True - Write DataFrame index as a column. + Write DataFrame index as a column. Uses `index_label` as the column + name in the table. index_label : string or sequence, default None Column label for index column(s). If None is given (default) and `index` is True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. - chunksize : int, default None - If not None, then rows will be written in batches of this size at a - time. If None, all rows will be written at once. 
- dtype : dict of column name to SQL type, default None - Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type, or a string for sqlite3 fallback connection. + chunksize : int, optional + Rows will be written in batches of this size at a time. By default, + all rows will be written at once. + dtype : dict, optional + Specify the datatype for columns. The keys should be the column + names and the values should be the SQLAlchemy types or strings for + the sqlite3 legacy mode. + + Raises + ------ + ValueError + When the table already exists and `if_exists` is 'fail' (the + default). + + See Also + -------- + pandas.read_sql : read a DataFrame from a table + + References + ---------- + .. [1] http://docs.sqlalchemy.org + .. [2] https://www.python.org/dev/peps/pep-0249/ + + Examples + -------- + + Create an in-memory SQLite database. + + >>> from sqlalchemy import create_engine + >>> engine = create_engine('sqlite://', echo=False) + + Create a table from scratch with 3 rows. + + >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']}) + >>> df + name + 0 User 1 + 1 User 2 + 2 User 3 + + >>> df.to_sql('users', con=engine) + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] + + >>> df1 = pd.DataFrame({'name' : ['User 4', 'User 5']}) + >>> df1.to_sql('users', con=engine, if_exists='append') + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), + (0, 'User 4'), (1, 'User 5')] + + Overwrite the table with just ``df1``. + + >>> df1.to_sql('users', con=engine, if_exists='replace', + ... index_label='id') + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 4'), (1, 'User 5')] + + Specify the dtype (especially useful for integers with missing values). + Notice that while pandas is forced to store the data as floating point, + the database supports nullable integers. 
When fetching the data with + Python, we get back integer scalars. + + >>> df = pd.DataFrame({"A": [1, None, 2]}) + >>> df + A + 0 1.0 + 1 NaN + 2 2.0 + + >>> from sqlalchemy.types import Integer + >>> df.to_sql('integers', con=engine, index=False, + ... dtype={"A": Integer()}) + >>> engine.execute("SELECT * FROM integers").fetchall() + [(1,), (None,), (2,)] """ from pandas.io import sql sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,