DOC: update the pandas.DataFrame.to_sql docstring (#20126)

jazzmuesli · TomAugspurger · commit 50b218406041 · 2018-03-13T15:27:30.000-05:00
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1902,33 +1902,108 @@ def to_sql(self, name, con, schema=None, if_exists='fail', index=True,
         """
         Write records stored in a DataFrame to a SQL database.
 
+        Databases supported by SQLAlchemy [1]_ are supported. Tables can be
+        newly created, appended to, or overwritten.
+
         Parameters
         ----------
         name : string
-            Name of SQL table
-        con : SQLAlchemy engine or DBAPI2 connection (legacy mode)
+            Name of SQL table.
+        con : sqlalchemy.engine.Engine or sqlite3.Connection
             Using SQLAlchemy makes it possible to use any DB supported by that
-            library. If a DBAPI2 object, only sqlite3 is supported.
-        schema : string, default None
+            library. Legacy support is provided for sqlite3.Connection objects.
+        schema : string, optional
             Specify the schema (if database flavor supports this). If None, use
             default schema.
         if_exists : {'fail', 'replace', 'append'}, default 'fail'
-            - fail: If table exists, do nothing.
-            - replace: If table exists, drop it, recreate it, and insert data.
-            - append: If table exists, insert data. Create if does not exist.
+            How to behave if the table already exists.
+
+            * fail: Raise a ValueError.
+            * replace: Drop the table before inserting new values.
+            * append: Insert new values to the existing table.
+
         index : boolean, default True
-            Write DataFrame index as a column.
+            Write DataFrame index as a column. Uses `index_label` as the column
+            name in the table.
         index_label : string or sequence, default None
             Column label for index column(s). If None is given (default) and
             `index` is True, then the index names are used.
             A sequence should be given if the DataFrame uses MultiIndex.
-        chunksize : int, default None
-            If not None, then rows will be written in batches of this size at a
-            time.  If None, all rows will be written at once.
-        dtype : dict of column name to SQL type, default None
-            Optional specifying the datatype for columns. The SQL type should
-            be a SQLAlchemy type, or a string for sqlite3 fallback connection.
+        chunksize : int, optional
+            Rows will be written in batches of this size at a time. By default,
+            all rows will be written at once.
+        dtype : dict, optional
+            Specifying the datatype for columns. The keys should be the column
+            names and the values should be the SQLAlchemy types or strings for
+            the sqlite3 legacy mode.
+
+        Raises
+        ------
+        ValueError
+            When the table already exists and `if_exists` is 'fail' (the
+            default).
+
+        See Also
+        --------
+        pandas.read_sql : read a DataFrame from a table
+
+        References
+        ----------
+        .. [1] http://docs.sqlalchemy.org
+        .. [2] https://www.python.org/dev/peps/pep-0249/
+
+        Examples
+        --------
+
+        Create an in-memory SQLite database.
+
+        >>> from sqlalchemy import create_engine
+        >>> engine = create_engine('sqlite://', echo=False)
+
+        Create a table from scratch with 3 rows.
+
+        >>> df = pd.DataFrame({'name' : ['User 1', 'User 2', 'User 3']})
+        >>> df
+             name
+        0  User 1
+        1  User 2
+        2  User 3
+
+        >>> df.to_sql('users', con=engine)
+        >>> engine.execute("SELECT * FROM users").fetchall()
+        [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')]
+
+        >>> df1 = pd.DataFrame({'name' : ['User 4', 'User 5']})
+        >>> df1.to_sql('users', con=engine, if_exists='append')
+        >>> engine.execute("SELECT * FROM users").fetchall()
+        [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'),
+         (0, 'User 4'), (1, 'User 5')]
+
+        Overwrite the table with just ``df1``.
+
+        >>> df1.to_sql('users', con=engine, if_exists='replace',
+        ...            index_label='id')
+        >>> engine.execute("SELECT * FROM users").fetchall()
+        [(0, 'User 4'), (1, 'User 5')]
+
+        Specify the dtype (especially useful for integers with missing values).
+        Notice that while pandas is forced to store the data as floating point,
+        the database supports nullable integers. When fetching the data with
+        Python, we get back integer scalars.
+
+        >>> df = pd.DataFrame({"A": [1, None, 2]})
+        >>> df
+             A
+        0  1.0
+        1  NaN
+        2  2.0
+
+        >>> from sqlalchemy.types import Integer
+        >>> df.to_sql('integers', con=engine, index=False,
+        ...           dtype={"A": Integer()})
 
+        >>> engine.execute("SELECT * FROM integers").fetchall()
+        [(1,), (None,), (2,)]
         """
         from pandas.io import sql
         sql.to_sql(self, name, con, schema=schema, if_exists=if_exists,