Skip to content

ENH: Add 'truncate' option for if_exists arg for DataFrame.to_sql method #52961

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Other enhancements
- :meth:`arrays.DatetimeArray.map`, :meth:`arrays.TimedeltaArray.map` and :meth:`arrays.PeriodArray.map` can now take a ``na_action`` argument (:issue:`51644`)
- :meth:`arrays.SparseArray.map` now supports ``na_action`` (:issue:`52096`).
- Add :meth:`diff()` and :meth:`round()` for :class:`Index` (:issue:`19708`)
- Add ``"truncate"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` which truncates the existing table before inserting new data (:issue:`37210`).
- Add dtype of categories to ``repr`` information of :class:`CategoricalDtype` (:issue:`52179`)
- The "latex-math" escape mode of the formatter now preserves all characters between "\(" and "\)" without escaping them (:issue:`51903`)
- Adding ``engine_kwargs`` parameter to :meth:`DataFrame.read_excel` (:issue:`52214`)
Expand Down
34 changes: 26 additions & 8 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ def to_sql(
name: str,
con,
schema: str | None = None,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label: IndexLabel = None,
chunksize: int | None = None,
Expand All @@ -708,10 +708,11 @@ def to_sql(
schema : str, optional
Name of SQL schema in database to write to (if database flavor
supports this). If None, use default schema (default).
if_exists : {'fail', 'replace', 'append'}, default 'fail'
if_exists : {'fail', 'replace', 'append', 'truncate'}, default 'fail'
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
- append: If table exists, insert data. Create if does not exist.
- truncate: If table exists, truncate the table, then insert data.
index : bool, default True
Write DataFrame index as a column.
index_label : str or sequence, optional
Expand Down Expand Up @@ -761,7 +762,7 @@ def to_sql(
`sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
`SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
""" # noqa: E501
if if_exists not in ("fail", "replace", "append"):
if if_exists not in ("fail", "replace", "append", "truncate"):
raise ValueError(f"'{if_exists}' is not valid for if_exists")

if isinstance(frame, Series):
Expand Down Expand Up @@ -864,7 +865,7 @@ def __init__(
pandas_sql_engine,
frame=None,
index: bool | str | list[str] | None = True,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
prefix: str = "pandas",
index_label=None,
schema=None,
Expand Down Expand Up @@ -915,6 +916,8 @@ def create(self) -> None:
if self.if_exists == "replace":
self.pd_sql.drop_table(self.name, self.schema)
self._execute_create()
elif self.if_exists == "truncate":
self.pd_sql.trunc_table(self.name, self.schema)
elif self.if_exists == "append":
pass
else:
Expand Down Expand Up @@ -1404,7 +1407,7 @@ def to_sql(
self,
frame,
name: str,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label=None,
schema=None,
Expand Down Expand Up @@ -1788,7 +1791,7 @@ def prep_table(
self,
frame,
name: str,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool | str | list[str] | None = True,
index_label=None,
schema=None,
Expand Down Expand Up @@ -1865,7 +1868,7 @@ def to_sql(
self,
frame,
name: str,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label=None,
schema: str | None = None,
Expand All @@ -1883,10 +1886,11 @@ def to_sql(
frame : DataFrame
name : string
Name of SQL table.
if_exists : {'fail', 'replace', 'append'}, default 'fail'
if_exists : {'fail', 'replace', 'append', 'truncate'}, default 'fail'
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
- append: If table exists, insert data. Create if does not exist.
- truncate: If table exists, truncate the table, then insert data.
index : boolean, default True
Write DataFrame index as a column.
index_label : string or sequence, default None
Expand Down Expand Up @@ -1981,6 +1985,17 @@ def drop_table(self, table_name: str, schema: str | None = None) -> None:
self.get_table(table_name, schema).drop(bind=self.con)
self.meta.clear()

def trunc_table(self, table_name: str, schema: str | None = None) -> None:
schema = schema or self.meta.schema
if self.con.engine.name == "sqlite":
raise NotImplementedError("TRUNCATE not supported on sqlite database.")
if self.has_table(table_name, schema):
self.meta.reflect(bind=self.con, only=[table_name], schema=schema)
with self.run_transaction():
table_to_truncate = self.get_table(table_name, schema)
self.execute(f"TRUNCATE TABLE {table_to_truncate}")
self.meta.clear()

def _create_sql_schema(
self,
frame: DataFrame,
Expand Down Expand Up @@ -2409,6 +2424,9 @@ def drop_table(self, name: str, schema: str | None = None) -> None:
drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
self.execute(drop_sql)

def trunc_table(self, name: str, schema: str | None = None) -> None:
    """
    Unconditionally refuse to truncate a table.

    Parameters
    ----------
    name : str
        Table name; accepted but not used.
    schema : str, optional
        Schema name; accepted but not used.

    Raises
    ------
    NotImplementedError
        Always, with a message stating that TRUNCATE is not supported on
        sqlite databases.
    """
    raise NotImplementedError("TRUNCATE not supported on sqlite database.")

def _create_sql_schema(
self,
frame,
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,14 @@ def test_to_sql_replace(self, test_frame1):

assert num_rows == num_entries

def test_sqlite_truncate_raises(self, test_frame1):
    """Writing with ``if_exists="truncate"`` raises ``NotImplementedError``."""
    msg = "TRUNCATE not supported on sqlite database."
    # Create the table outside the ``raises`` block so that only the
    # truncating write can satisfy the expected error; a failure during
    # setup must surface as a real test failure.
    sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="fail")
    with pytest.raises(NotImplementedError, match=msg):
        # Add to table again
        sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="truncate")

def test_to_sql_append(self, test_frame1):
assert sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="fail") == 4

Expand Down Expand Up @@ -2224,6 +2232,25 @@ def _get_index_columns(self, tbl_name):
def test_to_sql_save_index(self):
    # Thin wrapper: all checks live in the ``_to_sql_save_index`` helper.
    self._to_sql_save_index()

def test_to_sql_truncate(self, test_frame1):
    """
    Check ``if_exists="truncate"`` against an already existing table.

    For the sqlite flavor the write must raise ``NotImplementedError``.
    For every other flavor the table must still exist afterwards and hold
    exactly as many rows as ``test_frame1``.
    """
    # Create the table once, for both paths. Keeping this outside the
    # ``raises`` block means a setup failure cannot pass as the expected error.
    sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="fail")

    if self.flavor == "sqlite":
        msg = "TRUNCATE not supported on sqlite database."
        with pytest.raises(NotImplementedError, match=msg):
            sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="truncate")
        return

    # Write the same frame again; the row count must not double.
    sql.to_sql(test_frame1, "test_frame3", self.conn, if_exists="truncate")
    assert sql.has_table("test_frame3", self.conn)

    num_entries = len(test_frame1)
    num_rows = count_rows(self.conn, "test_frame3")

    assert num_rows == num_entries

def test_transactions(self):
self._transaction_test()

Expand Down