Skip to content

Truncate table option for to_sql #50088

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Bug fixes

Other
~~~~~
-
- Added ``"truncate"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` to truncate the existing table (:issue:`37210`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you move this to the 2.0 whatsnew?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Thanks for all your help with my first pandas PR!

-

.. ---------------------------------------------------------------------------
Expand Down
34 changes: 26 additions & 8 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,7 @@ def to_sql(
name: str,
con,
schema: str | None = None,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label: IndexLabel = None,
chunksize: int | None = None,
Expand All @@ -629,10 +629,11 @@ def to_sql(
schema : str, optional
Name of SQL schema in database to write to (if database flavor
supports this). If None, use default schema (default).
if_exists : {'fail', 'replace', 'append'}, default 'fail'
if_exists : {'fail', 'replace', 'append', 'truncate'}, default 'fail'
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
- append: If table exists, insert data. Create if does not exist.
- truncate: If table exists, truncate it, then insert data.
index : bool, default True
Write DataFrame index as a column.
index_label : str or sequence, optional
Expand Down Expand Up @@ -682,7 +683,7 @@ def to_sql(
`sqlite3 <https://docs.python.org/3/library/sqlite3.html#sqlite3.Cursor.rowcount>`__ or
`SQLAlchemy <https://docs.sqlalchemy.org/en/14/core/connections.html#sqlalchemy.engine.BaseCursorResult.rowcount>`__
""" # noqa:E501
if if_exists not in ("fail", "replace", "append"):
if if_exists not in ("fail", "replace", "append", "truncate"):
raise ValueError(f"'{if_exists}' is not valid for if_exists")

if isinstance(frame, Series):
Expand Down Expand Up @@ -854,6 +855,8 @@ def create(self) -> None:
if self.if_exists == "replace":
self.pd_sql.drop_table(self.name, self.schema)
self._execute_create()
elif self.if_exists == "truncate":
self.pd_sql.trunc_table(self.name, self.schema)
elif self.if_exists == "append":
pass
else:
Expand Down Expand Up @@ -1311,7 +1314,7 @@ def to_sql(
self,
frame,
name,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label=None,
schema=None,
Expand Down Expand Up @@ -1642,7 +1645,7 @@ def prep_table(
self,
frame,
name,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool | str | list[str] | None = True,
index_label=None,
schema=None,
Expand Down Expand Up @@ -1718,7 +1721,7 @@ def to_sql(
self,
frame,
name: str,
if_exists: Literal["fail", "replace", "append"] = "fail",
if_exists: Literal["fail", "replace", "append", "truncate"] = "fail",
index: bool = True,
index_label=None,
schema: str | None = None,
Expand All @@ -1736,10 +1739,11 @@ def to_sql(
frame : DataFrame
name : string
Name of SQL table.
if_exists : {'fail', 'replace', 'append'}, default 'fail'
if_exists : {'fail', 'replace', 'append', 'truncate'}, default 'fail'
- fail: If table exists, do nothing.
- replace: If table exists, drop it, recreate it, and insert data.
- append: If table exists, insert data. Create if does not exist.
- truncate: If table exists, truncate it, and insert data.
index : boolean, default True
Write DataFrame index as a column.
index_label : string or sequence, default None
Expand Down Expand Up @@ -1833,6 +1837,15 @@ def drop_table(self, table_name: str, schema: str | None = None) -> None:
self.get_table(table_name, schema).drop(bind=self.con)
self.meta.clear()

def trunc_table(self, table_name: str, schema: str | None = None) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What happens when you try to truncate a table that doesn't exist? Should this raise? If so can you add a test for that?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the table doesn't exist, it should just create a new table - added a test for it.

Also added a test for the case where truncate is selected but the DataFrame being written contains new columns that are not in the existing table. This should raise an error from the database.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would rather we raise here if the DB doesn't support truncate. In the future there could be a use for a delete_from argument in addition to truncate, so merging the two here dependent on the DB is confusing

schema = schema or self.meta.schema
if self.has_table(table_name, schema):
self.meta.reflect(bind=self.con, only=[table_name], schema=schema)
if schema:
schema = schema + "."
self.execute(f"DELETE FROM {schema or ''}{table_name}")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't this just be TRUNCATE?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used DELETE since sqlite doesn't support TRUNCATE. However, since I had the if statement already for the schema, it will now use DELETE for the schema-less sqlite and use the faster TRUNCATE for everything else.

self.meta.clear()

def _create_sql_schema(
self,
frame: DataFrame,
Expand Down Expand Up @@ -2181,10 +2194,11 @@ def to_sql(
frame: DataFrame
name: string
Name of SQL table.
if_exists: {'fail', 'replace', 'append'}, default 'fail'
if_exists: {'fail', 'replace', 'append', 'truncate'}, default 'fail'
fail: If table exists, do nothing.
replace: If table exists, drop it, recreate it, and insert data.
append: If table exists, insert data. Create if it does not exist.
truncate: If table exists, truncate it, then insert data.
index : bool, default True
Write DataFrame index as a column
index_label : string or sequence, default None
Expand Down Expand Up @@ -2253,6 +2267,10 @@ def drop_table(self, name: str, schema: str | None = None) -> None:
drop_sql = f"DROP TABLE {_get_valid_sqlite_name(name)}"
self.execute(drop_sql)

def trunc_table(self, name: str, schema: str | None = None) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this method should be deleted, or should explicitly raise a NotImplementedError for sqlite. Can you also set up a test called test_sqlite_truncate_raises that makes sure that happens? You'll see that pattern in many of the other tests

trunc_sql = f"TRUNCATE TABLE {_get_valid_sqlite_name(name)}"
self.execute(trunc_sql)

def _create_sql_schema(
self,
frame,
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,17 @@ def test_to_sql_replace(self, test_frame1):

assert num_rows == num_entries

def test_to_sql_truncate(self, test_frame1):
    """Check that ``if_exists="truncate"`` leaves one copy of the frame's rows."""
    table = "test_frame3"

    # Seed the table first, then write the same frame again in truncate mode.
    sql.to_sql(test_frame1, table, self.conn, if_exists="fail")
    sql.to_sql(test_frame1, table, self.conn, if_exists="truncate")

    # The table must still exist after the truncating write.
    assert sql.has_table(table, self.conn)

    # The row count must equal the frame length (not double it, as an
    # append would produce).
    expected = len(test_frame1)
    actual = count_rows(self.conn, table)
    assert actual == expected

def test_to_sql_append(self, test_frame1):
assert sql.to_sql(test_frame1, "test_frame4", self.conn, if_exists="fail") == 4

Expand Down