Skip to content

Commit fea6799

Browse files
ENH: Pluggable SQL performance via new SQL engine keyword (#40556)
1 parent d7e8b1a commit fea6799

File tree

4 files changed

+281
-51
lines changed

4 files changed

+281
-51
lines changed

doc/source/user_guide/options.rst

+4
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,10 @@ io.hdf.dropna_table True drop ALL nan rows when appe
456456
io.parquet.engine None The engine to use as a default for
457457
parquet reading and writing. If None
458458
then try 'pyarrow' and 'fastparquet'
459+
io.sql.engine auto The engine to use as a default for
460+
sql reading and writing, with SQLAlchemy
461+
as a higher level interface. If 'auto'
462+
then try 'sqlalchemy'
459463
mode.chained_assignment warn Controls ``SettingWithCopyWarning``:
460464
'raise', 'warn', or None. Raise an
461465
exception, warn, or no action if

pandas/core/config_init.py

+16
Original file line numberDiff line numberDiff line change
@@ -652,6 +652,22 @@ def use_inf_as_na_cb(key):
652652
validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]),
653653
)
654654

655+
656+
# Set up the io.sql specific configuration.
# Registers the ``io.sql.engine`` option: default "auto", and the validator
# only accepts "auto" or "sqlalchemy".
sql_engine_doc = """
: string
    The default sql reader/writer engine. Available options:
    'auto', 'sqlalchemy', the default is 'auto'
"""

with cf.config_prefix("io.sql"):
    cf.register_option(
        "engine",
        "auto",
        sql_engine_doc,
        validator=is_one_of_factory(["auto", "sqlalchemy"]),
    )
670+
655671
# --------
656672
# Plotting
657673
# ---------

pandas/io/sql.py

+207-51
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
import pandas._libs.lib as lib
2929
from pandas._typing import DtypeArg
30+
from pandas.compat._optional import import_optional_dependency
31+
from pandas.errors import AbstractMethodError
3032

3133
from pandas.core.dtypes.common import (
3234
is_datetime64tz_dtype,
@@ -36,6 +38,7 @@
3638
from pandas.core.dtypes.dtypes import DatetimeTZDtype
3739
from pandas.core.dtypes.missing import isna
3840

41+
from pandas import get_option
3942
from pandas.core.api import (
4043
DataFrame,
4144
Series,
@@ -643,6 +646,8 @@ def to_sql(
643646
chunksize: int | None = None,
644647
dtype: DtypeArg | None = None,
645648
method: str | None = None,
649+
engine: str = "auto",
650+
**engine_kwargs,
646651
) -> None:
647652
"""
648653
Write records stored in a DataFrame to a SQL database.
@@ -689,6 +694,16 @@ def to_sql(
689694
section :ref:`insert method <io.sql.method>`.
690695
691696
.. versionadded:: 0.24.0
697+
698+
engine : {'auto', 'sqlalchemy'}, default 'auto'
699+
SQL engine library to use. If 'auto', then the option
700+
``io.sql.engine`` is used. The default ``io.sql.engine``
701+
behavior is 'sqlalchemy'
702+
703+
.. versionadded:: 1.3.0
704+
705+
**engine_kwargs
706+
Any additional kwargs are passed to the engine.
692707
"""
693708
if if_exists not in ("fail", "replace", "append"):
694709
raise ValueError(f"'{if_exists}' is not valid for if_exists")
@@ -712,6 +727,8 @@ def to_sql(
712727
chunksize=chunksize,
713728
dtype=dtype,
714729
method=method,
730+
engine=engine,
731+
**engine_kwargs,
715732
)
716733

717734

@@ -1283,6 +1300,91 @@ def to_sql(
12831300
)
12841301

12851302

1303+
class BaseEngine:
    """
    Interface for the pluggable SQL insertion engines.

    Subclasses override ``insert_records``; the implementation here only
    raises ``AbstractMethodError``.
    """

    def insert_records(
        self,
        table: SQLTable,
        con,
        frame,
        name,
        index=True,
        schema=None,
        chunksize=None,
        method=None,
        **engine_kwargs,
    ):
        """
        Insert data into an already-prepared table.

        Parameters
        ----------
        table : SQLTable
            Table object the records are written to.
        con
            Connectable handed over by the caller.
        frame
            Data being written.
        name
            Name of the SQL table.
        index : default True
        schema : default None
        chunksize : default None
        method : default None
        **engine_kwargs
            Additional engine-specific keyword arguments.

        Raises
        ------
        AbstractMethodError
            Always; concrete engines must provide the implementation.
        """
        raise AbstractMethodError(self)
1320+
1321+
1322+
class SQLAlchemyEngine(BaseEngine):
    """
    Engine that writes records through SQLAlchemy.

    Creating an instance checks that sqlalchemy is importable via
    ``import_optional_dependency``.
    """

    def __init__(self):
        import_optional_dependency(
            "sqlalchemy", extra="sqlalchemy is required for SQL support."
        )

    def insert_records(
        self,
        table: SQLTable,
        con,
        frame,
        name,
        index=True,
        schema=None,
        chunksize=None,
        method=None,
        **engine_kwargs,
    ):
        """
        Insert data into an already-prepared table.

        Only ``table``, ``chunksize`` and ``method`` are used by this engine;
        the remaining parameters are accepted for interface compatibility
        with ``BaseEngine.insert_records``.

        Parameters
        ----------
        table : SQLTable
            Prepared table; its ``insert`` method performs the write.
        con, frame, name, index, schema
            Unused here.
        chunksize : default None
            Forwarded to ``table.insert``.
        method : default None
            Forwarded to ``table.insert``.
        **engine_kwargs
            Unused here.

        Raises
        ------
        ValueError
            If the SQLAlchemy error text matches the known MySQL "inf"
            failure patterns.
        sqlalchemy.exc.SQLAlchemyError
            Any other SQLAlchemy error is re-raised unchanged.
        """
        from sqlalchemy import exc

        try:
            table.insert(chunksize=chunksize, method=method)
        except exc.SQLAlchemyError as err:
            # GH34431
            # https://stackoverflow.com/a/67358288/6067848
            msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?#
            )|inf can not be used with MySQL"""
            # Not every SQLAlchemyError carries the underlying driver
            # exception on ``orig``; fall back to the error itself so the
            # original exception is never masked by an AttributeError.
            err_text = str(getattr(err, "orig", err))
            if re.search(msg, err_text):
                raise ValueError("inf cannot be used with MySQL") from err
            # Bare raise re-raises the active exception.
            raise
1354+
1355+
1356+
def get_engine(engine: str) -> BaseEngine:
    """
    Resolve an engine name to an engine instance.

    Parameters
    ----------
    engine : str
        'auto' or 'sqlalchemy'. 'auto' is first replaced by the value of
        the ``io.sql.engine`` option; if that is 'auto' as well, the known
        engine classes are tried in order.

    Returns
    -------
    BaseEngine

    Raises
    ------
    ImportError
        If 'auto' was requested and no engine class could be instantiated.
    ValueError
        If the resolved name is neither 'auto' nor 'sqlalchemy'.
    """
    requested = get_option("io.sql.engine") if engine == "auto" else engine

    if requested == "sqlalchemy":
        return SQLAlchemyEngine()

    if requested != "auto":
        raise ValueError("engine must be one of 'auto', 'sqlalchemy'")

    # Still 'auto': try the engines in this order, collecting import failures.
    failures = []
    for candidate in [SQLAlchemyEngine]:
        try:
            return candidate()
        except ImportError as err:
            failures.append("\n - " + str(err))

    error_msgs = "".join(failures)
    raise ImportError(
        "Unable to find a usable engine; "
        "tried using: 'sqlalchemy'.\n"
        "A suitable version of "
        "sqlalchemy is required for sql I/O "
        "support.\n"
        "Trying to import the above resulted in these errors:"
        f"{error_msgs}"
    )
1386+
1387+
12861388
class SQLDatabase(PandasSQL):
12871389
"""
12881390
This class enables conversion between DataFrame and SQL databases
@@ -1504,58 +1606,18 @@ def read_query(
15041606

15051607
read_sql = read_query
15061608

1507-
def to_sql(
1609+
def prep_table(
15081610
self,
15091611
frame,
15101612
name,
15111613
if_exists="fail",
15121614
index=True,
15131615
index_label=None,
15141616
schema=None,
1515-
chunksize=None,
15161617
dtype: DtypeArg | None = None,
1517-
method=None,
1518-
):
1618+
) -> SQLTable:
15191619
"""
1520-
Write records stored in a DataFrame to a SQL database.
1521-
1522-
Parameters
1523-
----------
1524-
frame : DataFrame
1525-
name : string
1526-
Name of SQL table.
1527-
if_exists : {'fail', 'replace', 'append'}, default 'fail'
1528-
- fail: If table exists, do nothing.
1529-
- replace: If table exists, drop it, recreate it, and insert data.
1530-
- append: If table exists, insert data. Create if does not exist.
1531-
index : bool, default True
1532-
Write DataFrame index as a column.
1533-
index_label : string or sequence, default None
1534-
Column label for index column(s). If None is given (default) and
1535-
`index` is True, then the index names are used.
1536-
A sequence should be given if the DataFrame uses MultiIndex.
1537-
schema : string, default None
1538-
Name of SQL schema in database to write to (if database flavor
1539-
supports this). If specified, this overwrites the default
1540-
schema of the SQLDatabase object.
1541-
chunksize : int, default None
1542-
If not None, then rows will be written in batches of this size at a
1543-
time. If None, all rows will be written at once.
1544-
dtype : single type or dict of column name to SQL type, default None
1545-
Optional specifying the datatype for columns. The SQL type should
1546-
be a SQLAlchemy type. If all columns are of the same type, one
1547-
single value can be used.
1548-
method : {None', 'multi', callable}, default None
1549-
Controls the SQL insertion clause used:
1550-
1551-
* None : Uses standard SQL ``INSERT`` clause (one per row).
1552-
* 'multi': Pass multiple values in a single ``INSERT`` clause.
1553-
* callable with signature ``(pd_table, conn, keys, data_iter)``.
1554-
1555-
Details and a sample callable implementation can be found in the
1556-
section :ref:`insert method <io.sql.method>`.
1557-
1558-
.. versionadded:: 0.24.0
1620+
Prepares table in the database for data insertion. Creates it if needed, etc.
15591621
"""
15601622
if dtype:
15611623
if not is_dict_like(dtype):
@@ -1589,15 +1651,17 @@ def to_sql(
15891651
dtype=dtype,
15901652
)
15911653
table.create()
1654+
return table
15921655

1593-
from sqlalchemy.exc import SQLAlchemyError
1594-
1595-
try:
1596-
table.insert(chunksize, method=method)
1597-
except SQLAlchemyError as err:
1598-
# GH 34431 36465
1599-
raise ValueError("inf cannot be used with MySQL") from err
1600-
1656+
def check_case_sensitive(
1657+
self,
1658+
name,
1659+
schema,
1660+
):
1661+
"""
1662+
Checks table name for issues with case-sensitivity.
1663+
Method is called after data is inserted.
1664+
"""
16011665
if not name.isdigit() and not name.islower():
16021666
# check for potentially case sensitivity issues (GH7815)
16031667
# Only check when name is not a number and name is not lower case
@@ -1623,6 +1687,97 @@ def to_sql(
16231687
)
16241688
warnings.warn(msg, UserWarning)
16251689

1690+
def to_sql(
    self,
    frame,
    name,
    if_exists="fail",
    index=True,
    index_label=None,
    schema=None,
    chunksize=None,
    dtype: DtypeArg | None = None,
    method=None,
    engine="auto",
    **engine_kwargs,
):
    """
    Write records stored in a DataFrame to a SQL database.

    The engine is resolved first, then the table is prepared with
    ``prep_table``, the records are inserted by the engine, and finally
    the table name is checked for case-sensitivity issues.

    Parameters
    ----------
    frame : DataFrame
    name : string
        Name of SQL table.
    if_exists : {'fail', 'replace', 'append'}, default 'fail'
        - fail: Do not write to an existing table.
        - replace: If table exists, drop it, recreate it, and insert data.
        - append: If table exists, insert data. Create if does not exist.
    index : boolean, default True
        Write DataFrame index as a column.
    index_label : string or sequence, default None
        Column label for index column(s). If None is given (default) and
        `index` is True, then the index names are used.
        A sequence should be given if the DataFrame uses MultiIndex.
    schema : string, default None
        Name of SQL schema in database to write to (if database flavor
        supports this). If specified, this overwrites the default
        schema of the SQLDatabase object.
    chunksize : int, default None
        If not None, then rows will be written in batches of this size at a
        time. If None, all rows will be written at once.
    dtype : single type or dict of column name to SQL type, default None
        Optional specifying the datatype for columns. The SQL type should
        be a SQLAlchemy type. If all columns are of the same type, one
        single value can be used.
    method : {None, 'multi', callable}, default None
        Controls the SQL insertion clause used:

        * None : Uses standard SQL ``INSERT`` clause (one per row).
        * 'multi': Pass multiple values in a single ``INSERT`` clause.
        * callable with signature ``(pd_table, conn, keys, data_iter)``.

        Details and a sample callable implementation can be found in the
        section :ref:`insert method <io.sql.method>`.

        .. versionadded:: 0.24.0

    engine : {'auto', 'sqlalchemy'}, default 'auto'
        SQL engine library to use. If 'auto', then the option
        ``io.sql.engine`` is used. The default ``io.sql.engine``
        behavior is 'sqlalchemy'.

        .. versionadded:: 1.3.0

    **engine_kwargs
        Any additional kwargs are passed to the engine.
    """
    # Resolve the engine before touching the database.
    backend = get_engine(engine)

    prepared = self.prep_table(
        frame=frame,
        name=name,
        if_exists=if_exists,
        index=index,
        index_label=index_label,
        schema=schema,
        dtype=dtype,
    )

    backend.insert_records(
        table=prepared,
        con=self.connectable,
        frame=frame,
        name=name,
        index=index,
        schema=schema,
        chunksize=chunksize,
        method=method,
        **engine_kwargs,
    )

    # The case-sensitivity check runs after the data has been inserted.
    self.check_case_sensitive(name=name, schema=schema)
1780+
16261781
@property
16271782
def tables(self):
16281783
return self.meta.tables
@@ -2008,6 +2163,7 @@ def to_sql(
20082163
chunksize=None,
20092164
dtype: DtypeArg | None = None,
20102165
method=None,
2166+
**kwargs,
20112167
):
20122168
"""
20132169
Write records stored in a DataFrame to a SQL database.

0 commit comments

Comments
 (0)