From 2562f715293939a1c47c4dd8dce341c0722910f5 Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 21 Mar 2021 04:15:23 +0000 Subject: [PATCH 01/11] initial refactor of sqlalchemy, also added bcpandas but no tests yet --- environment.yml | 6 +- pandas/core/config_init.py | 16 ++ pandas/io/sql.py | 298 ++++++++++++++++++++++++++++++------- requirements-dev.txt | 3 +- 4 files changed, 265 insertions(+), 58 deletions(-) diff --git a/environment.yml b/environment.yml index 1259d0dd4ae44..18599e64b1c5c 100644 --- a/environment.yml +++ b/environment.yml @@ -101,12 +101,16 @@ dependencies: - pyarrow>=0.15.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather - python-snappy # required by pyarrow + # pandas.read_sql, DataFrame.to_sql + - sqlalchemy + - bcpandas>=1.0.1 + - pyqt>=5.9.2 # pandas.read_clipboard - pytables>=3.5.1 # pandas.read_hdf, DataFrame.to_hdf - s3fs>=0.4.0 # file IO when using 's3://...' path - fsspec>=0.7.4 # for generic remote file operations - gcsfs>=0.6.0 # file IO when using 'gcs://...' path - - sqlalchemy # pandas.read_sql, DataFrame.to_sql + - xarray # DataFrame.to_xarray - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index fd49ac0176ce4..8d4fd817e24ab 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -652,6 +652,22 @@ def use_inf_as_na_cb(key): validator=is_one_of_factory(["auto", "pyarrow", "fastparquet"]), ) + +# Set up the io.sql specific configuration. +sql_engine_doc = """ +: string + The default sql reader/writer engine. Available options: + 'auto', 'sqlalchemy', 'bcpandas', the default is 'auto' +""" + +with cf.config_prefix("io.sql"): + cf.register_option( + "engine", + "auto", + sql_engine_doc, + validator=is_one_of_factory(["auto", "sqlalchemy", "bcpandas"]), + ) + # -------- # Plotting # --------- diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e3347468828d1..22a656f76d679 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -29,6 +29,8 @@ import pandas._libs.lib as lib from pandas._typing import DtypeArg +from pandas.compat._optional import import_optional_dependency +from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import ( is_datetime64tz_dtype, @@ -38,6 +40,7 @@ from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import isna +from pandas import get_option from pandas.core.api import ( DataFrame, Series, @@ -645,6 +648,8 @@ def to_sql( chunksize: Optional[int] = None, dtype: Optional[DtypeArg] = None, method: Optional[str] = None, + engine: str = "auto", + **kwargs, ) -> None: """ Write records stored in a DataFrame to a SQL database. @@ -691,6 +696,14 @@ def to_sql( section :ref:`insert method `. .. versionadded:: 0.24.0 + engine : {'auto', 'sqlalchemy', 'bcpandas'}, default 'auto' + SQL engine library to use. If 'auto', then the option + ``io.sql.engine`` is used. The default ``io.sql.engine`` + behavior is 'sqlalchemy' + + .. versionadded:: 1.4.0 + **kwargs + Any additional kwargs are passed to the engine. """ if if_exists not in ("fail", "replace", "append"): raise ValueError(f"'{if_exists}' is not valid for if_exists") @@ -714,6 +727,8 @@ def to_sql( chunksize=chunksize, dtype=dtype, method=method, + engine=engine, + **kwargs, ) @@ -1285,6 +1300,130 @@ def to_sql( ) +class BaseEngine: + def insert_records( + self, + table: SQLTable, + con, + frame, + name, + index=True, + schema=None, + chunksize=None, + method=None, + **kwargs, + ): + """ + Inserts data into already-prepared table + """ + raise AbstractMethodError(self) + + +class SQLAlchemyEngine(BaseEngine): + def __init__(self): + import_optional_dependency( + "sqlalchemy", extra="sqlalchemy is required for SQL support." + ) + + def insert_records( + self, + table: SQLTable, + con, + frame, + name, + index=True, + schema=None, + chunksize=None, + method=None, + **kwargs, + ): + from sqlalchemy import exc + + try: + table.insert(chunksize=chunksize, method=method) + except exc.SQLAlchemyError as err: + # GH34431 + msg = "(1054, \"Unknown column 'inf' in 'field list'\")" + err_text = str(err.orig) + if re.search(msg, err_text): + raise ValueError("inf cannot be used with MySQL") from err + else: + raise err + + +class BCPandasEngine(BaseEngine): + def __init__(self): + import_optional_dependency( + "bcpandas", extra="bcpandas is required for SQL support." + ) + + import bcpandas + + self.api = bcpandas + + def insert_records( + self, + table: SQLTable, + con, + frame, + name, + index=True, + schema=None, + chunksize=None, + method=None, + **kwargs, + ): + # 'if_exists' already checked when created `SQLTable`, + # setting to 'append' for SQL Server specific checks in bcpandas + if_exists = "append" + creds = self.api.SqlCreds.from_engine(con) + + self.api.to_sql( + df=frame, + table_name=name, + creds=creds, + sql_type="table", + schema=schema, + index=index, + if_exists=if_exists, + batch_size=chunksize, + ) + + +def get_engine(engine: str) -> BaseEngine: + """ return our implementation """ + if engine == "auto": + engine = get_option("io.sql.engine") + + if engine == "auto": + # try engines in this order + engine_classes = [SQLAlchemyEngine, BCPandasEngine] + + error_msgs = "" + for engine_class in engine_classes: + try: + return engine_class() + except ImportError as err: + error_msgs += "\n - " + str(err) + + raise ImportError( + "Unable to find a usable engine; " + "tried using: 'sqlalchemy', 'bcpandas'.\n" + "A suitable version of " + "sqlalchemy or bcpandas is required for sql I/O " + "support.\n" + "Trying to import the above resulted in these errors:" + f"{error_msgs}" + ) + + if engine == "sqlalchemy": + return SQLAlchemyEngine() + elif engine == "bcpandas": + return BCPandasEngine() + + raise ValueError("engine must be one of 'sqlalchemy', 'bcpandas'") + + class SQLDatabase(PandasSQL): """ This class enables conversion between DataFrame and SQL databases @@ -1506,7 +1645,7 @@ def read_query( read_sql = read_query - def to_sql( + def prep_table( self, frame, name, @@ -1514,50 +1653,10 @@ def to_sql( index=True, index_label=None, schema=None, - chunksize=None, dtype: Optional[DtypeArg] = None, - method=None, - ): + ) -> SQLTable: """ - Write records stored in a DataFrame to a SQL database. - - Parameters - ---------- - frame : DataFrame - name : string - Name of SQL table. - if_exists : {'fail', 'replace', 'append'}, default 'fail' - - fail: If table exists, do nothing. - - replace: If table exists, drop it, recreate it, and insert data. - - append: If table exists, insert data. Create if does not exist. - index : boolean, default True - Write DataFrame index as a column. - index_label : string or sequence, default None - Column label for index column(s). If None is given (default) and - `index` is True, then the index names are used. - A sequence should be given if the DataFrame uses MultiIndex. - schema : string, default None - Name of SQL schema in database to write to (if database flavor - supports this). If specified, this overwrites the default - schema of the SQLDatabase object. - chunksize : int, default None - If not None, then rows will be written in batches of this size at a - time. If None, all rows will be written at once. - dtype : single type or dict of column name to SQL type, default None - Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type. If all columns are of the same type, one - single value can be used. - method : {None', 'multi', callable}, default None - Controls the SQL insertion clause used: - - * None : Uses standard SQL ``INSERT`` clause (one per row). - * 'multi': Pass multiple values in a single ``INSERT`` clause. - * callable with signature ``(pd_table, conn, keys, data_iter)``. - - Details and a sample callable implementation can be found in the - section :ref:`insert method `. - - .. versionadded:: 0.24.0 + Prepares table in the database for data insertion. Creates it if needed, etc. """ if dtype: if not is_dict_like(dtype): @@ -1591,20 +1690,17 @@ def to_sql( dtype=dtype, ) table.create() + return table - from sqlalchemy import exc - - try: - table.insert(chunksize, method=method) - except exc.SQLAlchemyError as err: - # GH34431 - msg = "(1054, \"Unknown column 'inf' in 'field list'\")" - err_text = str(err.orig) - if re.search(msg, err_text): - raise ValueError("inf cannot be used with MySQL") from err - else: - raise err - + def check_case_sensitive( + self, + name, + schema, + ): + """ + Checks table name for issues with case-sensitivity. + Method is called after data is inserted. + """ if not name.isdigit() and not name.islower(): # check for potentially case sensitivity issues (GH7815) # Only check when name is not a number and name is not lower case @@ -1630,6 +1726,95 @@ def to_sql( ) warnings.warn(msg, UserWarning) + def to_sql( + self, + frame, + name, + if_exists="fail", + index=True, + index_label=None, + schema=None, + chunksize=None, + dtype: Optional[DtypeArg] = None, + method=None, + engine="auto", + **kwargs, + ): + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame : DataFrame + name : string + Name of SQL table. + if_exists : {'fail', 'replace', 'append'}, default 'fail' + - fail: If table exists, do nothing. + - replace: If table exists, drop it, recreate it, and insert data. + - append: If table exists, insert data. Create if does not exist. + index : boolean, default True + Write DataFrame index as a column. + index_label : string or sequence, default None + Column label for index column(s). If None is given (default) and + `index` is True, then the index names are used. + A sequence should be given if the DataFrame uses MultiIndex. + schema : string, default None + Name of SQL schema in database to write to (if database flavor + supports this). If specified, this overwrites the default + schema of the SQLDatabase object. + chunksize : int, default None + If not None, then rows will be written in batches of this size at a + time. If None, all rows will be written at once. + dtype : single type or dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a SQLAlchemy type. If all columns are of the same type, one + single value can be used. + method : {None', 'multi', callable}, default None + Controls the SQL insertion clause used: + + * None : Uses standard SQL ``INSERT`` clause (one per row). + * 'multi': Pass multiple values in a single ``INSERT`` clause. + * callable with signature ``(pd_table, conn, keys, data_iter)``. + + Details and a sample callable implementation can be found in the + section :ref:`insert method `. + + .. versionadded:: 0.24.0 + engine : {'auto', 'sqlalchemy', 'bcpandas'}, default 'auto' + SQL engine library to use. If 'auto', then the option + ``io.sql.engine`` is used. The default ``io.sql.engine`` + behavior is 'sqlalchemy' + + .. versionadded:: 1.4.0 + **kwargs + Any additional kwargs are passed to the engine. + """ + sql_engine = get_engine(engine) + + table = self.prep_table( + frame=frame, + name=name, + if_exists=if_exists, + index=index, + index_label=index_label, + schema=schema, + dtype=dtype, + ) + + sql_engine.insert_records( + table=table, + con=self.connectable, + frame=frame, + name=name, + index=index, + schema=schema, + chunksize=chunksize, + method=method, + **kwargs, + ) + + self.check_case_sensitive(name=name, schema=schema) + @property def tables(self): return self.meta.tables @@ -2015,6 +2200,7 @@ def to_sql( chunksize=None, dtype: Optional[DtypeArg] = None, method=None, + **kwargs, ): """ Write records stored in a DataFrame to a SQL database. diff --git a/requirements-dev.txt b/requirements-dev.txt index 1817d79f96139..4eb7baa0e622d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -65,12 +65,13 @@ odfpy fastparquet>=0.3.2 pyarrow>=0.15.0 python-snappy +sqlalchemy +bcpandas>=1.0.1 pyqt5>=5.9.2 tables>=3.5.1 s3fs>=0.4.0 fsspec>=0.7.4 gcsfs>=0.6.0 -sqlalchemy xarray cftime pyreadstat From 982593cce56bf6ad060e0b9d4f9f45de88dc519b Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 21 Mar 2021 15:56:37 +0000 Subject: [PATCH 02/11] reverting bcpandas impl; keeping to strictly refactoring sqlalchemy --- environment.yml | 6 +---- pandas/core/config_init.py | 4 ++-- pandas/io/sql.py | 47 +++----------------------------------- requirements-dev.txt | 3 +-- 4 files changed, 7 insertions(+), 53 deletions(-) diff --git a/environment.yml b/environment.yml index 18599e64b1c5c..1259d0dd4ae44 100644 --- a/environment.yml +++ b/environment.yml @@ -101,16 +101,12 @@ dependencies: - pyarrow>=0.15.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather - python-snappy # required by pyarrow - # pandas.read_sql, DataFrame.to_sql - - sqlalchemy - - bcpandas>=1.0.1 - - pyqt>=5.9.2 # pandas.read_clipboard - pytables>=3.5.1 # pandas.read_hdf, DataFrame.to_hdf - s3fs>=0.4.0 # file IO when using 's3://...' path - fsspec>=0.7.4 # for generic remote file operations - gcsfs>=0.6.0 # file IO when using 'gcs://...' path - + - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray - cftime # Needed for downstream xarray.CFTimeIndex test - pyreadstat # pandas.read_spss diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 8d4fd817e24ab..baac872a6a466 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -657,7 +657,7 @@ def use_inf_as_na_cb(key): sql_engine_doc = """ : string The default sql reader/writer engine. Available options: - 'auto', 'sqlalchemy', 'bcpandas', the default is 'auto' + 'auto', 'sqlalchemy', the default is 'auto' """ with cf.config_prefix("io.sql"): @@ -665,7 +665,7 @@ def use_inf_as_na_cb(key): "engine", "auto", sql_engine_doc, - validator=is_one_of_factory(["auto", "sqlalchemy", "bcpandas"]), + validator=is_one_of_factory(["auto", "sqlalchemy"]), ) # -------- diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 22a656f76d679..c9b978a235599 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -696,7 +696,7 @@ def to_sql( section :ref:`insert method `. .. versionadded:: 0.24.0 - engine : {'auto', 'sqlalchemy', 'bcpandas'}, default 'auto' + engine : {'auto', 'sqlalchemy'}, default 'auto' SQL engine library to use. If 'auto', then the option ``io.sql.engine`` is used. The default ``io.sql.engine`` behavior is 'sqlalchemy' @@ -1351,45 +1351,6 @@ def insert_records( raise err -class BCPandasEngine(BaseEngine): - def __init__(self): - import_optional_dependency( - "bcpandas", extra="bcpandas is required for SQL support." - ) - - import bcpandas - - self.api = bcpandas - - def insert_records( - self, - table: SQLTable, - con, - frame, - name, - index=True, - schema=None, - chunksize=None, - method=None, - **kwargs, - ): - # 'if_exists' already checked when created `SQLTable`, - # setting to 'append' for SQL Server specific checks in bcpandas - if_exists = "append" - creds = self.api.SqlCreds.from_engine(con) - - self.api.to_sql( - df=frame, - table_name=name, - creds=creds, - sql_type="table", - schema=schema, - index=index, - if_exists=if_exists, - batch_size=chunksize, - ) - - def get_engine(engine: str) -> BaseEngine: """ return our implementation """ if engine == "auto": @@ -1397,7 +1358,7 @@ def get_engine(engine: str) -> BaseEngine: if engine == "auto": # try engines in this order - engine_classes = [SQLAlchemyEngine, BCPandasEngine] + engine_classes = [SQLAlchemyEngine] error_msgs = "" for engine_class in engine_classes: @@ -1418,8 +1379,6 @@ def get_engine(engine: str) -> BaseEngine: if engine == "sqlalchemy": return SQLAlchemyEngine() - elif engine == "bcpandas": - return BCPandasEngine() raise ValueError("engine must be one of 'sqlalchemy', 'bcpandas'") @@ -1780,7 +1739,7 @@ def to_sql( section :ref:`insert method `. .. versionadded:: 0.24.0 - engine : {'auto', 'sqlalchemy', 'bcpandas'}, default 'auto' + engine : {'auto', 'sqlalchemy'}, default 'auto' SQL engine library to use. If 'auto', then the option ``io.sql.engine`` is used. The default ``io.sql.engine`` behavior is 'sqlalchemy' diff --git a/requirements-dev.txt b/requirements-dev.txt index 4eb7baa0e622d..1817d79f96139 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -65,13 +65,12 @@ odfpy fastparquet>=0.3.2 pyarrow>=0.15.0 python-snappy -sqlalchemy -bcpandas>=1.0.1 pyqt5>=5.9.2 tables>=3.5.1 s3fs>=0.4.0 fsspec>=0.7.4 gcsfs>=0.6.0 +sqlalchemy xarray cftime pyreadstat From 962a36cfcb58285950604f96c73dc9f14693b236 Mon Sep 17 00:00:00 2001 From: Yehoshua Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Tue, 23 Mar 2021 19:07:30 +0000 Subject: [PATCH 03/11] changes requested by @jreback --- doc/source/user_guide/options.rst | 4 ++++ pandas/io/sql.py | 30 +++++++++++++++++------------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index 1fcaac1a91d09..278eb907102ed 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -456,6 +456,10 @@ io.hdf.dropna_table True drop ALL nan rows when appe io.parquet.engine None The engine to use as a default for parquet reading and writing. If None then try 'pyarrow' and 'fastparquet' +io.sql.engine None The engine to use as a default for + sql reading and writing, with SQLAlchemy + as a higher level interface. If None + then try 'sqlalchemy' mode.chained_assignment warn Controls ``SettingWithCopyWarning``: 'raise', 'warn', or None. Raise an exception, warn, or no action if diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c9b978a235599..51a57ba2ff4c8 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -649,7 +649,7 @@ def to_sql( dtype: Optional[DtypeArg] = None, method: Optional[str] = None, engine: str = "auto", - **kwargs, + **engine_kwargs, ) -> None: """ Write records stored in a DataFrame to a SQL database. @@ -696,13 +696,15 @@ def to_sql( section :ref:`insert method `. .. versionadded:: 0.24.0 + engine : {'auto', 'sqlalchemy'}, default 'auto' SQL engine library to use. If 'auto', then the option ``io.sql.engine`` is used. The default ``io.sql.engine`` behavior is 'sqlalchemy' - .. versionadded:: 1.4.0 - **kwargs + .. versionadded:: 1.3.0 + + **engine_kwargs Any additional kwargs are passed to the engine. """ if if_exists not in ("fail", "replace", "append"): @@ -728,7 +730,7 @@ def to_sql( dtype=dtype, method=method, engine=engine, - **kwargs, + **engine_kwargs, ) @@ -1311,7 +1313,7 @@ def insert_records( schema=None, chunksize=None, method=None, - **kwargs, + **engine_kwargs, ): """ Inserts data into already-prepared table @@ -1335,7 +1337,7 @@ def insert_records( schema=None, chunksize=None, method=None, - **kwargs, + **engine_kwargs, ): from sqlalchemy import exc @@ -1369,18 +1371,18 @@ def get_engine(engine: str) -> BaseEngine: raise ImportError( "Unable to find a usable engine; " - "tried using: 'sqlalchemy', 'bcpandas'.\n" + "tried using: 'sqlalchemy'.\n" "A suitable version of " - "sqlalchemy or bcpandas is required for sql I/O " + "sqlalchemy is required for sql I/O " "support.\n" "Trying to import the above resulted in these errors:" f"{error_msgs}" ) - if engine == "sqlalchemy": + elif engine == "sqlalchemy": return SQLAlchemyEngine() - raise ValueError("engine must be one of 'sqlalchemy', 'bcpandas'") + raise ValueError("engine must be one of 'auto', 'sqlalchemy'") class SQLDatabase(PandasSQL): @@ -1697,7 +1699,7 @@ def to_sql( dtype: Optional[DtypeArg] = None, method=None, engine="auto", - **kwargs, + **engine_kwargs, ): """ Write records stored in a DataFrame to a SQL database. @@ -1739,13 +1741,15 @@ def to_sql( section :ref:`insert method `. .. versionadded:: 0.24.0 + engine : {'auto', 'sqlalchemy'}, default 'auto' SQL engine library to use. If 'auto', then the option ``io.sql.engine`` is used. The default ``io.sql.engine`` behavior is 'sqlalchemy' .. versionadded:: 1.4.0 - **kwargs + + **engine_kwargs Any additional kwargs are passed to the engine. """ sql_engine = get_engine(engine) @@ -1769,7 +1773,7 @@ def to_sql( schema=schema, chunksize=chunksize, method=method, - **kwargs, + **engine_kwargs, ) self.check_case_sensitive(name=name, schema=schema) From dbf0cfa9337789a24afbd1034bee1fb69a058b57 Mon Sep 17 00:00:00 2001 From: Yehoshua Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 11 Apr 2021 19:15:48 +0000 Subject: [PATCH 04/11] added tests for sql configs --- pandas/tests/io/test_sql.py | 54 +++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 04ddef57a9621..290e063a59be7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -52,7 +52,9 @@ import pandas.io.sql as sql from pandas.io.sql import ( + SQLAlchemyEngine, _gt14, + get_engine, read_sql_query, read_sql_table, ) @@ -575,6 +577,23 @@ def sample(pd_table, conn, keys, data_iter): # Nuke table self.drop_table("test_frame1") + def _to_sql_with_sql_engine(self, engine="auto", **engine_kwargs): + """`to_sql` with the `engine` param""" + # mostly copied from this class's `_to_sql()` method + self.drop_table("test_frame1") + + self.pandasSQL.to_sql( + self.test_frame1, "test_frame1", engine=engine, **engine_kwargs + ) + assert self.pandasSQL.has_table("test_frame1") + + num_entries = len(self.test_frame1) + num_rows = self._count_rows("test_frame1") + assert num_rows == num_entries + + # Nuke table + self.drop_table("test_frame1") + def _roundtrip(self): self.drop_table("test_frame_roundtrip") self.pandasSQL.to_sql(self.test_frame1, "test_frame_roundtrip") @@ -2053,6 +2072,41 @@ class Temporary(Base): tm.assert_frame_equal(df, expected) + # -- SQL Engine tests (in the base class for now) + def test_invalid_engine(self): + msg = "engine must be one of 'auto', 'sqlalchemy'" + with pytest.raises(ValueError, match=msg): + self._to_sql_with_sql_engine("bad_engine") + + def test_options_sqlalchemy(self): + # use the set option + + with pd.option_context("io.sql.engine", "sqlalchemy"): + self._to_sql_with_sql_engine() + + def test_options_auto(self): + # use the set option + + with pd.option_context("io.sql.engine", "auto"): + self._to_sql_with_sql_engine() + + def test_options_get_engine(self): + assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) + + with pd.option_context("io.sql.engine", "sqlalchemy"): + assert isinstance(get_engine("auto"), SQLAlchemyEngine) + assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) + + with pd.option_context("io.sql.engine", "auto"): + assert isinstance(get_engine("auto"), SQLAlchemyEngine) + assert isinstance(get_engine("sqlalchemy"), SQLAlchemyEngine) + + def test_get_engine_auto_error_message(self): + # Expect different error messages from get_engine(engine="auto") + # if engines aren't installed vs. are installed but bad version + pass + # TODO fill this in when we add more engines + class _TestSQLAlchemyConn(_EngineToConnMixin, _TestSQLAlchemy): def test_transactions(self): From b77b6a34ea9bf873001bffef8c21a31315419ec4 Mon Sep 17 00:00:00 2001 From: Yehoshua Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Mon, 12 Apr 2021 16:18:56 +0000 Subject: [PATCH 05/11] fixed versionadded --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index ef0f7da4c7e53..3de753be5c5af 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1747,7 +1747,7 @@ def to_sql( ``io.sql.engine`` is used. The default ``io.sql.engine`` behavior is 'sqlalchemy' - .. versionadded:: 1.4.0 + .. versionadded:: 1.3.0 **engine_kwargs Any additional kwargs are passed to the engine. From c34c97b7f70297223ae48cc7b3893a824eb273e6 Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Fri, 16 Apr 2021 13:43:49 +0000 Subject: [PATCH 06/11] change Optional[DtypeArg] to DtypeArg | None --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f935b026ad2a3..85e9cd33cdfae 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1694,7 +1694,7 @@ def to_sql( index_label=None, schema=None, chunksize=None, - dtype: Optional[DtypeArg] = None, + dtype: DtypeArg | None = None, method=None, engine="auto", **engine_kwargs, From 80e3a1b5516762f8c4a5197cd8961eed854a5a87 Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Fri, 30 Apr 2021 13:13:05 +0000 Subject: [PATCH 07/11] catch mysql -inf exception --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 85e9cd33cdfae..f6f42c2fdd5dc 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1343,7 +1343,7 @@ def insert_records( table.insert(chunksize=chunksize, method=method) except exc.SQLAlchemyError as err: # GH34431 - msg = "(1054, \"Unknown column 'inf' in 'field list'\")" + msg = "(1054, \"Unknown column 'inf(e0)?' in 'field list'\")" err_text = str(err.orig) if re.search(msg, err_text): raise ValueError("inf cannot be used with MySQL") from err From 4f6f8eab864e111d2c87bb05e5be0c58c2e1e62d Mon Sep 17 00:00:00 2001 From: Yehoshua Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Fri, 30 Apr 2021 19:09:41 +0000 Subject: [PATCH 08/11] another fix for CI test --- pandas/io/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f6f42c2fdd5dc..65f7135bd3876 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1343,9 +1343,9 @@ def insert_records( table.insert(chunksize=chunksize, method=method) except exc.SQLAlchemyError as err: # GH34431 - msg = "(1054, \"Unknown column 'inf(e0)?' in 'field list'\")" + msg_pattern = "(OperationalError: )?\\(1054, \"Unknown column 'inf(e0)?' in 'field list'\"\\)" # noqa: E501 err_text = str(err.orig) - if re.search(msg, err_text): + if re.search(msg_pattern, err_text, re.MULTILINE | re.VERBOSE): raise ValueError("inf cannot be used with MySQL") from err else: raise err From 0be19ced50e73bc4a91f199d3170bdfac09846b9 Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 2 May 2021 16:07:35 +0000 Subject: [PATCH 09/11] Regex update to catch CI error --- pandas/io/sql.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 65f7135bd3876..16c1c6dc36805 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1343,9 +1343,11 @@ def insert_records( table.insert(chunksize=chunksize, method=method) except exc.SQLAlchemyError as err: # GH34431 - msg_pattern = "(OperationalError: )?\\(1054, \"Unknown column 'inf(e0)?' in 'field list'\"\\)" # noqa: E501 + # https://stackoverflow.com/a/67358288/6067848 + msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?# + )|(ProgrammingError: inf can not be used with MySQL)""" err_text = str(err.orig) - if re.search(msg_pattern, err_text, re.MULTILINE | re.VERBOSE): + if re.search(msg, err_text, re.MULTILINE | re.VERBOSE): raise ValueError("inf cannot be used with MySQL") from err else: raise err From f084faa844b943ae835665d09895e52421e0a8dc Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 2 May 2021 18:02:42 -0400 Subject: [PATCH 10/11] Remove `re.MULTILINE | re.VERBOSE` Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com> --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 16c1c6dc36805..e9f4e92c5730c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1347,7 +1347,7 @@ def insert_records( msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?# )|(ProgrammingError: inf can not be used with MySQL)""" err_text = str(err.orig) - if re.search(msg, err_text, re.MULTILINE | re.VERBOSE): + if re.search(msg, err_text): raise ValueError("inf cannot be used with MySQL") from err else: raise err From 36adf43ccc03f1c5395a49581102ede9a485d432 Mon Sep 17 00:00:00 2001 From: Josh Dimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 2 May 2021 22:10:40 -0400 Subject: [PATCH 11/11] Update pandas/io/sql.py Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com> --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e9f4e92c5730c..04a7ccb538a67 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1345,7 +1345,7 @@ def insert_records( # GH34431 # https://stackoverflow.com/a/67358288/6067848 msg = r"""(\(1054, "Unknown column 'inf(e0)?' in 'field list'"\))(?# - )|(ProgrammingError: inf can not be used with MySQL)""" + )|inf can not be used with MySQL""" err_text = str(err.orig) if re.search(msg, err_text): raise ValueError("inf cannot be used with MySQL") from err