From 8a3f65827f2d8eb0824a0ced1e20dd41abef4e63 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 26 Jan 2022 15:20:12 +0100 Subject: [PATCH 01/11] Add support to to_sql with duckdb --- pandas/io/sql.py | 98 ++++++++++++++++++++++++++----------- pandas/tests/io/test_sql.py | 39 +++++++++++++++ 2 files changed, 109 insertions(+), 28 deletions(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index fcb3f5177ae3f..f8fc8b3b6b12e 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -732,6 +732,10 @@ def pandasSQL_builder(con, schema: str | None = None): """ import sqlite3 import warnings + import duckdb + + if isinstance(con, duckdb.DuckDBPyConnection): + return DuckDBDatabase(con) if isinstance(con, sqlite3.Connection) or con is None: return SQLiteDatabase(con) @@ -2192,37 +2196,75 @@ def _create_sql_schema( return str(table.sql_schema()) -def get_schema( - frame, - name: str, - keys=None, - con=None, - dtype: DtypeArg | None = None, - schema: str | None = None, -): +class DuckDBDatabase(PandasSQL): """ - Get the SQL db table schema for the given frame. + Version of SQLDatabase to support DuckDB connections (fallback without + SQLAlchemy). This should only be used internally. Parameters ---------- - frame : DataFrame - name : str - name of SQL table - keys : string or sequence, default: None - columns to use a primary key - con: an open SQL database connection object or a SQLAlchemy connectable - Using SQLAlchemy makes it possible to use any DB supported by that - library, default: None - If a DBAPI2 object, only sqlite3 is supported. - dtype : dict of column name to SQL type, default None - Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type, or a string for sqlite3 fallback connection. - schema: str, default: None - Optional specifying the schema to be used in creating the table. + con : duckdb connection object - .. 
versionadded:: 1.2.0 """ - pandas_sql = pandasSQL_builder(con=con) - return pandas_sql._create_sql_schema( - frame, name, keys=keys, dtype=dtype, schema=schema - ) + + def __init__(self, con): + self.con = con + + def to_sql( + self, + frame, + name, + if_exists="fail", + index=True, + index_label=None, + schema=None, + chunksize=None, + dtype: DtypeArg | None = None, + method=None, + **kwargs, + ) -> int | None: + """ + Write records stored in a DataFrame to a SQL database. + + Parameters + ---------- + frame: DataFrame + name: string + Name of SQL table. + if_exists: {'fail', 'replace', 'append'}, default 'fail' + fail: If table exists, do nothing. + replace: If table exists, drop it, recreate it, and insert data. + append: If table exists, insert data. Create if it does not exist. + index : bool, default True + Ignored parameter included for compatibility with SQLAlchemy + and SQLite version of ``to_sql``. + index_label : string or sequence, default None + Ignored parameter included for compatibility with SQLAlchemy + and SQLite version of ``to_sql``. + schema : string, default None + Ignored parameter included for compatibility with SQLAlchemy + version of ``to_sql``. + chunksize : int, default None + Ignored parameter included for compatibility with SQLAlchemy + and SQLite version of ``to_sql``. + dtype : Ignored parameter included for compatibility with SQLAlchemy + and SQLite version of ``to_sql``. + method : {None, 'multi', callable}, default None + Ignored parameter included for compatibility with SQLAlchemy + and SQLite version of ``to_sql``. 
+ """ + table_exits = len(self.con.execute(f"SELECT name FROM sqlite_master WHERE name='{name}'").fetchall()) > 0 + if table_exits: + if if_exists == "fail": + raise ValueError(f"Table '{name}' already exists.") + elif if_exists == "replace": + self.con.execute(f"DROP TABLE {name}") + return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0] + elif if_exists == "append": + return self.con.execute(f"INSERT INTO {name} SELECT * FROM frame").fetchone()[0] + else: + raise ValueError(f"'{if_exists}' is not valid for if_exists") + + return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0] + + diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 741af4324c1a6..9c7727f9bad5e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -27,6 +27,7 @@ from io import StringIO from pathlib import Path import sqlite3 +import duckdb import numpy as np import pytest @@ -2936,3 +2937,41 @@ def test_if_exists(self): (5, "E"), ] self.drop_table(table_name) + +class TestDuckDB: + + def test_to_sql_duck(self): + con = duckdb.connect() + df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]], + columns=['Name', 'Age', 'Numeric']) + df.to_sql('ages', con) + result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone() + assert result == (3, 24, 2.5,) + con.close() + + def test_to_sql_duck_all_exist_options(self): + con = duckdb.connect() + con.execute("CREATE TABLE ages (a INTEGER)") + + df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]], + columns=['Name', 'Age', 'Numeric']) + with pytest.raises(Exception) as e_info: + df.to_sql('ages', con) + + + assert 'already exists' in str(e_info.value) + + df.to_sql('ages', con, if_exists= 'replace') + result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone() + assert result == (3, 24, 2.5,) + + df.to_sql('ages', con, if_exists='append') + result = 
con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone() + assert result == (6, 48, 5,) + + with pytest.raises(Exception) as e_info: + df.to_sql('ages', con, if_exists='flark') + + + assert 'not valid for if_exists' in str(e_info.value) + con.close() \ No newline at end of file From e7a48e0cc84f3a5db218ef724515c64d28fdb8e2 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 26 Jan 2022 16:04:10 +0100 Subject: [PATCH 02/11] Having a go at adding duckdb as an optional dependency --- doc/source/getting_started/install.rst | 1 + environment.yml | 1 + pandas/compat/_optional.py | 1 + pandas/io/sql.py | 3 ++- pandas/tests/io/test_sql.py | 12 +++++++++++- 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index df9c258f4aa6d..cdff578624ce2 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -348,6 +348,7 @@ Dependency Minimum Version Notes SQLAlchemy 1.4.0 SQL support for databases other than sqlite psycopg2 2.8.4 PostgreSQL engine for sqlalchemy pymysql 0.10.1 MySQL engine for sqlalchemy +duckdb 0.3.1 High-performance analytical database system ========================= ================== ============================================================= Other data sources diff --git a/environment.yml b/environment.yml index a168e691821c3..63543a5d7b507 100644 --- a/environment.yml +++ b/environment.yml @@ -89,6 +89,7 @@ dependencies: - numexpr>=2.7.1 - scipy>=1.4.1 - numba>=0.50.1 + - duckdb >=0.3.1 # optional for io # --------------- diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index a26bc94ab883e..c494e88906264 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -13,6 +13,7 @@ "bs4": "4.8.2", "blosc": "1.20.1", "bottleneck": "1.3.1", + "duckdb": "0.3.1", "fastparquet": "0.4.0", "fsspec": "0.7.4", "html5lib": "1.1", diff --git a/pandas/io/sql.py 
b/pandas/io/sql.py index f8fc8b3b6b12e..89b78fecccbd3 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -732,7 +732,8 @@ def pandasSQL_builder(con, schema: str | None = None): """ import sqlite3 import warnings - import duckdb + + duckdb = import_optional_dependency("duckdb") if isinstance(con, duckdb.DuckDBPyConnection): return DuckDBDatabase(con) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 9c7727f9bad5e..859c50f48214a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -27,7 +27,6 @@ from io import StringIO from pathlib import Path import sqlite3 -import duckdb import numpy as np import pytest @@ -72,6 +71,13 @@ except ImportError: SQLALCHEMY_INSTALLED = False +try: + import duckdb + + DUCKDB_INSTALLED = True +except ImportError: + DUCKDB_INSTALLED = False + SQL_STRINGS = { "read_parameters": { "sqlite": "SELECT * FROM iris WHERE Name=? AND SepalLength=?", @@ -2941,6 +2947,8 @@ def test_if_exists(self): class TestDuckDB: def test_to_sql_duck(self): + if not DUCKDB_INSTALLED: + return con = duckdb.connect() df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]], columns=['Name', 'Age', 'Numeric']) @@ -2950,6 +2958,8 @@ def test_to_sql_duck(self): con.close() def test_to_sql_duck_all_exist_options(self): + if not DUCKDB_INSTALLED: + return con = duckdb.connect() con.execute("CREATE TABLE ages (a INTEGER)") From 048555d21bc9cd126c81ec3c7932511a45e34fe1 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 26 Jan 2022 16:13:05 +0100 Subject: [PATCH 03/11] the conda dep is different than pip --- environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 63543a5d7b507..7afbf17106078 100644 --- a/environment.yml +++ b/environment.yml @@ -89,7 +89,7 @@ dependencies: - numexpr>=2.7.1 - scipy>=1.4.1 - numba>=0.50.1 - - duckdb >=0.3.1 + - python-duckdb>=0.3.1 # optional for io # --------------- From 
e2f6d8868813d369828e52cded27f5133367fa53 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 26 Jan 2022 17:29:26 +0100 Subject: [PATCH 04/11] conda package name is different --- pandas/compat/_optional.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index c494e88906264..833584c55cf15 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -13,7 +13,7 @@ "bs4": "4.8.2", "blosc": "1.20.1", "bottleneck": "1.3.1", - "duckdb": "0.3.1", + "python-duckdb": "0.3.1", "fastparquet": "0.4.0", "fsspec": "0.7.4", "html5lib": "1.1", From 039edf7ea14b44b9d0a1c759c5d1732a548cf183 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 26 Jan 2022 19:04:19 +0100 Subject: [PATCH 05/11] try to install it through pip --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 7afbf17106078..170b56a568d9a 100644 --- a/environment.yml +++ b/environment.yml @@ -124,3 +124,4 @@ dependencies: - pydata-sphinx-theme - pandas-dev-flaker==0.2.0 - pytest-cython + - duckdb From 2a1b441ba4424e10b1d7c46cb9a5604fcee7be58 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Wed, 26 Jan 2022 21:33:58 +0100 Subject: [PATCH 06/11] Update deps of ci --- ci/deps/actions-310.yaml | 1 + ci/deps/actions-38.yaml | 1 + ci/deps/actions-39.yaml | 1 + ci/deps/azure-windows-310.yaml | 1 + ci/deps/azure-windows-39.yaml | 1 + 5 files changed, 5 insertions(+) diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 9829380620f86..f3841b3c5eb15 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -43,6 +43,7 @@ dependencies: - s3fs - scipy - sqlalchemy + - duckdb - tabulate - xarray - xlrd diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index b23f686d845e9..00473c2e0bb79 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -42,6 +42,7 @@ dependencies: - s3fs - scipy - sqlalchemy + - duckdb - tabulate - 
xarray - xlrd diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 631ef40b02e33..ae9f2925ea846 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -42,6 +42,7 @@ dependencies: - s3fs - scipy - sqlalchemy + - duckdb - tabulate - xarray - xlrd diff --git a/ci/deps/azure-windows-310.yaml b/ci/deps/azure-windows-310.yaml index 8e6f4deef6057..b2d104c84d2a1 100644 --- a/ci/deps/azure-windows-310.yaml +++ b/ci/deps/azure-windows-310.yaml @@ -33,6 +33,7 @@ dependencies: - s3fs>=0.4.2 - scipy - sqlalchemy + - duckdb - xlrd - xlsxwriter - xlwt diff --git a/ci/deps/azure-windows-39.yaml b/ci/deps/azure-windows-39.yaml index 6f820b1c2aedb..0e0b5afcb25b8 100644 --- a/ci/deps/azure-windows-39.yaml +++ b/ci/deps/azure-windows-39.yaml @@ -32,6 +32,7 @@ dependencies: - s3fs>=0.4.2 - scipy - sqlalchemy + - duckdb - xlrd - xlsxwriter - xlwt From 18d9c565f77ed756200fd5b5c22495fbd97cc099 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 27 Jan 2022 11:48:46 +0100 Subject: [PATCH 07/11] Add duckdb as a dep to all cis --- ci/deps/actions-310.yaml | 2 +- ci/deps/actions-38-downstream_compat.yaml | 1 + ci/deps/actions-38-minimum_versions.yaml | 1 + ci/deps/actions-38.yaml | 2 +- ci/deps/actions-39.yaml | 2 +- ci/deps/azure-macos-310.yaml | 1 + ci/deps/azure-macos-38.yaml | 1 + ci/deps/azure-macos-39.yaml | 1 + ci/deps/azure-windows-310.yaml | 2 +- ci/deps/azure-windows-38.yaml | 1 + ci/deps/azure-windows-39.yaml | 2 +- 11 files changed, 11 insertions(+), 5 deletions(-) diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index f3841b3c5eb15..95cc6143d0f52 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -43,7 +43,7 @@ dependencies: - s3fs - scipy - sqlalchemy - - duckdb + - python-duckdb - tabulate - xarray - xlrd diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml index af4f7dee851d5..f537260c1b569 100644 --- a/ci/deps/actions-38-downstream_compat.yaml +++ 
b/ci/deps/actions-38-downstream_compat.yaml @@ -45,6 +45,7 @@ dependencies: - xlrd - xlsxwriter - xlwt + - python-duckdb # downstream packages - aiobotocore<2.0.0 # GH#44311 pinned to fix docbuild diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index 467402bb6ef7f..e2f3de76b436c 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -50,3 +50,4 @@ dependencies: - xlsxwriter=1.2.2 - xlwt=1.3.0 - zstandard=0.15.2 + - python-duckdb=0.3.1 diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index 00473c2e0bb79..f04ed18feb910 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -42,7 +42,7 @@ dependencies: - s3fs - scipy - sqlalchemy - - duckdb + - python-duckdb - tabulate - xarray - xlrd diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index ae9f2925ea846..bb2dc031b314b 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -42,7 +42,7 @@ dependencies: - s3fs - scipy - sqlalchemy - - duckdb + - python-duckdb - tabulate - xarray - xlrd diff --git a/ci/deps/azure-macos-310.yaml b/ci/deps/azure-macos-310.yaml index 312fac8091db6..3903fccb56cbc 100644 --- a/ci/deps/azure-macos-310.yaml +++ b/ci/deps/azure-macos-310.yaml @@ -34,3 +34,4 @@ dependencies: - xlsxwriter - xlwt - zstandard + - python-duckdb diff --git a/ci/deps/azure-macos-38.yaml b/ci/deps/azure-macos-38.yaml index 422aa86c57fc7..d335fe7cd668f 100644 --- a/ci/deps/azure-macos-38.yaml +++ b/ci/deps/azure-macos-38.yaml @@ -34,3 +34,4 @@ dependencies: - xlsxwriter - xlwt - zstandard + - python-duckdb=0.3.1 diff --git a/ci/deps/azure-macos-39.yaml b/ci/deps/azure-macos-39.yaml index 140d67796452c..09c776674bf86 100644 --- a/ci/deps/azure-macos-39.yaml +++ b/ci/deps/azure-macos-39.yaml @@ -34,3 +34,4 @@ dependencies: - xlsxwriter - xlwt - zstandard + - python-duckdb \ No newline at end of file diff --git a/ci/deps/azure-windows-310.yaml 
b/ci/deps/azure-windows-310.yaml index b2d104c84d2a1..136f5d28a4e24 100644 --- a/ci/deps/azure-windows-310.yaml +++ b/ci/deps/azure-windows-310.yaml @@ -33,7 +33,7 @@ dependencies: - s3fs>=0.4.2 - scipy - sqlalchemy - - duckdb + - python-duckdb - xlrd - xlsxwriter - xlwt diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml index eb533524147d9..8542deda9dfcb 100644 --- a/ci/deps/azure-windows-38.yaml +++ b/ci/deps/azure-windows-38.yaml @@ -33,3 +33,4 @@ dependencies: - xlsxwriter - xlwt - zstandard + - python-duckdb \ No newline at end of file diff --git a/ci/deps/azure-windows-39.yaml b/ci/deps/azure-windows-39.yaml index 0e0b5afcb25b8..729ec1e9b70c4 100644 --- a/ci/deps/azure-windows-39.yaml +++ b/ci/deps/azure-windows-39.yaml @@ -32,7 +32,7 @@ dependencies: - s3fs>=0.4.2 - scipy - sqlalchemy - - duckdb + - python-duckdb - xlrd - xlsxwriter - xlwt From a7560576dbef0d5348dea970fbfed33f49c7e34d Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 27 Jan 2022 12:50:16 +0100 Subject: [PATCH 08/11] Oops, accidentally deleted the get_schema function --- pandas/io/sql.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 89b78fecccbd3..28483494cc53c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2196,6 +2196,40 @@ def _create_sql_schema( ) return str(table.sql_schema()) +def get_schema( + frame, + name: str, + keys=None, + con=None, + dtype: DtypeArg | None = None, + schema: str | None = None, +): + """ + Get the SQL db table schema for the given frame. + + Parameters + ---------- + frame : DataFrame + name : str + name of SQL table + keys : string or sequence, default: None + columns to use a primary key + con: an open SQL database connection object or a SQLAlchemy connectable + Using SQLAlchemy makes it possible to use any DB supported by that + library, default: None + If a DBAPI2 object, only sqlite3 is supported.
+ dtype : dict of column name to SQL type, default None + Optional specifying the datatype for columns. The SQL type should + be a SQLAlchemy type, or a string for sqlite3 fallback connection. + schema: str, default: None + Optional specifying the schema to be used in creating the table. + + .. versionadded:: 1.2.0 + """ + pandas_sql = pandasSQL_builder(con=con) + return pandas_sql._create_sql_schema( + frame, name, keys=keys, dtype=dtype, schema=schema + ) class DuckDBDatabase(PandasSQL): """ From 9db14abb1bd2ea09725f770594b6ffd91349db07 Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 27 Jan 2022 15:03:59 +0100 Subject: [PATCH 09/11] One more dep and formatter --- ci/deps/actions-310-numpydev.yaml | 1 + pandas/io/sql.py | 25 +++++++++--- pandas/tests/io/test_sql.py | 66 ++++++++++++++++++++----------- 3 files changed, 63 insertions(+), 29 deletions(-) diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-310-numpydev.yaml index 3e32665d5433f..f96deaaddf9fe 100644 --- a/ci/deps/actions-310-numpydev.yaml +++ b/ci/deps/actions-310-numpydev.yaml @@ -14,6 +14,7 @@ dependencies: - python-dateutil - pytz - pip + - python-duckdb - pip: - cython==0.29.24 # GH#34014 - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple" diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 28483494cc53c..c6b69b43b852a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2196,6 +2196,7 @@ def _create_sql_schema( ) return str(table.sql_schema()) + def get_schema( frame, name: str, @@ -2231,6 +2232,7 @@ def get_schema( frame, name, keys=keys, dtype=dtype, schema=schema ) + class DuckDBDatabase(PandasSQL): """ Version of SQLDatabase to support DuckDB connections (fallback without @@ -2288,18 +2290,29 @@ def to_sql( Ignored parameter included for compatibility with SQLAlchemy and SQLite version of ``to_sql``. 
""" - table_exits = len(self.con.execute(f"SELECT name FROM sqlite_master WHERE name='{name}'").fetchall()) > 0 + table_exits = ( + len( + self.con.execute( + f"SELECT name FROM sqlite_master WHERE name='{name}'" + ).fetchall() + ) + > 0 + ) if table_exits: if if_exists == "fail": raise ValueError(f"Table '{name}' already exists.") elif if_exists == "replace": self.con.execute(f"DROP TABLE {name}") - return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0] + return self.con.execute( + f"CREATE TABLE {name} AS SELECT * FROM frame" + ).fetchone()[0] elif if_exists == "append": - return self.con.execute(f"INSERT INTO {name} SELECT * FROM frame").fetchone()[0] + return self.con.execute( + f"INSERT INTO {name} SELECT * FROM frame" + ).fetchone()[0] else: raise ValueError(f"'{if_exists}' is not valid for if_exists") - return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0] - - + return self.con.execute( + f"CREATE TABLE {name} AS SELECT * FROM frame" + ).fetchone()[0] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 859c50f48214a..aaf6de6b9550a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2944,17 +2944,25 @@ def test_if_exists(self): ] self.drop_table(table_name) -class TestDuckDB: +class TestDuckDB: def test_to_sql_duck(self): if not DUCKDB_INSTALLED: return con = duckdb.connect() - df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]], - columns=['Name', 'Age', 'Numeric']) - df.to_sql('ages', con) - result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone() - assert result == (3, 24, 2.5,) + df = pd.DataFrame( + [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]], + columns=["Name", "Age", "Numeric"], + ) + df.to_sql("ages", con) + result = con.execute( + 'SELECT count(*), sum("Age"), sum("Numeric") FROM ages' + ).fetchone() + assert result == ( + 3, + 24, + 2.5, + ) con.close() def 
test_to_sql_duck_all_exist_options(self): @@ -2963,25 +2971,37 @@ def test_to_sql_duck_all_exist_options(self): con = duckdb.connect() con.execute("CREATE TABLE ages (a INTEGER)") - df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]], - columns=['Name', 'Age', 'Numeric']) + df = pd.DataFrame( + [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]], + columns=["Name", "Age", "Numeric"], + ) with pytest.raises(Exception) as e_info: - df.to_sql('ages', con) - - - assert 'already exists' in str(e_info.value) - - df.to_sql('ages', con, if_exists= 'replace') - result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone() - assert result == (3, 24, 2.5,) + df.to_sql("ages", con) + + assert "already exists" in str(e_info.value) + + df.to_sql("ages", con, if_exists="replace") + result = con.execute( + 'SELECT count(*), sum("Age"), sum("Numeric") FROM ages' + ).fetchone() + assert result == ( + 3, + 24, + 2.5, + ) - df.to_sql('ages', con, if_exists='append') - result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone() - assert result == (6, 48, 5,) + df.to_sql("ages", con, if_exists="append") + result = con.execute( + 'SELECT count(*), sum("Age"), sum("Numeric") FROM ages' + ).fetchone() + assert result == ( + 6, + 48, + 5, + ) with pytest.raises(Exception) as e_info: - df.to_sql('ages', con, if_exists='flark') - + df.to_sql("ages", con, if_exists="flark") - assert 'not valid for if_exists' in str(e_info.value) - con.close() \ No newline at end of file + assert "not valid for if_exists" in str(e_info.value) + con.close() From f9d12932aab452abdcb90d368713aff619f0e61c Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 27 Jan 2022 17:00:39 +0100 Subject: [PATCH 10/11] formatter --- ci/deps/azure-macos-39.yaml | 2 +- ci/deps/azure-windows-38.yaml | 2 +- pandas/tests/io/test_sql.py | 14 ++++++-------- requirements-dev.txt | 2 ++ 4 files changed, 10 insertions(+), 10 deletions(-) diff --git 
a/ci/deps/azure-macos-39.yaml b/ci/deps/azure-macos-39.yaml index 09c776674bf86..f005ef1f81275 100644 --- a/ci/deps/azure-macos-39.yaml +++ b/ci/deps/azure-macos-39.yaml @@ -34,4 +34,4 @@ dependencies: - xlsxwriter - xlwt - zstandard - - python-duckdb \ No newline at end of file + - python-duckdb diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml index 8542deda9dfcb..5562f40516eed 100644 --- a/ci/deps/azure-windows-38.yaml +++ b/ci/deps/azure-windows-38.yaml @@ -33,4 +33,4 @@ dependencies: - xlsxwriter - xlwt - zstandard - - python-duckdb \ No newline at end of file + - python-duckdb diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index aaf6de6b9550a..681e3fcd7ebec 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2950,7 +2950,7 @@ def test_to_sql_duck(self): if not DUCKDB_INSTALLED: return con = duckdb.connect() - df = pd.DataFrame( + df = DataFrame( [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]], columns=["Name", "Age", "Numeric"], ) @@ -2971,15 +2971,14 @@ def test_to_sql_duck_all_exist_options(self): con = duckdb.connect() con.execute("CREATE TABLE ages (a INTEGER)") - df = pd.DataFrame( + df = DataFrame( [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]], columns=["Name", "Age", "Numeric"], ) - with pytest.raises(Exception) as e_info: + msg = "Table ages already exists." 
+ with pytest.raises(ValueError, match=msg): df.to_sql("ages", con) - assert "already exists" in str(e_info.value) - df.to_sql("ages", con, if_exists="replace") result = con.execute( 'SELECT count(*), sum("Age"), sum("Numeric") FROM ages' @@ -2999,9 +2998,8 @@ def test_to_sql_duck_all_exist_options(self): 48, 5, ) - - with pytest.raises(Exception) as e_info: + msg = "flark not valid for if_exists" + with pytest.raises(ValueError, match=msg): df.to_sql("ages", con, if_exists="flark") - assert "not valid for if_exists" in str(e_info.value) con.close() diff --git a/requirements-dev.txt b/requirements-dev.txt index 2434428101285..c2136e4fbd04c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -61,6 +61,7 @@ matplotlib>=3.3.2 numexpr>=2.7.1 scipy>=1.4.1 numba>=0.50.1 +python-duckdb>=0.3.1 beautifulsoup4>=4.8.2 html5lib lxml @@ -86,4 +87,5 @@ natsort pydata-sphinx-theme pandas-dev-flaker==0.2.0 pytest-cython +duckdb setuptools>=51.0.0 From 55074d029c24ee06f2072d6d46da7b35f65addcb Mon Sep 17 00:00:00 2001 From: Pedro Holanda Date: Thu, 27 Jan 2022 23:43:59 +0100 Subject: [PATCH 11/11] Fix test --- pandas/tests/io/test_sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 681e3fcd7ebec..a70031bb1f63a 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2975,7 +2975,7 @@ def test_to_sql_duck_all_exist_options(self): [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]], columns=["Name", "Age", "Numeric"], ) - msg = "Table ages already exists." + msg = "Table 'ages' already exists." with pytest.raises(ValueError, match=msg): df.to_sql("ages", con) @@ -2998,7 +2998,7 @@ def test_to_sql_duck_all_exist_options(self): 48, 5, ) - msg = "flark not valid for if_exists" + msg = "'flark' is not valid for if_exists" with pytest.raises(ValueError, match=msg): df.to_sql("ages", con, if_exists="flark")