From 8a3f65827f2d8eb0824a0ced1e20dd41abef4e63 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Wed, 26 Jan 2022 15:20:12 +0100
Subject: [PATCH 01/11] Add to_sql support for DuckDB connections

---
 pandas/io/sql.py            | 98 ++++++++++++++++++++++++++-----------
 pandas/tests/io/test_sql.py | 39 +++++++++++++++
 2 files changed, 109 insertions(+), 28 deletions(-)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index fcb3f5177ae3f..f8fc8b3b6b12e 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -732,6 +732,10 @@ def pandasSQL_builder(con, schema: str | None = None):
     """
     import sqlite3
     import warnings
+    import duckdb
+
+    if isinstance(con, duckdb.DuckDBPyConnection):
+        return DuckDBDatabase(con)
 
     if isinstance(con, sqlite3.Connection) or con is None:
         return SQLiteDatabase(con)
@@ -2192,37 +2196,75 @@ def _create_sql_schema(
         return str(table.sql_schema())
 
 
-def get_schema(
-    frame,
-    name: str,
-    keys=None,
-    con=None,
-    dtype: DtypeArg | None = None,
-    schema: str | None = None,
-):
+class DuckDBDatabase(PandasSQL):
     """
-    Get the SQL db table schema for the given frame.
+    Version of SQLDatabase to support DuckDB connections (fallback without
+    SQLAlchemy). This should only be used internally.
 
     Parameters
     ----------
-    frame : DataFrame
-    name : str
-        name of SQL table
-    keys : string or sequence, default: None
-        columns to use a primary key
-    con: an open SQL database connection object or a SQLAlchemy connectable
-        Using SQLAlchemy makes it possible to use any DB supported by that
-        library, default: None
-        If a DBAPI2 object, only sqlite3 is supported.
-    dtype : dict of column name to SQL type, default None
-        Optional specifying the datatype for columns. The SQL type should
-        be a SQLAlchemy type, or a string for sqlite3 fallback connection.
-    schema: str, default: None
-        Optional specifying the schema to be used in creating the table.
+    con : duckdb connection object
 
-        .. versionadded:: 1.2.0
     """
-    pandas_sql = pandasSQL_builder(con=con)
-    return pandas_sql._create_sql_schema(
-        frame, name, keys=keys, dtype=dtype, schema=schema
-    )
+
+    def __init__(self, con):
+        self.con = con
+
+    def to_sql(
+        self,
+        frame,
+        name,
+        if_exists="fail",
+        index=True,
+        index_label=None,
+        schema=None,
+        chunksize=None,
+        dtype: DtypeArg | None = None,
+        method=None,
+        **kwargs,
+    ) -> int | None:
+        """
+        Write records stored in a DataFrame to a SQL database.
+
+        Parameters
+        ----------
+        frame: DataFrame
+        name: string
+            Name of SQL table.
+        if_exists: {'fail', 'replace', 'append'}, default 'fail'
+            fail: If table exists, raise a ValueError.
+            replace: If table exists, drop it, recreate it, and insert data.
+            append: If table exists, insert data. Create if it does not exist.
+        index : bool, default True
+            Ignored parameter included for compatibility with SQLAlchemy
+            and SQLite version of ``to_sql``.
+        index_label : string or sequence, default None
+            Ignored parameter included for compatibility with SQLAlchemy
+            and SQLite version of ``to_sql``.
+        schema : string, default None
+            Ignored parameter included for compatibility with SQLAlchemy
+            version of ``to_sql``.
+        chunksize : int, default None
+            Ignored parameter included for compatibility with SQLAlchemy
+            and SQLite version of ``to_sql``.
+        dtype : Ignored parameter included for compatibility with SQLAlchemy
+            and SQLite version of ``to_sql``.
+        method : {None, 'multi', callable}, default None
+            Ignored parameter included for compatibility with SQLAlchemy
+            and SQLite version of ``to_sql``.
+        """
+        table_exits = len(self.con.execute(f"SELECT name FROM sqlite_master WHERE name='{name}'").fetchall()) > 0
+        if table_exits:
+            if if_exists == "fail":
+                raise ValueError(f"Table '{name}' already exists.")
+            elif if_exists == "replace":
+                self.con.execute(f"DROP TABLE {name}")
+                return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0]
+            elif if_exists == "append":
+                return self.con.execute(f"INSERT INTO {name} SELECT * FROM frame").fetchone()[0]
+            else:
+                raise ValueError(f"'{if_exists}' is not valid for if_exists")
+
+        return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0]
+
+
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 741af4324c1a6..9c7727f9bad5e 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -27,6 +27,7 @@
 from io import StringIO
 from pathlib import Path
 import sqlite3
+import duckdb
 
 import numpy as np
 import pytest
@@ -2936,3 +2937,41 @@ def test_if_exists(self):
             (5, "E"),
         ]
         self.drop_table(table_name)
+
+class TestDuckDB:
+
+    def test_to_sql_duck(self):
+        con = duckdb.connect()
+        df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]],
+                          columns=['Name', 'Age', 'Numeric'])
+        df.to_sql('ages', con)
+        result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone()
+        assert result == (3, 24, 2.5,)
+        con.close()
+
+    def test_to_sql_duck_all_exist_options(self):
+        con = duckdb.connect()
+        con.execute("CREATE TABLE ages (a INTEGER)")
+
+        df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]],
+                          columns=['Name', 'Age', 'Numeric'])
+        with pytest.raises(Exception) as e_info:
+            df.to_sql('ages', con)
+
+
+        assert 'already exists' in str(e_info.value)
+
+        df.to_sql('ages', con, if_exists= 'replace')
+        result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone()
+        assert result == (3, 24, 2.5,)
+
+        df.to_sql('ages', con, if_exists='append')
+        result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone()
+        assert result == (6, 48, 5,)
+
+        with pytest.raises(Exception) as e_info:
+            df.to_sql('ages', con, if_exists='flark')
+
+
+        assert 'not valid for if_exists' in str(e_info.value)
+        con.close()
\ No newline at end of file

From e7a48e0cc84f3a5db218ef724515c64d28fdb8e2 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Wed, 26 Jan 2022 16:04:10 +0100
Subject: [PATCH 02/11] Having a go at adding duckdb as an optional dependency

---
 doc/source/getting_started/install.rst |  1 +
 environment.yml                        |  1 +
 pandas/compat/_optional.py             |  1 +
 pandas/io/sql.py                       |  3 ++-
 pandas/tests/io/test_sql.py            | 12 +++++++++++-
 5 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index df9c258f4aa6d..cdff578624ce2 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -348,6 +348,7 @@ Dependency                Minimum Version    Notes
 SQLAlchemy                1.4.0               SQL support for databases other than sqlite
 psycopg2                  2.8.4               PostgreSQL engine for sqlalchemy
 pymysql                   0.10.1              MySQL engine for sqlalchemy
+duckdb                    0.3.1               High-performance analytical database system
 ========================= ================== =============================================================
 
 Other data sources
diff --git a/environment.yml b/environment.yml
index a168e691821c3..63543a5d7b507 100644
--- a/environment.yml
+++ b/environment.yml
@@ -89,6 +89,7 @@ dependencies:
   - numexpr>=2.7.1
   - scipy>=1.4.1
   - numba>=0.50.1
+  - duckdb >=0.3.1
 
   # optional for io
   # ---------------
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index a26bc94ab883e..c494e88906264 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -13,6 +13,7 @@
     "bs4": "4.8.2",
     "blosc": "1.20.1",
     "bottleneck": "1.3.1",
+    "duckdb": "0.3.1",
     "fastparquet": "0.4.0",
     "fsspec": "0.7.4",
     "html5lib": "1.1",
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index f8fc8b3b6b12e..89b78fecccbd3 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -732,7 +732,9 @@ def pandasSQL_builder(con, schema: str | None = None):
     """
     import sqlite3
     import warnings
-    import duckdb
+
+    # duckdb is optional: only probe for its connection type when installed.
+    duckdb = import_optional_dependency("duckdb", errors="ignore")
 
-    if isinstance(con, duckdb.DuckDBPyConnection):
+    if duckdb is not None and isinstance(con, duckdb.DuckDBPyConnection):
         return DuckDBDatabase(con)
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 9c7727f9bad5e..859c50f48214a 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -27,7 +27,6 @@
 from io import StringIO
 from pathlib import Path
 import sqlite3
-import duckdb
 
 import numpy as np
 import pytest
@@ -72,6 +71,13 @@
 except ImportError:
     SQLALCHEMY_INSTALLED = False
 
+try:
+    import duckdb
+
+    DUCKDB_INSTALLED = True
+except ImportError:
+    DUCKDB_INSTALLED = False
+
 SQL_STRINGS = {
     "read_parameters": {
         "sqlite": "SELECT * FROM iris WHERE Name=? AND SepalLength=?",
@@ -2941,6 +2947,8 @@ def test_if_exists(self):
 class TestDuckDB:
 
     def test_to_sql_duck(self):
+        if not DUCKDB_INSTALLED:
+            return
         con = duckdb.connect()
         df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]],
                           columns=['Name', 'Age', 'Numeric'])
@@ -2950,6 +2958,8 @@ def test_to_sql_duck(self):
         con.close()
 
     def test_to_sql_duck_all_exist_options(self):
+        if not DUCKDB_INSTALLED:
+            return
         con = duckdb.connect()
         con.execute("CREATE TABLE ages (a INTEGER)")
 

From 048555d21bc9cd126c81ec3c7932511a45e34fe1 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Wed, 26 Jan 2022 16:13:05 +0100
Subject: [PATCH 03/11] the conda dep is different than pip

---
 environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environment.yml b/environment.yml
index 63543a5d7b507..7afbf17106078 100644
--- a/environment.yml
+++ b/environment.yml
@@ -89,7 +89,7 @@ dependencies:
   - numexpr>=2.7.1
   - scipy>=1.4.1
   - numba>=0.50.1
-  - duckdb >=0.3.1
+  - python-duckdb>=0.3.1
 
   # optional for io
   # ---------------

From e2f6d8868813d369828e52cded27f5133367fa53 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Wed, 26 Jan 2022 17:29:26 +0100
Subject: [PATCH 04/11] conda package name is different

---
 pandas/compat/_optional.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
index c494e88906264..833584c55cf15 100644
--- a/pandas/compat/_optional.py
+++ b/pandas/compat/_optional.py
@@ -13,7 +13,7 @@
     "bs4": "4.8.2",
     "blosc": "1.20.1",
     "bottleneck": "1.3.1",
-    "duckdb": "0.3.1",
+    "python-duckdb": "0.3.1",
     "fastparquet": "0.4.0",
     "fsspec": "0.7.4",
     "html5lib": "1.1",

From 039edf7ea14b44b9d0a1c759c5d1732a548cf183 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Wed, 26 Jan 2022 19:04:19 +0100
Subject: [PATCH 05/11] try to install it through pip

---
 environment.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/environment.yml b/environment.yml
index 7afbf17106078..170b56a568d9a 100644
--- a/environment.yml
+++ b/environment.yml
@@ -124,3 +124,4 @@ dependencies:
     - pydata-sphinx-theme
     - pandas-dev-flaker==0.2.0
     - pytest-cython
+    - duckdb

From 2a1b441ba4424e10b1d7c46cb9a5604fcee7be58 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Wed, 26 Jan 2022 21:33:58 +0100
Subject: [PATCH 06/11] Update deps of ci

---
 ci/deps/actions-310.yaml       | 1 +
 ci/deps/actions-38.yaml        | 1 +
 ci/deps/actions-39.yaml        | 1 +
 ci/deps/azure-windows-310.yaml | 1 +
 ci/deps/azure-windows-39.yaml  | 1 +
 5 files changed, 5 insertions(+)

diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
index 9829380620f86..f3841b3c5eb15 100644
--- a/ci/deps/actions-310.yaml
+++ b/ci/deps/actions-310.yaml
@@ -43,6 +43,7 @@ dependencies:
   - s3fs
   - scipy
   - sqlalchemy
+  - duckdb
   - tabulate
   - xarray
   - xlrd
diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml
index b23f686d845e9..00473c2e0bb79 100644
--- a/ci/deps/actions-38.yaml
+++ b/ci/deps/actions-38.yaml
@@ -42,6 +42,7 @@ dependencies:
   - s3fs
   - scipy
   - sqlalchemy
+  - duckdb
   - tabulate
   - xarray
   - xlrd
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
index 631ef40b02e33..ae9f2925ea846 100644
--- a/ci/deps/actions-39.yaml
+++ b/ci/deps/actions-39.yaml
@@ -42,6 +42,7 @@ dependencies:
   - s3fs
   - scipy
   - sqlalchemy
+  - duckdb
   - tabulate
   - xarray
   - xlrd
diff --git a/ci/deps/azure-windows-310.yaml b/ci/deps/azure-windows-310.yaml
index 8e6f4deef6057..b2d104c84d2a1 100644
--- a/ci/deps/azure-windows-310.yaml
+++ b/ci/deps/azure-windows-310.yaml
@@ -33,6 +33,7 @@ dependencies:
   - s3fs>=0.4.2
   - scipy
   - sqlalchemy
+  - duckdb
   - xlrd
   - xlsxwriter
   - xlwt
diff --git a/ci/deps/azure-windows-39.yaml b/ci/deps/azure-windows-39.yaml
index 6f820b1c2aedb..0e0b5afcb25b8 100644
--- a/ci/deps/azure-windows-39.yaml
+++ b/ci/deps/azure-windows-39.yaml
@@ -32,6 +32,7 @@ dependencies:
   - s3fs>=0.4.2
   - scipy
   - sqlalchemy
+  - duckdb
   - xlrd
   - xlsxwriter
   - xlwt

From 18d9c565f77ed756200fd5b5c22495fbd97cc099 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Thu, 27 Jan 2022 11:48:46 +0100
Subject: [PATCH 07/11] Add duckdb as a dep to all cis

---
 ci/deps/actions-310.yaml                  | 2 +-
 ci/deps/actions-38-downstream_compat.yaml | 1 +
 ci/deps/actions-38-minimum_versions.yaml  | 1 +
 ci/deps/actions-38.yaml                   | 2 +-
 ci/deps/actions-39.yaml                   | 2 +-
 ci/deps/azure-macos-310.yaml              | 1 +
 ci/deps/azure-macos-38.yaml               | 1 +
 ci/deps/azure-macos-39.yaml               | 1 +
 ci/deps/azure-windows-310.yaml            | 2 +-
 ci/deps/azure-windows-38.yaml             | 1 +
 ci/deps/azure-windows-39.yaml             | 2 +-
 11 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml
index f3841b3c5eb15..95cc6143d0f52 100644
--- a/ci/deps/actions-310.yaml
+++ b/ci/deps/actions-310.yaml
@@ -43,7 +43,7 @@ dependencies:
   - s3fs
   - scipy
   - sqlalchemy
-  - duckdb
+  - python-duckdb
   - tabulate
   - xarray
   - xlrd
diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml
index af4f7dee851d5..f537260c1b569 100644
--- a/ci/deps/actions-38-downstream_compat.yaml
+++ b/ci/deps/actions-38-downstream_compat.yaml
@@ -45,6 +45,7 @@ dependencies:
   - xlrd
   - xlsxwriter
   - xlwt
+  - python-duckdb
 
   # downstream packages
   - aiobotocore<2.0.0  # GH#44311 pinned to fix docbuild
diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml
index 467402bb6ef7f..e2f3de76b436c 100644
--- a/ci/deps/actions-38-minimum_versions.yaml
+++ b/ci/deps/actions-38-minimum_versions.yaml
@@ -50,3 +50,4 @@ dependencies:
   - xlsxwriter=1.2.2
   - xlwt=1.3.0
   - zstandard=0.15.2
+  - python-duckdb=0.3.1
diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml
index 00473c2e0bb79..f04ed18feb910 100644
--- a/ci/deps/actions-38.yaml
+++ b/ci/deps/actions-38.yaml
@@ -42,7 +42,7 @@ dependencies:
   - s3fs
   - scipy
   - sqlalchemy
-  - duckdb
+  - python-duckdb
   - tabulate
   - xarray
   - xlrd
diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml
index ae9f2925ea846..bb2dc031b314b 100644
--- a/ci/deps/actions-39.yaml
+++ b/ci/deps/actions-39.yaml
@@ -42,7 +42,7 @@ dependencies:
   - s3fs
   - scipy
   - sqlalchemy
-  - duckdb
+  - python-duckdb
   - tabulate
   - xarray
   - xlrd
diff --git a/ci/deps/azure-macos-310.yaml b/ci/deps/azure-macos-310.yaml
index 312fac8091db6..3903fccb56cbc 100644
--- a/ci/deps/azure-macos-310.yaml
+++ b/ci/deps/azure-macos-310.yaml
@@ -34,3 +34,4 @@ dependencies:
   - xlsxwriter
   - xlwt
   - zstandard
+  - python-duckdb
diff --git a/ci/deps/azure-macos-38.yaml b/ci/deps/azure-macos-38.yaml
index 422aa86c57fc7..d335fe7cd668f 100644
--- a/ci/deps/azure-macos-38.yaml
+++ b/ci/deps/azure-macos-38.yaml
@@ -34,3 +34,4 @@ dependencies:
   - xlsxwriter
   - xlwt
   - zstandard
+  - python-duckdb=0.3.1
diff --git a/ci/deps/azure-macos-39.yaml b/ci/deps/azure-macos-39.yaml
index 140d67796452c..09c776674bf86 100644
--- a/ci/deps/azure-macos-39.yaml
+++ b/ci/deps/azure-macos-39.yaml
@@ -34,3 +34,4 @@ dependencies:
   - xlsxwriter
   - xlwt
   - zstandard
+  - python-duckdb
\ No newline at end of file
diff --git a/ci/deps/azure-windows-310.yaml b/ci/deps/azure-windows-310.yaml
index b2d104c84d2a1..136f5d28a4e24 100644
--- a/ci/deps/azure-windows-310.yaml
+++ b/ci/deps/azure-windows-310.yaml
@@ -33,7 +33,7 @@ dependencies:
   - s3fs>=0.4.2
   - scipy
   - sqlalchemy
-  - duckdb
+  - python-duckdb
   - xlrd
   - xlsxwriter
   - xlwt
diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml
index eb533524147d9..8542deda9dfcb 100644
--- a/ci/deps/azure-windows-38.yaml
+++ b/ci/deps/azure-windows-38.yaml
@@ -33,3 +33,4 @@ dependencies:
   - xlsxwriter
   - xlwt
   - zstandard
+  - python-duckdb
\ No newline at end of file
diff --git a/ci/deps/azure-windows-39.yaml b/ci/deps/azure-windows-39.yaml
index 0e0b5afcb25b8..729ec1e9b70c4 100644
--- a/ci/deps/azure-windows-39.yaml
+++ b/ci/deps/azure-windows-39.yaml
@@ -32,7 +32,7 @@ dependencies:
   - s3fs>=0.4.2
   - scipy
   - sqlalchemy
-  - duckdb
+  - python-duckdb
   - xlrd
   - xlsxwriter
   - xlwt

From a7560576dbef0d5348dea970fbfed33f49c7e34d Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Thu, 27 Jan 2022 12:50:16 +0100
Subject: [PATCH 08/11] Oops, accidentally deleted the get_schema function

---
 pandas/io/sql.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 89b78fecccbd3..28483494cc53c 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -2196,6 +2196,40 @@ def _create_sql_schema(
         )
         return str(table.sql_schema())
 
+def get_schema(
+    frame,
+    name: str,
+    keys=None,
+    con=None,
+    dtype: DtypeArg | None = None,
+    schema: str | None = None,
+):
+    """
+    Get the SQL db table schema for the given frame.
+
+    Parameters
+    ----------
+    frame : DataFrame
+    name : str
+        name of SQL table
+    keys : string or sequence, default: None
+        columns to use a primary key
+    con: an open SQL database connection object or a SQLAlchemy connectable
+        Using SQLAlchemy makes it possible to use any DB supported by that
+        library, default: None
+        If a DBAPI2 object, only sqlite3 is supported.
+    dtype : dict of column name to SQL type, default None
+        Optional specifying the datatype for columns. The SQL type should
+        be a SQLAlchemy type, or a string for sqlite3 fallback connection.
+    schema: str, default: None
+        Optional specifying the schema to be used in creating the table.
+
+        .. versionadded:: 1.2.0
+    """
+    pandas_sql = pandasSQL_builder(con=con)
+    return pandas_sql._create_sql_schema(
+        frame, name, keys=keys, dtype=dtype, schema=schema
+    )
 
 class DuckDBDatabase(PandasSQL):
     """

From 9db14abb1bd2ea09725f770594b6ffd91349db07 Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Thu, 27 Jan 2022 15:03:59 +0100
Subject: [PATCH 09/11] One more dep and formatter

---
 ci/deps/actions-310-numpydev.yaml |  1 +
 pandas/io/sql.py                  | 25 +++++++++---
 pandas/tests/io/test_sql.py       | 66 ++++++++++++++++++++-----------
 3 files changed, 63 insertions(+), 29 deletions(-)

diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-310-numpydev.yaml
index 3e32665d5433f..f96deaaddf9fe 100644
--- a/ci/deps/actions-310-numpydev.yaml
+++ b/ci/deps/actions-310-numpydev.yaml
@@ -14,6 +14,7 @@ dependencies:
   - python-dateutil
   - pytz
   - pip
+  - python-duckdb
   - pip:
     - cython==0.29.24 # GH#34014
     - "--extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple"
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
index 28483494cc53c..c6b69b43b852a 100644
--- a/pandas/io/sql.py
+++ b/pandas/io/sql.py
@@ -2196,6 +2196,7 @@ def _create_sql_schema(
         )
         return str(table.sql_schema())
 
+
 def get_schema(
     frame,
     name: str,
@@ -2231,6 +2232,7 @@ def get_schema(
         frame, name, keys=keys, dtype=dtype, schema=schema
     )
 
+
 class DuckDBDatabase(PandasSQL):
     """
     Version of SQLDatabase to support DuckDB connections (fallback without
@@ -2288,18 +2290,32 @@ def to_sql(
             Ignored parameter included for compatibility with SQLAlchemy
             and SQLite version of ``to_sql``.
         """
-        table_exits = len(self.con.execute(f"SELECT name FROM sqlite_master WHERE name='{name}'").fetchall()) > 0
+        # Validate up front: an unknown option must be rejected even when the
+        # target table does not exist yet.
+        if if_exists not in ("fail", "replace", "append"):
+            raise ValueError(f"'{if_exists}' is not valid for if_exists")
+
+        table_exits = (
+            len(
+                self.con.execute(
+                    f"SELECT name FROM sqlite_master WHERE name='{name}'"
+                ).fetchall()
+            )
+            > 0
+        )
         if table_exits:
             if if_exists == "fail":
                 raise ValueError(f"Table '{name}' already exists.")
             elif if_exists == "replace":
                 self.con.execute(f"DROP TABLE {name}")
-                return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0]
+                return self.con.execute(
+                    f"CREATE TABLE {name} AS SELECT * FROM frame"
+                ).fetchone()[0]
             elif if_exists == "append":
-                return self.con.execute(f"INSERT INTO {name} SELECT * FROM frame").fetchone()[0]
-            else:
-                raise ValueError(f"'{if_exists}' is not valid for if_exists")
+                return self.con.execute(
+                    f"INSERT INTO {name} SELECT * FROM frame"
+                ).fetchone()[0]
 
-        return self.con.execute(f"CREATE TABLE {name} AS SELECT * FROM frame").fetchone()[0]
-
-
+        return self.con.execute(
+            f"CREATE TABLE {name} AS SELECT * FROM frame"
+        ).fetchone()[0]
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 859c50f48214a..aaf6de6b9550a 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2944,17 +2944,25 @@ def test_if_exists(self):
         ]
         self.drop_table(table_name)
 
-class TestDuckDB:
 
+class TestDuckDB:
     def test_to_sql_duck(self):
         if not DUCKDB_INSTALLED:
             return
         con = duckdb.connect()
-        df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]],
-                          columns=['Name', 'Age', 'Numeric'])
-        df.to_sql('ages', con)
-        result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone()
-        assert result == (3, 24, 2.5,)
+        df = pd.DataFrame(
+            [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]],
+            columns=["Name", "Age", "Numeric"],
+        )
+        df.to_sql("ages", con)
+        result = con.execute(
+            'SELECT count(*), sum("Age"), sum("Numeric") FROM ages'
+        ).fetchone()
+        assert result == (
+            3,
+            24,
+            2.5,
+        )
         con.close()
 
     def test_to_sql_duck_all_exist_options(self):
@@ -2963,25 +2971,37 @@ def test_to_sql_duck_all_exist_options(self):
         con = duckdb.connect()
         con.execute("CREATE TABLE ages (a INTEGER)")
 
-        df = pd.DataFrame([[None, 10, 1.0], ['nick', None, 1.5], ['juli', 14, None]],
-                          columns=['Name', 'Age', 'Numeric'])
+        df = pd.DataFrame(
+            [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]],
+            columns=["Name", "Age", "Numeric"],
+        )
         with pytest.raises(Exception) as e_info:
-            df.to_sql('ages', con)
-
-
-        assert 'already exists' in str(e_info.value)
-
-        df.to_sql('ages', con, if_exists= 'replace')
-        result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone()
-        assert result == (3, 24, 2.5,)
+            df.to_sql("ages", con)
+
+        assert "already exists" in str(e_info.value)
+
+        df.to_sql("ages", con, if_exists="replace")
+        result = con.execute(
+            'SELECT count(*), sum("Age"), sum("Numeric") FROM ages'
+        ).fetchone()
+        assert result == (
+            3,
+            24,
+            2.5,
+        )
 
-        df.to_sql('ages', con, if_exists='append')
-        result = con.execute('SELECT count(*), sum("Age"), sum("Numeric") FROM ages').fetchone()
-        assert result == (6, 48, 5,)
+        df.to_sql("ages", con, if_exists="append")
+        result = con.execute(
+            'SELECT count(*), sum("Age"), sum("Numeric") FROM ages'
+        ).fetchone()
+        assert result == (
+            6,
+            48,
+            5,
+        )
 
         with pytest.raises(Exception) as e_info:
-            df.to_sql('ages', con, if_exists='flark')
-
+            df.to_sql("ages", con, if_exists="flark")
 
-        assert 'not valid for if_exists' in str(e_info.value)
-        con.close()
\ No newline at end of file
+        assert "not valid for if_exists" in str(e_info.value)
+        con.close()

From f9d12932aab452abdcb90d368713aff619f0e61c Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Thu, 27 Jan 2022 17:00:39 +0100
Subject: [PATCH 10/11] formatter

---
 ci/deps/azure-macos-39.yaml   |  2 +-
 ci/deps/azure-windows-38.yaml |  2 +-
 pandas/tests/io/test_sql.py   | 14 ++++++--------
 requirements-dev.txt          |  2 ++
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/ci/deps/azure-macos-39.yaml b/ci/deps/azure-macos-39.yaml
index 09c776674bf86..f005ef1f81275 100644
--- a/ci/deps/azure-macos-39.yaml
+++ b/ci/deps/azure-macos-39.yaml
@@ -34,4 +34,4 @@ dependencies:
   - xlsxwriter
   - xlwt
   - zstandard
-  - python-duckdb
\ No newline at end of file
+  - python-duckdb
diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml
index 8542deda9dfcb..5562f40516eed 100644
--- a/ci/deps/azure-windows-38.yaml
+++ b/ci/deps/azure-windows-38.yaml
@@ -33,4 +33,4 @@ dependencies:
   - xlsxwriter
   - xlwt
   - zstandard
-  - python-duckdb
\ No newline at end of file
+  - python-duckdb
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index aaf6de6b9550a..681e3fcd7ebec 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2950,7 +2950,7 @@ def test_to_sql_duck(self):
         if not DUCKDB_INSTALLED:
             return
         con = duckdb.connect()
-        df = pd.DataFrame(
+        df = DataFrame(
             [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]],
             columns=["Name", "Age", "Numeric"],
         )
@@ -2971,15 +2971,14 @@ def test_to_sql_duck_all_exist_options(self):
         con = duckdb.connect()
         con.execute("CREATE TABLE ages (a INTEGER)")
 
-        df = pd.DataFrame(
+        df = DataFrame(
             [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]],
             columns=["Name", "Age", "Numeric"],
         )
-        with pytest.raises(Exception) as e_info:
+        msg = "Table ages already exists."
+        with pytest.raises(ValueError, match=msg):
             df.to_sql("ages", con)
 
-        assert "already exists" in str(e_info.value)
-
         df.to_sql("ages", con, if_exists="replace")
         result = con.execute(
             'SELECT count(*), sum("Age"), sum("Numeric") FROM ages'
@@ -2999,9 +2998,8 @@ def test_to_sql_duck_all_exist_options(self):
             48,
             5,
         )
-
-        with pytest.raises(Exception) as e_info:
+        msg = "flark not valid for if_exists"
+        with pytest.raises(ValueError, match=msg):
             df.to_sql("ages", con, if_exists="flark")
 
-        assert "not valid for if_exists" in str(e_info.value)
         con.close()
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 2434428101285..c2136e4fbd04c 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -61,6 +61,7 @@ matplotlib>=3.3.2
 numexpr>=2.7.1
 scipy>=1.4.1
 numba>=0.50.1
+python-duckdb>=0.3.1
 beautifulsoup4>=4.8.2
 html5lib
 lxml
@@ -86,4 +87,5 @@ natsort
 pydata-sphinx-theme
 pandas-dev-flaker==0.2.0
 pytest-cython
+duckdb
 setuptools>=51.0.0

From 55074d029c24ee06f2072d6d46da7b35f65addcb Mon Sep 17 00:00:00 2001
From: Pedro Holanda <pedroholanda@gmail.com>
Date: Thu, 27 Jan 2022 23:43:59 +0100
Subject: [PATCH 11/11] Fix test

---
 pandas/tests/io/test_sql.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
index 681e3fcd7ebec..a70031bb1f63a 100644
--- a/pandas/tests/io/test_sql.py
+++ b/pandas/tests/io/test_sql.py
@@ -2975,7 +2975,7 @@ def test_to_sql_duck_all_exist_options(self):
             [[None, 10, 1.0], ["nick", None, 1.5], ["juli", 14, None]],
             columns=["Name", "Age", "Numeric"],
         )
-        msg = "Table ages already exists."
+        msg = "Table 'ages' already exists."
         with pytest.raises(ValueError, match=msg):
             df.to_sql("ages", con)
 
@@ -2998,7 +2998,7 @@ def test_to_sql_duck_all_exist_options(self):
             48,
             5,
         )
-        msg = "flark not valid for if_exists"
+        msg = "'flark' is not valid for if_exists"
         with pytest.raises(ValueError, match=msg):
             df.to_sql("ages", con, if_exists="flark")