From 5c7f1f4c00f4b190453c07ffac2ab493aa126041 Mon Sep 17 00:00:00 2001 From: Nicolas Camenisch Date: Mon, 26 Jun 2023 13:54:38 +0200 Subject: [PATCH 1/2] BUG: to_sql fails for pyarrow date dtype (#53854) --- doc/source/whatsnew/v2.1.0.rst | 2 ++ pandas/io/sql.py | 14 ++++++++++---- pandas/tests/io/test_sql.py | 1 + 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 7b9efd7f593dd..fbcb4b05ae34a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -364,12 +364,14 @@ Datetimelike - :meth:`DatetimeIndex.map` with ``na_action="ignore"`` now works as expected. (:issue:`51644`) - Bug in :class:`DateOffset` which had inconsistent behavior when multiplying a :class:`DateOffset` object by a constant (:issue:`47953`) - Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`) +- Bug in :meth:`DataFrame.to_sql` raising ``ValueError`` for pyarrow-backed date-like dtypes (:issue:`53854`) - Bug in :meth:`Timestamp.date`, :meth:`Timestamp.isocalendar`, :meth:`Timestamp.timetuple`, and :meth:`Timestamp.toordinal` were returning incorrect results for inputs outside those supported by the Python standard library's datetime module (:issue:`53668`) - Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`) - Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`) - Bug in constructing a :class:`Series` or :class:`DataFrame` from a datetime or timedelta scalar always inferring nanosecond resolution instead of inferring from the input (:issue:`52212`) - Bug in parsing datetime strings with weekday but no day e.g. 
"2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`) + Timedelta ^^^^^^^^^ - :meth:`TimedeltaIndex.map` with ``na_action="ignore"`` now works as expected (:issue:`51644`) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 51cc3eacae284..5fa32d2cb7d83 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -974,10 +974,16 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: for i, (_, ser) in enumerate(temp.items()): if ser.dtype.kind == "M": if isinstance(ser._values, ArrowExtensionArray): - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=FutureWarning) - # GH#52459 to_pydatetime will return Index[object] - d = np.asarray(ser.dt.to_pydatetime(), dtype=object) + import pyarrow as pa + + if pa.types.is_date(ser.dtype.pyarrow_dtype): + # GH#53854 to_pydatetime not supported for pyarrow date dtypes + d = ser._values.astype(object) + else: + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=FutureWarning) + # GH#52459 to_pydatetime will return Index[object] + d = np.asarray(ser.dt.to_pydatetime(), dtype=object) else: d = ser._values.to_pydatetime() elif ser.dtype.kind == "m": diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 75fcef09535d4..066a68cf9e7c7 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -561,6 +561,7 @@ def test_dataframe_to_sql_arrow_dtypes(conn, request): "datetime": pd.array( [datetime(2023, 1, 1)], dtype="timestamp[ns][pyarrow]" ), + "date": pd.array([date(2023, 1, 1)], dtype="date32[day][pyarrow]"), "timedelta": pd.array([timedelta(1)], dtype="duration[ns][pyarrow]"), "string": pd.array(["a"], dtype="string[pyarrow]"), } From 16af3dafc7303e852be075e46a45bd540ad2b2f1 Mon Sep 17 00:00:00 2001 From: Nicolas Camenisch Date: Mon, 26 Jun 2023 19:41:35 +0200 Subject: [PATCH 2/2] use ArrowExtensionArray.to_numpy instead of ArrowExtensionArray.astype --- pandas/io/sql.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 5fa32d2cb7d83..719479754340b 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -978,7 +978,7 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: if pa.types.is_date(ser.dtype.pyarrow_dtype): # GH#53854 to_pydatetime not supported for pyarrow date dtypes - d = ser._values.astype(object) + d = ser._values.to_numpy(dtype=object) else: with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=FutureWarning)