From 660773d2bf5cc6616e93f219d8ef5c6b1dbc0153 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:23:47 -0700 Subject: [PATCH 1/6] BUG: to_sql with ArrowExtensionArray --- pandas/core/arrays/arrow/array.py | 5 ++++- pandas/io/sql.py | 12 +++++++----- pandas/tests/extension/test_arrow.py | 1 + pandas/tests/io/test_sql.py | 21 +++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 551b925f42579..d8f1875d81d02 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -2087,7 +2087,10 @@ def _dt_round( return self._round_temporally("round", freq, ambiguous, nonexistent) def _dt_to_pydatetime(self): - return np.array(self._pa_array.to_pylist(), dtype=object) + data = self._pa_array.to_pylist() + if self._dtype.pyarrow_dtype.unit == "ns": + data = [ts.to_pydatetime(warn=False) for ts in data] + return np.array(data, dtype=object) def _dt_tz_localize( self, diff --git a/pandas/io/sql.py b/pandas/io/sql.py index ec04a9ce81d92..e6709bce42e33 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -964,14 +964,16 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: data_list: list[np.ndarray] = [None] * ncols # type: ignore[list-item] for i, (_, ser) in enumerate(temp.items()): - vals = ser._values - if vals.dtype.kind == "M": - d = vals.to_pydatetime() - elif vals.dtype.kind == "m": + if ser.dtype.kind == "M": + d = ser.dt.to_pydatetime() + elif ser.dtype.kind == "m": + vals = ser._values + if isinstance(vals, ArrowExtensionArray): + vals = vals.to_numpy(dtype=np.dtype("m8")) # store as integers, see GH#6921, GH#7076 d = vals.view("i8").astype(object) else: - d = vals.astype(object) + d = ser._values.astype(object) assert isinstance(d, np.ndarray), type(d) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 
2378710555340..fe887687aa13f 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2271,6 +2271,7 @@ def test_dt_to_pydatetime(): result = ser.dt.to_pydatetime() expected = np.array(data, dtype=object) tm.assert_numpy_array_equal(result, expected) + assert all(type(res) is datetime for res in result) expected = ser.astype("datetime64[ns]").dt.to_pydatetime() tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 3d79d483038ee..6877e1a3a1afd 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -24,6 +24,7 @@ date, datetime, time, + timedelta, ) from io import StringIO from pathlib import Path @@ -549,6 +550,26 @@ def test_dataframe_to_sql(conn, test_frame1, request): test_frame1.to_sql("test", conn, if_exists="append", index=False) +@pytest.mark.db +@pytest.mark.parametrize("conn", all_connectable) +def test_dataframe_to_sql_arrow_dtypes(conn, test_frame1, request): + # GH 52046 + pytest.importorskip("pyarrow") + df = DataFrame( + { + "int": pd.array([1], dtype="int8[pyarrow]"), + "datetime": pd.array( + [datetime(2023, 1, 1)], dtype="timestamp[ns][pyarrow]" + ), + "timedelta": pd.array([timedelta(1)], dtype="duration[ns][pyarrow]"), + "string": pd.array(["a"], dtype="string[pyarrow]"), + } + ) + conn = request.getfixturevalue(conn) + with tm.assert_produces_warning(UserWarning, match="the 'timedelta'"): + df.to_sql("test_arrow", conn, if_exists="replace", index=False) + + @pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) @pytest.mark.parametrize("method", [None, "multi"]) From 775043a505e0b17c02fc1712d3c12529568b0386 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 17 Mar 2023 13:24:59 -0700 Subject: [PATCH 2/6] Remove unneeded fixture --- pandas/tests/io/test_sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/test_sql.py 
b/pandas/tests/io/test_sql.py index 6877e1a3a1afd..1bfc5cf0c3178 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -552,7 +552,7 @@ def test_dataframe_to_sql(conn, test_frame1, request): @pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) -def test_dataframe_to_sql_arrow_dtypes(conn, test_frame1, request): +def test_dataframe_to_sql_arrow_dtypes(conn, request): # GH 52046 pytest.importorskip("pyarrow") df = DataFrame( From 9bd31de96ad151600cf2e9e04039fd9026c21458 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 17 Mar 2023 15:25:06 -0700 Subject: [PATCH 3/6] ns --- pandas/io/sql.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index e6709bce42e33..044fd9806d921 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -969,7 +969,7 @@ def insert_data(self) -> tuple[list[str], list[np.ndarray]]: elif ser.dtype.kind == "m": vals = ser._values if isinstance(vals, ArrowExtensionArray): - vals = vals.to_numpy(dtype=np.dtype("m8")) + vals = vals.to_numpy(dtype=np.dtype("m8[ns]")) # store as integers, see GH#6921, GH#7076 d = vals.view("i8").astype(object) else: From e3410cfee37aa6dc47da0f10d662c1cc18a427a8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 20 Mar 2023 13:49:25 -0700 Subject: [PATCH 4/6] pandas CI not being set --- .github/workflows/ubuntu.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index fe318ce80c98d..4136f248806ca 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -28,7 +28,6 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] pattern: ["not single_cpu", "single_cpu"] pyarrow_version: ["8", "9", "10"] - pandas_ci: [1] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml 
@@ -99,7 +98,7 @@ jobs: LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }} PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} - PANDAS_CI: ${{ matrix.pandas_ci }} + PANDAS_CI: ${{ matrix.pandas_ci || 1 }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} From f701846b14e29bfc449129dd68d2a674b4acbcbe Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 20 Mar 2023 17:50:43 -0700 Subject: [PATCH 5/6] to string --- .github/workflows/ubuntu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 4136f248806ca..9e0ccf41cfa54 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -98,7 +98,7 @@ jobs: LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }} PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} - PANDAS_CI: ${{ matrix.pandas_ci || 1 }} + PANDAS_CI: ${{ matrix.pandas_ci || '1' }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }} From 9b6f60a17787e2ce4f7e567eec85062227e41908 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 21 Mar 2023 11:42:37 -0700 Subject: [PATCH 6/6] Undo ubuntu workflow --- .github/workflows/ubuntu.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 9e0ccf41cfa54..fe318ce80c98d 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -28,6 +28,7 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] pattern: ["not 
single_cpu", "single_cpu"] pyarrow_version: ["8", "9", "10"] + pandas_ci: [1] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml @@ -98,7 +99,7 @@ jobs: LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_DATA_MANAGER: ${{ matrix.pandas_data_manager || 'block' }} PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} - PANDAS_CI: ${{ matrix.pandas_ci || '1' }} + PANDAS_CI: ${{ matrix.pandas_ci }} TEST_ARGS: ${{ matrix.test_args || '' }} PYTEST_WORKERS: ${{ contains(matrix.pattern, 'not single_cpu') && 'auto' || '1' }} PYTEST_TARGET: ${{ matrix.pytest_target || 'pandas' }}