From 981d857f06a0204edb49b10f1f4283d6ae99de68 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 21 Dec 2020 14:54:37 +0100 Subject: [PATCH] Backport PR #38539: BUG: fix array conversion from Arrow for sliced array --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/arrays/_arrow_utils.py | 2 +- pandas/tests/arrays/masked/test_arrow_compat.py | 12 ++++++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e2521cedb64cc..4816e45861f4c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -747,6 +747,7 @@ I/O - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`) - Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`) - :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`) +- Bug in the conversion of a sliced ``pyarrow.Table`` with missing values to a DataFrame (:issue:`38525`) Period ^^^^^^ diff --git a/pandas/core/arrays/_arrow_utils.py b/pandas/core/arrays/_arrow_utils.py index c89f5554d0715..959a13d9c107d 100644 --- a/pandas/core/arrays/_arrow_utils.py +++ b/pandas/core/arrays/_arrow_utils.py @@ -30,7 +30,7 @@ def pyarrow_array_to_numpy_and_mask(arr, dtype): bitmask = buflist[0] if bitmask is not None: mask = pyarrow.BooleanArray.from_buffers( - pyarrow.bool_(), len(arr), [None, bitmask] + pyarrow.bool_(), len(arr), [None, bitmask], offset=arr.offset ) mask = np.asarray(mask) else: diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index ca6fb1cf9dca0..8bb32dec2cc0e 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -52,3 +52,15 @@ def test_arrow_from_arrow_uint(): expected = pd.array([1, 2, 3, 4, None], dtype="UInt32") 
tm.assert_extension_array_equal(result, expected) + + +@td.skip_if_no("pyarrow", min_version="0.16.0") +def test_arrow_sliced(): + # https://github.com/pandas-dev/pandas/issues/38525 + import pyarrow as pa + + df = pd.DataFrame({"a": pd.array([0, None, 2, 3, None], dtype="Int64")}) + table = pa.table(df) + result = table.slice(2, None).to_pandas() + expected = df.iloc[2:].reset_index(drop=True) + tm.assert_frame_equal(result, expected)