Skip to content

Commit 34fff1f

Browse files
jorisvandenbossche authored and TomAugspurger committed
ENH: Add IntegerArray.__arrow_array__ for custom conversion to Arrow (#28368)
* ENH: Add IntegerArray.__arrow_array__ for custom conversion to Arrow
* simplify pyarrow version check in tests
* add whatsnew
1 parent 3f40528 commit 34fff1f

File tree

4 files changed

+39
-3
lines changed

4 files changed

+39
-3
lines changed

doc/source/whatsnew/v1.0.0.rst

+7-3
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,19 @@ including other versions of pandas.
2020

2121
Enhancements
2222
~~~~~~~~~~~~
23-
- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
24-
-
23+
2524

2625
.. _whatsnew_1000.enhancements.other:
2726

2827
Other enhancements
2928
^^^^^^^^^^^^^^^^^^
3029

31-
-
30+
- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`)
31+
- The :ref:`integer dtype <integer_na>` with support for missing values can now be converted to
32+
``pyarrow`` (>= 0.15.0), which means that it is supported in writing to the Parquet file format
33+
when using the ``pyarrow`` engine. It is not yet supported when converting back to
34+
pandas (so it will become an integer or float dtype depending on the presence of missing data).
35+
(:issue:`28368`)
3236
-
3337

3438
.. _whatsnew_1000.api_breaking:

pandas/core/arrays/integer.py

+8
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,14 @@ def __array__(self, dtype=None):
367367
"""
368368
return self._coerce_to_ndarray()
369369

370+
def __arrow_array__(self, type=None):
    """
    Build a pyarrow Array from this array's values and mask.

    ``self._data`` supplies the values and ``self._mask`` is handed to
    ``pyarrow.array`` as its ``mask`` argument; ``type`` is forwarded
    unchanged (``None`` by default).
    """
    # Imported lazily so pyarrow is only needed when this method is called.
    import pyarrow

    return pyarrow.array(self._data, type=type, mask=self._mask)
377+
370378
_HANDLED_TYPES = (np.ndarray, numbers.Number)
371379

372380
def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):

pandas/tests/arrays/test_integer.py

+12
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
from pandas.core.dtypes.generic import ABCIndexClass
57

68
import pandas as pd
@@ -817,6 +819,16 @@ def test_ufunc_reduce_raises(values):
817819
np.add.reduce(a)
818820

819821

822+
@td.skip_if_no("pyarrow", min_version="0.14.1.dev")
def test_arrow_array(data):
    """``pyarrow.array(data)`` equals an explicit list-based conversion."""
    # protocol added in 0.15.0
    import pyarrow

    # Reference result: convert the elements as a plain list, naming the
    # Arrow type after the lower-cased dtype name of ``data``.
    arrow_type = data.dtype.name.lower()
    expected = pyarrow.array(list(data), type=arrow_type, from_pandas=True)

    result = pyarrow.array(data)
    assert result.equals(expected)
830+
831+
820832
# TODO(jreback) - these need testing / are broken
821833

822834
# shift

pandas/tests/io/test_parquet.py

+12
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,18 @@ def test_empty_dataframe(self, pa):
488488
df = pd.DataFrame()
489489
check_round_trip(df, pa)
490490

491+
@td.skip_if_no("pyarrow", min_version="0.14.1.dev")
def test_nullable_integer(self, pa):
    """Round-trip ``Int64`` columns, with and without a missing value."""
    # Each case: (column values, dtype expected after reading back).
    cases = [
        # currently de-serialized as plain int
        ([1, 2, 3], "int64"),
        # if missing values currently de-serialized as float
        ([1, 2, 3, None], "float64"),
    ]
    for values, roundtrip_dtype in cases:
        df = pd.DataFrame({"a": pd.Series(values, dtype="Int64")})
        expected = df.assign(a=df.a.astype(roundtrip_dtype))
        check_round_trip(df, pa, expected=expected)
502+
491503

492504
class TestParquetFastParquet(Base):
493505
@td.skip_if_no("fastparquet", min_version="0.2.1")

0 commit comments

Comments
 (0)