diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index 61ee894f4b126..4b06023ab3963 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -53,6 +53,10 @@ PyArrow This feature is experimental, and the API can change in a future release without warning. +The ``dtype`` argument of :class:`Series` and :class:`DataFrame` can accept a string of a `pyarrow data type <https://arrow.apache.org/docs/python/api/datatypes.html>`__ +with ``pyarrow`` in brackets e.g. ``"int64[pyarrow]"`` or, for pyarrow data types that take parameters, an :class:`ArrowDtype` +initialized with a ``pyarrow.DataType``. + The :class:`arrays.ArrowExtensionArray` is backed by a :external+pyarrow:py:class:`pyarrow.ChunkedArray` with a :external+pyarrow:py:class:`pyarrow.DataType` instead of a NumPy array and data type. The ``.dtype`` of a :class:`arrays.ArrowExtensionArray` is an :class:`ArrowDtype`. diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index d8a319da2065e..3f45b6393bfb7 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -54,6 +54,8 @@ initialized with a ``pyarrow.DataType``. Most operations are supported and have been implemented using `pyarrow compute <https://arrow.apache.org/docs/python/api/compute.html>`__ functions. We recommend installing the latest version of PyArrow to access the most recently implemented compute functions. +Please view the :ref:`API reference <api.arrays.arrow>` for more information. + .. warning:: This feature is experimental, and the API can change in a future release without warning. @@ -1034,7 +1036,6 @@ Conversion - Bug in :meth:`DataFrame.apply` that returns a :class:`DataFrame` instead of a :class:`Series` when applied to an empty :class:`DataFrame` and ``axis=1`` (:issue:`39111`) - Bug when inferring the dtype from an iterable that is *not* a NumPy ``ndarray`` consisting of all NumPy unsigned integer scalars did not result in an unsigned integer dtype (:issue:`47294`) - Bug in :meth:`DataFrame.eval` when pandas objects (e.g. 
``'Timestamp'``) were column names (:issue:`44603`) -- Strings ^^^^^^^ diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 1035fd08a1a36..3f60b39980f36 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -215,7 +215,7 @@ for unit in ["s", "ms", "us", "ns"] for tz in [None, "UTC", "US/Pacific", "US/Eastern"] ] - TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]] + # TIMEDELTA_PYARROW_DTYPES = [pa.duration(unit) for unit in ["s", "ms", "us", "ns"]] BOOL_PYARROW_DTYPES = [pa.bool_()] @@ -227,7 +227,7 @@ + TIME_PYARROW_DTYPES + DATE_PYARROW_DTYPES + DATETIME_PYARROW_DTYPES - + TIMEDELTA_PYARROW_DTYPES + # + TIMEDELTA_PYARROW_DTYPES + BOOL_PYARROW_DTYPES ) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index cfae5b4cae681..5ce48ace13dbe 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -199,6 +199,13 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): <ArrowExtensionArray> [1, 1, <NA>] Length: 3, dtype: int64[pyarrow] + + Create an ArrowExtensionArray directly from a pyarrow array. + >>> import pyarrow as pa + >>> pd.arrays.ArrowExtensionArray(pa.array([1, 1, None])) + <ArrowExtensionArray> + [1, 1, <NA>] + Length: 3, dtype: int64[pyarrow] """ # noqa: E501 (http link too long) _data: pa.ChunkedArray diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 95a15386ed0cb..314ebb1606c7c 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1750,3 +1750,10 @@ def test_mode(data_for_grouping, dropna, take_idx, exp_idx, request): result = ser.mode(dropna=dropna) expected = pd.Series(data_for_grouping.take(exp_idx)) tm.assert_series_equal(result, expected) + + +def test_repr_from_arrow_array(data, frame_or_series): + # GH 48238 + pa_array = pa.array([data[0], None]) + result = frame_or_series(pa_array, dtype=ArrowDtype(pa_array.type)) + repr(result)