pandas-dev · jreback · Jul 28, 2022 · Jul 15, 2022 · Jul 15, 2022 · Jul 15, 2022
diff --git a/pandas/core/arrays/arrow/_arrow_utils.py b/pandas/core/arrays/arrow/_arrow_utils.py
@@ -19,9 +19,12 @@ def fallback_performancewarning(version: str | None = None) -> None:
     Raise a PerformanceWarning for falling back to ExtensionArray's
     non-pyarrow method
     """
-    msg = "Falling back on a non-pyarrow code path which may decrease performance."
+    msg = (
+        "Falling back on a non-pyarrow code path which may decrease performance or "
+        "not be fully compatible with pyarrow."
+    )
     if version is not None:
-        msg += f" Upgrade to pyarrow >={version} to possibly suppress this warning."
+        msg += f" Upgrade to pyarrow >={version} to suppress this warning."
     warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level())
 
 

diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -35,7 +35,10 @@
 
 from pandas.core.algorithms import resolve_na_sentinel
 from pandas.core.arraylike import OpsMixin
-from pandas.core.arrays.base import ExtensionArray
+from pandas.core.arrays.base import (
+    ExtensionArray,
+    ExtensionArrayT,
+)
 from pandas.core.indexers import (
     check_array_indexer,
     unpack_tuple_and_ellipses,
@@ -702,6 +705,59 @@ def _indexing_key_to_indices(
             indices = np.arange(n)[key]
         return indices
 
+    # TODO: redefine _rank using pc.rank with pyarrow 9.0
+
+    def _quantile(
+        self: ArrowExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
+    ) -> ArrowExtensionArrayT | ExtensionArrayT:
+        """
+        Compute the quantiles of self for each quantile in `qs`.
+
+        Parameters
+        ----------
+        qs : np.ndarray[float64]
+        interpolation : str
+
+        Returns
+        -------
+        same type as self
+        """
+        if pa_version_under4p0:  # old pyarrow: warn, then use the base method
+            fallback_performancewarning("4")
+            return super()._quantile(qs, interpolation)
+        result = pc.quantile(self._data, q=qs, interpolation=interpolation)
+        return type(self)(result)  # re-wrap the pyarrow result in this class
+
+    def _mode(
+        self: ArrowExtensionArrayT, dropna: bool = True
+    ) -> ArrowExtensionArrayT | ExtensionArrayT:
+        """
+        Returns the mode(s) of the ExtensionArray.
+
+        Always returns `ExtensionArray` even if only one value.
+
+        Parameters
+        ----------
+        dropna : bool, default True
+            Don't consider counts of NA values.
+            Ignored on the pyarrow path: not implemented by pyarrow.
+
+        Returns
+        -------
+        same type as self
+            Sorted, if possible.
+        """
+        if pa_version_under6p0:  # old pyarrow: warn, then use the base method
+            fallback_performancewarning("6")
+            return super()._mode(dropna)
+        modes = pc.mode(self._data, pc.count_distinct(self._data).as_py())
+        values = modes.field(0)
+        counts = modes.field(1)
+        # counts are sorted descending (counts[0] = max): keep all values tied at max
+        mask = pc.equal(counts, counts[0])  # NOTE(review): confirm counts non-empty
+        most_common = values.filter(mask)
+        return type(self)(most_common)
+
     def _maybe_convert_setitem_value(self, value):
         """Maybe convert value to be pyarrow compatible."""
         # TODO: Make more robust like ArrowStringArray._maybe_convert_setitem_value

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -10,7 +10,7 @@
 classes (if they are relevant for the extension interface for all dtypes), or
 be added to the array-specific tests in `pandas/tests/arrays/`.
 """
-
+import contextlib
 from datetime import (
     date,
     datetime,
@@ -24,8 +24,11 @@
 from pandas.compat import (
     pa_version_under2p0,
     pa_version_under3p0,
+    pa_version_under4p0,
+    pa_version_under6p0,
     pa_version_under8p0,
 )
+from pandas.errors import PerformanceWarning
 
 import pandas as pd
 import pandas._testing as tm
@@ -1838,3 +1841,75 @@ def test_compare_array(self, data, comparison_op, na_value, request):
 def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
     with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
         ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")
+
+
+@pytest.mark.parametrize(
+    "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"]
+)
+@pytest.mark.parametrize("quantile", [0.5, [0.5, 0.5]])
+def test_quantile(data, interpolation, quantile, request):
+    data = data.take([0, 0, 0])  # three copies of the first element
+    ser = pd.Series(data)
+    if pa_version_under4p0:
+        with tm.assert_produces_warning(PerformanceWarning):
+            # Only validate the PerformanceWarning; errors are suppressed because
+            # ExtensionArray._quantile may not support all pyarrow types
+            with contextlib.suppress(Exception):
+                ser.quantile(q=quantile, interpolation=interpolation)
+    else:
+        pa_dtype = data.dtype.pyarrow_dtype
+        if not (pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype)):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    raises=pa.ArrowNotImplementedError,
+                    reason=f"quantile not supported by pyarrow for {pa_dtype}",
+                )
+            )
+        result = ser.quantile(q=quantile, interpolation=interpolation)
+        if quantile == 0.5:
+            assert result == data[0]
+        else:
+            # Compare values only: cast both sides to float64[pyarrow] first
+            result = result.astype("float64[pyarrow]")
+            expected = pd.Series(
+                data.take([0, 0]).astype("float64[pyarrow]"), index=[0.5, 0.5]
+            )
+            tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize("dropna", [True, False])
+@pytest.mark.parametrize(
+    "take_idx, exp_idx",
+    [[[0, 0, 2, 2, 4, 4], [4, 0]], [[0, 0, 0, 2, 4, 4], [0]]],
+    ids=["multi_mode", "single_mode"],
+)
+def test_mode(data_for_grouping, dropna, take_idx, exp_idx, request):
+    data = data_for_grouping.take(take_idx)
+    ser = pd.Series(data)
+    if pa_version_under6p0:
+        with tm.assert_produces_warning(
+            PerformanceWarning, raise_on_extra_warnings=False
+        ):
+            # Only validate the PerformanceWarning; errors are suppressed because
+            # ExtensionArray._mode may not support all pyarrow types
+            with contextlib.suppress(Exception):
+                ser.mode(dropna=dropna)
+    else:
+        pa_dtype = data_for_grouping.dtype.pyarrow_dtype
+        if pa.types.is_temporal(pa_dtype):
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    raises=pa.ArrowNotImplementedError,
+                    reason=f"mode not supported by pyarrow for {pa_dtype}",
+                )
+            )
+        elif pa.types.is_boolean(pa_dtype) and "multi_mode" in request.node.nodeid:
+            # https://issues.apache.org/jira/browse/ARROW-17096
+            request.node.add_marker(
+                pytest.mark.xfail(
+                    reason="https://issues.apache.org/jira/browse/ARROW-17096",
+                )
+            )
+        result = ser.mode(dropna=dropna)
+        expected = pd.Series(data_for_grouping.take(exp_idx))
+        tm.assert_series_equal(result, expected)