diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index efa36a5bd3ae9..32c50ed38eba0 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -37,7 +37,18 @@ def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> Ar mask = isna(values) return _quantile_with_mask(values, mask, fill_value, qs, interpolation) else: - return _quantile_ea_compat(values, qs, interpolation) + # In general we don't want to import from arrays here; + # this is temporary pending discussion in GH#41428 + from pandas.core.arrays import BaseMaskedArray + + if isinstance(values, BaseMaskedArray): + # e.g. IntegerArray, does not implement _from_factorized + out = _quantile_ea_fallback(values, qs, interpolation) + + else: + out = _quantile_ea_compat(values, qs, interpolation) + + return out def _quantile_with_mask( @@ -144,3 +155,31 @@ def _quantile_ea_compat( # error: Incompatible return value type (got "ndarray", expected "ExtensionArray") return result # type: ignore[return-value] + + +def _quantile_ea_fallback( + values: ExtensionArray, qs: np.ndarray, interpolation: str +) -> ExtensionArray: + """ + quantile compatibility for ExtensionArray subclasses that do not + implement `_from_factorized`, e.g. IntegerArray. + + Notes + ----- + We assume that all impacted cases are 1D-only. + """ + mask = np.atleast_2d(np.asarray(values.isna())) + npvalues = np.atleast_2d(np.asarray(values)) + + res = _quantile_with_mask( + npvalues, + mask=mask, + fill_value=values.dtype.na_value, + qs=qs, + interpolation=interpolation, + ) + assert res.ndim == 2 + assert res.shape[0] == 1 + res = res[0] + out = type(values)._from_sequence(res, dtype=values.dtype) + return out diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bd4dfdb4ebad0..e051e765b2ba3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1316,7 +1316,6 @@ def quantile( assert is_list_like(qs) # caller is responsible for this result = quantile_compat(self.values, np.asarray(qs._values), interpolation) - return new_block(result, placement=self._mgr_locs, ndim=2) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index dbb5cb357de47..7926ec52b1f28 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -548,22 +548,28 @@ class TestQuantileExtensionDtype: ), pd.period_range("2016-01-01", periods=9, freq="D"), pd.date_range("2016-01-01", periods=9, tz="US/Pacific"), - pytest.param( - pd.array(np.arange(9), dtype="Int64"), - marks=pytest.mark.xfail(reason="doesn't implement from_factorized"), - ), - pytest.param( - pd.array(np.arange(9), dtype="Float64"), - marks=pytest.mark.xfail(reason="doesn't implement from_factorized"), - ), + pd.array(np.arange(9), dtype="Int64"), + pd.array(np.arange(9), dtype="Float64"), ], ids=lambda x: str(x.dtype), ) def index(self, request): + # NB: not actually an Index object idx = request.param idx.name = "A" return idx + @pytest.fixture + def obj(self, index, frame_or_series): + # bc index is not always an Index (yet), we need to re-patch .name + obj = frame_or_series(index).copy() + + if frame_or_series is Series: + obj.name = "A" + else: + obj.columns = ["A"] + return obj + def compute_quantile(self, obj, qs): if isinstance(obj, Series): result = obj.quantile(qs) @@ -571,8 +577,7 @@ def compute_quantile(self, obj, qs): result = obj.quantile(qs, numeric_only=False) return result - def test_quantile_ea(self, index, frame_or_series): - obj = frame_or_series(index).copy() + def test_quantile_ea(self, obj, index): # result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) @@ -583,13 +588,14 @@ def test_quantile_ea(self, index, frame_or_series): result = self.compute_quantile(obj, qs) # expected here assumes len(index) == 9 - expected = Series([index[4], index[0], index[-1]], index=qs, name="A") - expected = frame_or_series(expected) + expected = Series( + [index[4], index[0], index[-1]], dtype=index.dtype, index=qs, name="A" + ) + expected = type(obj)(expected) tm.assert_equal(result, expected) - def test_quantile_ea_with_na(self, index, frame_or_series): - obj = frame_or_series(index).copy() + def test_quantile_ea_with_na(self, obj, index): obj.iloc[0] = index._na_value obj.iloc[-1] = index._na_value @@ -603,15 +609,15 @@ def test_quantile_ea_with_na(self, index, frame_or_series): result = self.compute_quantile(obj, qs) # expected here assumes len(index) == 9 - expected = Series([index[4], index[1], index[-2]], index=qs, name="A") - expected = frame_or_series(expected) + expected = Series( + [index[4], index[1], index[-2]], dtype=index.dtype, index=qs, name="A" + ) + expected = type(obj)(expected) tm.assert_equal(result, expected) # TODO: filtering can be removed after GH#39763 is fixed @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning") - def test_quantile_ea_all_na(self, index, frame_or_series): - - obj = frame_or_series(index).copy() + def test_quantile_ea_all_na(self, obj, index, frame_or_series): obj.iloc[:] = index._na_value @@ -628,13 +634,12 @@ def test_quantile_ea_all_na(self, index, frame_or_series): result = self.compute_quantile(obj, qs) expected = index.take([-1, -1, -1], allow_fill=True, fill_value=index._na_value) - expected = Series(expected, index=qs) - expected = frame_or_series(expected) + expected = Series(expected, index=qs, name="A") + expected = type(obj)(expected) tm.assert_equal(result, expected) - def test_quantile_ea_scalar(self, index, frame_or_series): + def test_quantile_ea_scalar(self, obj, index): # scalar qs - obj = frame_or_series(index).copy() # result should be invariant to shuffling indexer = np.arange(len(index), dtype=np.intp) @@ -644,8 +649,8 @@ def test_quantile_ea_scalar(self, index, frame_or_series): qs = 0.5 result = self.compute_quantile(obj, qs) - expected = Series({"A": index[4]}, name=0.5) - if frame_or_series is Series: + expected = Series({"A": index[4]}, dtype=index.dtype, name=0.5) + if isinstance(obj, Series): expected = expected["A"] assert result == expected else: