Skip to content

REG: quantile with IntegerArray/FloatingArray #41428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 31, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,18 @@ def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> Ar
mask = isna(values)
return _quantile_with_mask(values, mask, fill_value, qs, interpolation)
else:
return _quantile_ea_compat(values, qs, interpolation)
# In general we don't want to import from arrays here;
# this is temporary pending discussion in GH#41428
from pandas.core.arrays import BaseMaskedArray

if isinstance(values, BaseMaskedArray):
# e.g. IntegerArray, does not implement _from_factorized
out = _quantile_ea_fallback(values, qs, interpolation)

else:
out = _quantile_ea_compat(values, qs, interpolation)

return out


def _quantile_with_mask(
Expand Down Expand Up @@ -144,3 +155,31 @@ def _quantile_ea_compat(

# error: Incompatible return value type (got "ndarray", expected "ExtensionArray")
return result # type: ignore[return-value]


def _quantile_ea_fallback(
    values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
    """
    Compute quantiles for an ExtensionArray whose class does not implement
    `_from_factorized` (e.g. IntegerArray) by going through ndarrays.

    Parameters
    ----------
    values : ExtensionArray
    qs : np.ndarray
        Passed through unchanged to ``_quantile_with_mask``.
    interpolation : str
        Passed through unchanged to ``_quantile_with_mask``.

    Returns
    -------
    ExtensionArray
        Rebuilt via ``type(values)._from_sequence`` with ``values.dtype``.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    # Lift the NA mask and the data to 2D before handing off to
    # _quantile_with_mask.
    na_mask = np.atleast_2d(np.asarray(values.isna()))
    values_2d = np.atleast_2d(np.asarray(values))

    result_2d = _quantile_with_mask(
        values_2d,
        mask=na_mask,
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )

    # Under the 1D-only assumption the 2D result holds exactly one row.
    assert result_2d.ndim == 2
    assert result_2d.shape[0] == 1

    return type(values)._from_sequence(result_2d[0], dtype=values.dtype)
1 change: 0 additions & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1316,7 +1316,6 @@ def quantile(
assert is_list_like(qs) # caller is responsible for this

result = quantile_compat(self.values, np.asarray(qs._values), interpolation)

return new_block(result, placement=self._mgr_locs, ndim=2)


Expand Down
55 changes: 30 additions & 25 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,31 +548,36 @@ class TestQuantileExtensionDtype:
),
pd.period_range("2016-01-01", periods=9, freq="D"),
pd.date_range("2016-01-01", periods=9, tz="US/Pacific"),
pytest.param(
pd.array(np.arange(9), dtype="Int64"),
marks=pytest.mark.xfail(reason="doesn't implement from_factorized"),
),
pytest.param(
pd.array(np.arange(9), dtype="Float64"),
marks=pytest.mark.xfail(reason="doesn't implement from_factorized"),
),
pd.array(np.arange(9), dtype="Int64"),
pd.array(np.arange(9), dtype="Float64"),
],
ids=lambda x: str(x.dtype),
)
def index(self, request):
    """
    Return the current fixture parameter, labelled with the name "A".
    """
    # NB: despite the fixture's name, the parameter is not actually an
    # Index object
    param = request.param
    param.name = "A"
    return param

@pytest.fixture
def obj(self, index, frame_or_series):
    """
    Wrap a copy of ``index`` in the requested container (Series or
    DataFrame) and label it "A".
    """
    wrapped = frame_or_series(index).copy()

    # bc index is not always an Index (yet), the name has to be patched
    # onto the wrapper explicitly
    if frame_or_series is not Series:
        wrapped.columns = ["A"]
    else:
        wrapped.name = "A"
    return wrapped

def compute_quantile(self, obj, qs):
    """
    Call ``obj.quantile(qs)``, passing ``numeric_only=False`` when ``obj``
    is not a Series.
    """
    if not isinstance(obj, Series):
        # non-Series path: explicitly request numeric_only=False
        return obj.quantile(qs, numeric_only=False)
    return obj.quantile(qs)

def test_quantile_ea(self, index, frame_or_series):
obj = frame_or_series(index).copy()
def test_quantile_ea(self, obj, index):

# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
Expand All @@ -583,13 +588,14 @@ def test_quantile_ea(self, index, frame_or_series):
result = self.compute_quantile(obj, qs)

# expected here assumes len(index) == 9
expected = Series([index[4], index[0], index[-1]], index=qs, name="A")
expected = frame_or_series(expected)
expected = Series(
[index[4], index[0], index[-1]], dtype=index.dtype, index=qs, name="A"
)
expected = type(obj)(expected)

tm.assert_equal(result, expected)

def test_quantile_ea_with_na(self, index, frame_or_series):
obj = frame_or_series(index).copy()
def test_quantile_ea_with_na(self, obj, index):

obj.iloc[0] = index._na_value
obj.iloc[-1] = index._na_value
Expand All @@ -603,15 +609,15 @@ def test_quantile_ea_with_na(self, index, frame_or_series):
result = self.compute_quantile(obj, qs)

# expected here assumes len(index) == 9
expected = Series([index[4], index[1], index[-2]], index=qs, name="A")
expected = frame_or_series(expected)
expected = Series(
[index[4], index[1], index[-2]], dtype=index.dtype, index=qs, name="A"
)
expected = type(obj)(expected)
tm.assert_equal(result, expected)

# TODO: filtering can be removed after GH#39763 is fixed
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
def test_quantile_ea_all_na(self, index, frame_or_series):

obj = frame_or_series(index).copy()
def test_quantile_ea_all_na(self, obj, index, frame_or_series):

obj.iloc[:] = index._na_value

Expand All @@ -628,13 +634,12 @@ def test_quantile_ea_all_na(self, index, frame_or_series):
result = self.compute_quantile(obj, qs)

expected = index.take([-1, -1, -1], allow_fill=True, fill_value=index._na_value)
expected = Series(expected, index=qs)
expected = frame_or_series(expected)
expected = Series(expected, index=qs, name="A")
expected = type(obj)(expected)
tm.assert_equal(result, expected)

def test_quantile_ea_scalar(self, index, frame_or_series):
def test_quantile_ea_scalar(self, obj, index):
# scalar qs
obj = frame_or_series(index).copy()

# result should be invariant to shuffling
indexer = np.arange(len(index), dtype=np.intp)
Expand All @@ -644,8 +649,8 @@ def test_quantile_ea_scalar(self, index, frame_or_series):
qs = 0.5
result = self.compute_quantile(obj, qs)

expected = Series({"A": index[4]}, name=0.5)
if frame_or_series is Series:
expected = Series({"A": index[4]}, dtype=index.dtype, name=0.5)
if isinstance(obj, Series):
expected = expected["A"]
assert result == expected
else:
Expand Down