Skip to content

Commit af0a60e

Browse files
authored
ENH: ArrayManager.quantile (#40189)
1 parent ec56dd2 commit af0a60e

17 files changed

+129
-95
lines changed

pandas/core/array_algos/quantile.py

+87-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,50 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
15
import numpy as np
26

37
from pandas._libs import lib
8+
from pandas._typing import ArrayLike
49

5-
from pandas.core.dtypes.common import is_list_like
10+
from pandas.core.dtypes.common import (
11+
is_list_like,
12+
is_sparse,
13+
)
14+
from pandas.core.dtypes.missing import (
15+
isna,
16+
na_value_for_dtype,
17+
)
618

719
from pandas.core.nanops import nanpercentile
820

21+
if TYPE_CHECKING:
22+
from pandas.core.arrays import ExtensionArray
23+
24+
25+
def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> ArrayLike:
    """
    Dispatch a quantile computation based on the container type of `values`.

    A NumPy array is handled directly: its missing-value mask comes from
    ``isna`` and its fill value from ``na_value_for_dtype(..., compat=False)``,
    and both are handed to ``quantile_with_mask``. Anything that is not a
    NumPy array is forwarded to ``quantile_ea_compat``.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
    axis : int

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        # Non-ndarray input takes the ExtensionArray compatibility path.
        return quantile_ea_compat(values, qs, interpolation, axis)

    na_fill = na_value_for_dtype(values.dtype, compat=False)
    na_mask = isna(values)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation, axis)
47+
948

1049
def quantile_with_mask(
1150
values: np.ndarray,
@@ -75,3 +114,50 @@ def quantile_with_mask(
75114
result = lib.item_from_zerodim(result)
76115

77116
return result
117+
118+
119+
def quantile_ea_compat(
    values: ExtensionArray, qs, interpolation: str, axis: int
) -> ExtensionArray:
    """
    Run quantile_with_mask on an ExtensionArray by viewing it as 2D.

    The array's mask and its ``_values_for_factorize()`` values are each
    passed through ``np.atleast_2d`` before being handed to the non-EA
    implementation, i.e. a shape (N,) array is treated as (1, N).

    Parameters
    ----------
    values : ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation : str
    axis : int

    Returns
    -------
    ExtensionArray
        Rebuilt through ``type(values)._from_factorized``. When
        ``is_sparse(values.dtype)`` is true, the result of
        ``quantile_with_mask`` is returned as-is, without that rebuild.
    """
    # TODO(EA2D): make-believe not needed with 2D EAs

    # asarray needed for Sparse, see GH#24600
    mask_2d = np.atleast_2d(np.asarray(values.isna()))

    raw_values, fill_value = values._values_for_factorize()
    values_2d = np.atleast_2d(raw_values)

    result = quantile_with_mask(
        values_2d, mask_2d, fill_value, qs, interpolation, axis
    )

    if is_sparse(values.dtype):
        # Sparse results are handed back without rebuilding the EA.
        return result

    # shape[0] should be 1 as long as EAs are 1D
    if result.ndim == 1:
        # i.e. qs was originally a scalar
        assert result.shape == (1,), result.shape
        flat = result
    else:
        assert result.shape == (1, len(qs)), result.shape
        flat = result[0]

    return type(values)._from_factorized(flat, values)

pandas/core/frame.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -9661,9 +9661,8 @@ def quantile(
96619661
q = Index(q, dtype=np.float64)
96629662
data = self._get_numeric_data() if numeric_only else self
96639663
axis = self._get_axis_number(axis)
9664-
is_transposed = axis == 1
96659664

9666-
if is_transposed:
9665+
if axis == 1:
96679666
data = data.T
96689667

96699668
if len(data.columns) == 0:
@@ -9673,15 +9672,9 @@ def quantile(
96739672
return self._constructor([], index=q, columns=cols)
96749673
return self._constructor_sliced([], index=cols, name=q, dtype=np.float64)
96759674

9676-
result = data._mgr.quantile(
9677-
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
9678-
)
9679-
9680-
result = self._constructor(result)
9681-
9682-
if is_transposed:
9683-
result = result.T
9675+
res = data._mgr.quantile(qs=q, axis=1, interpolation=interpolation)
96849676

9677+
result = self._constructor(res)
96859678
return result
96869679

96879680
@doc(NDFrame.asfreq, **_shared_doc_kwargs)

pandas/core/internals/array_manager.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from __future__ import annotations
55

66
from typing import (
7+
TYPE_CHECKING,
78
Any,
89
Callable,
910
List,
@@ -56,6 +57,7 @@
5657
)
5758

5859
import pandas.core.algorithms as algos
60+
from pandas.core.array_algos.quantile import quantile_compat
5961
from pandas.core.array_algos.take import take_nd
6062
from pandas.core.arrays import (
6163
DatetimeArray,
@@ -82,6 +84,10 @@
8284
)
8385
from pandas.core.internals.blocks import make_block
8486

87+
if TYPE_CHECKING:
88+
from pandas import Float64Index
89+
90+
8591
T = TypeVar("T", bound="ArrayManager")
8692

8793

@@ -448,7 +454,28 @@ def apply_with_block(self: T, f, align_keys=None, **kwargs) -> T:
448454

449455
return type(self)(result_arrays, self._axes)
450456

451-
# TODO quantile
457+
def quantile(
    self,
    *,
    qs: Float64Index,
    axis: int = 0,
    transposed: bool = False,
    interpolation="linear",
) -> ArrayManager:
    """
    Compute quantiles array-by-array and wrap them in a new manager.

    Each NumPy array in ``self.arrays`` is passed through ``np.atleast_2d``
    before being given to ``quantile_compat``; other arrays are passed
    unchanged. Any 2D result must have a leading dimension of 1 and is
    reduced to its first row.

    Parameters
    ----------
    qs : Float64Index
        Quantiles to compute; also used as the first axis of the result.
    axis : int, default 0
        Must be 1; any other value fails the assertion below.
    transposed : bool, default False
        Accepted in the signature but not read anywhere in this method.
    interpolation : str, default "linear"
        Forwarded to ``quantile_compat``.

    Returns
    -------
    ArrayManager
        Built via ``type(self)`` with axes ``[qs, self._axes[1]]``.
    """
    expanded = [
        np.atleast_2d(arr) if isinstance(arr, np.ndarray) else arr
        for arr in self.arrays
    ]
    assert axis == 1

    computed = [
        quantile_compat(arr, qs, interpolation, axis=axis) for arr in expanded
    ]

    squeezed = []
    for res in computed:
        if res.ndim == 2:
            # Only a single leading row is expected; keep that row.
            assert res.shape[0] == 1, res.shape
            res = res[0]
        squeezed.append(res)

    new_axes = [qs, self._axes[1]]
    return type(self)(squeezed, new_axes)
452479

453480
def isna(self, func) -> ArrayManager:
454481
return self.apply("apply", func=func)

pandas/core/internals/blocks.py

+2-24
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
setitem_datetimelike_compat,
8282
validate_putmask,
8383
)
84-
from pandas.core.array_algos.quantile import quantile_with_mask
84+
from pandas.core.array_algos.quantile import quantile_compat
8585
from pandas.core.array_algos.replace import (
8686
compare_or_regex_search,
8787
replace_regex,
@@ -1458,11 +1458,7 @@ def quantile(
14581458
assert axis == 1 # only ever called this way
14591459
assert is_list_like(qs) # caller is responsible for this
14601460

1461-
fill_value = self.fill_value
1462-
values = self.values
1463-
mask = np.asarray(isna(values))
1464-
1465-
result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)
1461+
result = quantile_compat(self.values, qs, interpolation, axis)
14661462

14671463
return make_block(result, placement=self.mgr_locs, ndim=2)
14681464

@@ -1836,24 +1832,6 @@ def _unstack(self, unstacker, fill_value, new_placement):
18361832
]
18371833
return blocks, mask
18381834

1839-
def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
1840-
# asarray needed for Sparse, see GH#24600
1841-
mask = np.asarray(isna(self.values))
1842-
mask = np.atleast_2d(mask)
1843-
1844-
values, fill_value = self.values._values_for_factorize()
1845-
1846-
values = np.atleast_2d(values)
1847-
1848-
result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)
1849-
1850-
if not is_sparse(self.dtype):
1851-
# shape[0] should be 1 as long as EAs are 1D
1852-
assert result.shape == (1, len(qs)), result.shape
1853-
result = type(self.values)._from_factorized(result[0], self.values)
1854-
1855-
return make_block(result, placement=self.mgr_locs, ndim=2)
1856-
18571835

18581836
class HybridMixin:
18591837
"""

pandas/core/internals/managers.py

-10
Original file line numberDiff line numberDiff line change
@@ -521,7 +521,6 @@ def quantile(
521521
*,
522522
qs: Float64Index,
523523
axis: int = 0,
524-
transposed: bool = False,
525524
interpolation="linear",
526525
) -> BlockManager:
527526
"""
@@ -534,8 +533,6 @@ def quantile(
534533
axis: reduction axis, default 0
535534
consolidate: bool, default True. Join together blocks having same
536535
dtype
537-
transposed: bool, default False
538-
we are holding transposed data
539536
interpolation : type of interpolation, default 'linear'
540537
qs : list of the quantiles to be computed
541538
@@ -557,13 +554,6 @@ def quantile(
557554
for blk in self.blocks
558555
]
559556

560-
if transposed:
561-
new_axes = new_axes[::-1]
562-
blocks = [
563-
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
564-
for b in blocks
565-
]
566-
567557
return type(self)(blocks, new_axes)
568558

569559
def isna(self, func) -> BlockManager:

pandas/tests/frame/methods/test_describe.py

-5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
import pandas.util._test_decorators as td
5-
64
import pandas as pd
75
from pandas import (
86
Categorical,
@@ -13,9 +11,6 @@
1311
)
1412
import pandas._testing as tm
1513

16-
# TODO(ArrayManager) quantile is needed for describe()
17-
pytestmark = td.skip_array_manager_not_yet_implemented
18-
1914

2015
class TestDataFrameDescribe:
2116
def test_describe_bool_in_mixed_frame(self):

pandas/tests/frame/methods/test_quantile.py

+9-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
import pandas.util._test_decorators as td
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -11,8 +9,6 @@
119
)
1210
import pandas._testing as tm
1311

14-
pytestmark = td.skip_array_manager_not_yet_implemented
15-
1612

1713
class TestDataFrameQuantile:
1814
@pytest.mark.parametrize(
@@ -526,12 +522,13 @@ def test_quantile_empty_no_columns(self):
526522
expected.columns.name = "captain tightpants"
527523
tm.assert_frame_equal(result, expected)
528524

529-
def test_quantile_item_cache(self):
525+
def test_quantile_item_cache(self, using_array_manager):
530526
# previous behavior incorrectly retained an invalid _item_cache entry
531527
df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])
532528
df["D"] = df["A"] * 2
533529
ser = df["A"]
534-
assert len(df._mgr.blocks) == 2
530+
if not using_array_manager:
531+
assert len(df._mgr.blocks) == 2
535532

536533
df.quantile(numeric_only=False)
537534
ser.values[0] = 99
@@ -610,12 +607,18 @@ def test_quantile_ea_with_na(self, index, frame_or_series):
610607
expected = frame_or_series(expected)
611608
tm.assert_equal(result, expected)
612609

610+
# TODO: filtering can be removed after GH#39763 is fixed
611+
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
613612
def test_quantile_ea_all_na(self, index, frame_or_series):
614613

615614
obj = frame_or_series(index).copy()
616615

617616
obj.iloc[:] = index._na_value
618617

618+
# TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed
619+
obj[:] = obj.astype(index.dtype)
620+
assert np.all(obj.dtypes == index.dtype)
621+
619622
# result should be invariant to shuffling
620623
indexer = np.arange(len(index), dtype=np.intp)
621624
np.random.shuffle(indexer)

pandas/tests/groupby/aggregate/test_aggregate.py

-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import pytest
1111

1212
from pandas.errors import PerformanceWarning
13-
import pandas.util._test_decorators as td
1413

1514
from pandas.core.dtypes.common import is_integer_dtype
1615

@@ -46,7 +45,6 @@ def test_agg_regression1(tsframe):
4645
tm.assert_frame_equal(result, expected)
4746

4847

49-
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile/describe
5048
def test_agg_must_agg(df):
5149
grouped = df.groupby("A")["C"]
5250

pandas/tests/groupby/test_apply.py

-1
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,6 @@ def test_groupby_as_index_apply(df):
318318
tm.assert_index_equal(res, ind)
319319

320320

321-
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
322321
def test_apply_concat_preserve_names(three_group):
323322
grouped = three_group.groupby(["A", "B"])
324323

pandas/tests/groupby/test_categorical.py

-4
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,6 @@ def get_stats(group):
8383
assert result.index.names[0] == "C"
8484

8585

86-
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
8786
def test_basic():
8887

8988
cats = Categorical(
@@ -540,7 +539,6 @@ def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
540539
assert False, msg
541540

542541

543-
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
544542
def test_datetime():
545543
# GH9049: ensure backward compatibility
546544
levels = pd.date_range("2014-01-01", periods=4)
@@ -606,7 +604,6 @@ def test_categorical_index():
606604
tm.assert_frame_equal(result, expected)
607605

608606

609-
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
610607
def test_describe_categorical_columns():
611608
# GH 11558
612609
cats = CategoricalIndex(
@@ -621,7 +618,6 @@ def test_describe_categorical_columns():
621618
tm.assert_categorical_equal(result.stack().columns.values, cats.values)
622619

623620

624-
@td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile
625621
def test_unstack_categorical():
626622
# GH11558 (example is taken from the original issue)
627623
df = DataFrame(

0 commit comments

Comments
 (0)