Skip to content

REF: simplify BlockManager.quantile #39618

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9401,6 +9401,14 @@ def quantile(
"""
validate_percentile(q)

if not is_list_like(q):
# BlockManager.quantile expects listlike, so we wrap and unwrap here
res = self.quantile(
[q], axis=axis, numeric_only=numeric_only, interpolation=interpolation
)
return res.iloc[0]

q = Index(q, dtype=np.float64)
data = self._get_numeric_data() if numeric_only else self
axis = self._get_axis_number(axis)
is_transposed = axis == 1
Expand All @@ -9419,10 +9427,7 @@ def quantile(
qs=q, axis=1, interpolation=interpolation, transposed=is_transposed
)

if result.ndim == 2:
result = self._constructor(result)
else:
result = self._constructor_sliced(result, name=q)
result = self._constructor(result)

if is_transposed:
result = result.T
Expand Down
35 changes: 17 additions & 18 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@
from pandas.core.nanops import nanpercentile

if TYPE_CHECKING:
from pandas import Index
from pandas import Float64Index, Index
from pandas.core.arrays._mixins import NDArrayBackedExtensionArray


Expand Down Expand Up @@ -1383,30 +1383,33 @@ def _unstack(self, unstacker, fill_value, new_placement):
blocks = [make_block(new_values, placement=new_placement)]
return blocks, mask

def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
def quantile(
self, qs: Float64Index, interpolation="linear", axis: int = 0
) -> Block:
"""
Compute the quantiles of the values held by this block.

Parameters
----------
qs: a scalar or list of the quantiles to be computed
interpolation: type of interpolation, default 'linear'
axis: axis to compute, default 0
qs : Float64Index
List of the quantiles to be computed.
interpolation : str, default 'linear'
Type of interpolation.
axis : int, default 0
Axis to compute.

Returns
-------
Block
"""
# We should always have ndim == 2 because Series dispatches to DataFrame
assert self.ndim == 2
assert axis == 1 # only ever called this way
assert is_list_like(qs) # caller is responsible for this

values = self.get_values()

is_empty = values.shape[axis] == 0
orig_scalar = not is_list_like(qs)
if orig_scalar:
# make list-like, unpack later
qs = [qs]

if is_empty:
# create the array of na_values
Expand All @@ -1430,14 +1433,7 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
result = np.array(result, copy=False)
result = result.T

if orig_scalar and not lib.is_scalar(result):
# result could be scalar in case with is_empty and self.ndim == 1
assert result.shape[-1] == 1, result.shape
result = result[..., 0]
result = lib.item_from_zerodim(result)

ndim = np.ndim(result)
return make_block(result, placement=np.arange(len(result)), ndim=ndim)
return make_block(result, placement=self.mgr_locs, ndim=2)

def _replace_coerce(
self,
Expand Down Expand Up @@ -2184,7 +2180,10 @@ def fillna(
value, limit=limit, inplace=inplace, downcast=downcast
)

def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
def quantile(
self, qs: Float64Index, interpolation="linear", axis: int = 0
) -> Block:
assert axis == 1 # only ever called this way
naive = self.values.view("M8[ns]")

# TODO(EA2D): kludge for 2D block with 1D values
Expand Down
88 changes: 19 additions & 69 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
is_extension_array_dtype,
is_list_like,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries
from pandas.core.dtypes.missing import array_equals, isna
Expand All @@ -40,7 +39,7 @@
from pandas.core.arrays.sparse import SparseDtype
from pandas.core.construction import extract_array
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import Index, ensure_index
from pandas.core.indexes.api import Float64Index, Index, ensure_index
from pandas.core.internals.base import DataManager
from pandas.core.internals.blocks import (
Block,
Expand Down Expand Up @@ -441,11 +440,11 @@ def apply(

def quantile(
self,
*,
qs: Float64Index,
axis: int = 0,
transposed: bool = False,
interpolation="linear",
qs=None,
numeric_only=None,
) -> BlockManager:
"""
Iterate over blocks applying quantile reduction.
Expand All @@ -460,8 +459,7 @@ def quantile(
transposed: bool, default False
we are holding transposed data
interpolation : type of interpolation, default 'linear'
qs : a scalar or list of the quantiles to be computed
numeric_only : ignored
qs : list of the quantiles to be computed

Returns
-------
Expand All @@ -470,73 +468,25 @@ def quantile(
# Series dispatches to DataFrame for quantile, which allows us to
# simplify some of the code here and in the blocks
assert self.ndim >= 2
assert is_list_like(qs) # caller is responsible for this
assert axis == 1 # only ever called this way

def get_axe(block, qs, axes):
# Because Series dispatches to DataFrame, we will always have
# block.ndim == 2
from pandas import Float64Index

if is_list_like(qs):
ax = Float64Index(qs)
else:
ax = axes[0]
return ax

axes, blocks = [], []
for b in self.blocks:
block = b.quantile(axis=axis, qs=qs, interpolation=interpolation)

axe = get_axe(b, qs, axes=self.axes)

axes.append(axe)
blocks.append(block)

# note that some DatetimeTZ, Categorical are always ndim==1
ndim = {b.ndim for b in blocks}
assert 0 not in ndim, ndim

if 2 in ndim:

new_axes = list(self.axes)

# multiple blocks that are reduced
if len(blocks) > 1:
new_axes[1] = axes[0]

# reset the placement to the original
for b, sb in zip(blocks, self.blocks):
b.mgr_locs = sb.mgr_locs

else:
new_axes[axis] = Index(np.concatenate([ax._values for ax in axes]))

if transposed:
new_axes = new_axes[::-1]
blocks = [
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
for b in blocks
]

return type(self)(blocks, new_axes)

# single block, i.e. ndim == {1}
values = concat_compat([b.values for b in blocks])

# compute the orderings of our original data
if len(self.blocks) > 1:
new_axes = list(self.axes)
new_axes[1] = Float64Index(qs)

indexer = np.empty(len(self.axes[0]), dtype=np.intp)
i = 0
for b in self.blocks:
for j in b.mgr_locs:
indexer[j] = i
i = i + 1
blocks = [
blk.quantile(axis=axis, qs=qs, interpolation=interpolation)
for blk in self.blocks
]

values = values.take(indexer)
if transposed:
new_axes = new_axes[::-1]
blocks = [
b.make_block(b.values.T, placement=np.arange(b.shape[1]))
for b in blocks
]

return SingleBlockManager(
make_block(values, ndim=1, placement=np.arange(len(values))), axes[0]
)
return type(self)(blocks, new_axes)

def isna(self, func) -> BlockManager:
return self.apply("apply", func=func)
Expand Down