From c556576b963354e7b64d7ed93b2fc253d4a446d3 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 5 Feb 2021 14:47:52 -0800 Subject: [PATCH 1/2] REF: simplify BlockManager.quantile --- pandas/core/frame.py | 7 +++ pandas/core/internals/blocks.py | 16 ++----- pandas/core/internals/managers.py | 80 ++++++------------------------- 3 files changed, 26 insertions(+), 77 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6357b8feb348b..30cdcfb76edcb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9410,6 +9410,13 @@ def quantile( """ validate_percentile(q) + if not is_list_like(q): + # BlockManager.quantile expects listlike, so we wrap and unwrap here + res = self.quantile( + [q], axis=axis, numeric_only=numeric_only, interpolation=interpolation + ) + return res.iloc[0] + data = self._get_numeric_data() if numeric_only else self axis = self._get_axis_number(axis) is_transposed = axis == 1 diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9314666acdaad..b27eb79ace121 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1399,14 +1399,12 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block: """ # We should always have ndim == 2 because Series dispatches to DataFrame assert self.ndim == 2 + assert axis == 1 # only ever called this way + assert is_list_like(qs) # caller is responsible for this values = self.get_values() is_empty = values.shape[axis] == 0 - orig_scalar = not is_list_like(qs) - if orig_scalar: - # make list-like, unpack later - qs = [qs] if is_empty: # create the array of na_values @@ -1430,14 +1428,7 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block: result = np.array(result, copy=False) result = result.T - if orig_scalar and not lib.is_scalar(result): - # result could be scalar in case with is_empty and self.ndim == 1 - assert result.shape[-1] == 1, result.shape - result = result[..., 0] - result = lib.item_from_zerodim(result) - - ndim = np.ndim(result) - return make_block(result, placement=np.arange(len(result)), ndim=ndim) + return make_block(result, placement=self.mgr_locs, ndim=2) def _replace_coerce( self, @@ -2185,6 +2176,7 @@ def fillna( ) def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block: + assert axis == 1 # only ever called this way naive = self.values.view("M8[ns]") # TODO(EA2D): kludge for 2D block with 1D values diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c352fa39627d9..85dbffbbc50f7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -31,7 +31,6 @@ is_extension_array_dtype, is_list_like, ) -from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ExtensionDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCPandasArray, ABCSeries from pandas.core.dtypes.missing import array_equals, isna @@ -40,7 +39,7 @@ from pandas.core.arrays.sparse import SparseDtype from pandas.core.construction import extract_array from pandas.core.indexers import maybe_convert_indices -from pandas.core.indexes.api import Index, ensure_index +from pandas.core.indexes.api import Float64Index, Index, ensure_index from pandas.core.internals.base import DataManager from pandas.core.internals.blocks import ( Block, @@ -445,7 +444,6 @@ def quantile( transposed: bool = False, interpolation="linear", qs=None, - numeric_only=None, ) -> BlockManager: """ Iterate over blocks applying quantile reduction. @@ -460,8 +458,7 @@ def quantile( transposed: bool, default False we are holding transposed data interpolation : type of interpolation, default 'linear' - qs : a scalar or list of the quantiles to be computed - numeric_only : ignored + qs : list of the quantiles to be computed Returns ------- @@ -470,73 +467,26 @@ def quantile( # Series dispatches to DataFrame for quantile, which allows us to # simplify some of the code here and in the blocks assert self.ndim >= 2 + assert is_list_like(qs) # caller is responsible for this + assert axis == 1 # only ever called this way - def get_axe(block, qs, axes): - # Because Series dispatches to DataFrame, we will always have - # block.ndim == 2 - from pandas import Float64Index - - if is_list_like(qs): - ax = Float64Index(qs) - else: - ax = axes[0] - return ax + qs_axe = Float64Index(qs) + new_axes = list(self.axes) + new_axes[1] = qs_axe - axes, blocks = [], [] + blocks = [] for b in self.blocks: block = b.quantile(axis=axis, qs=qs, interpolation=interpolation) - - axe = get_axe(b, qs, axes=self.axes) - - axes.append(axe) blocks.append(block) - # note that some DatetimeTZ, Categorical are always ndim==1 - ndim = {b.ndim for b in blocks} - assert 0 not in ndim, ndim - - if 2 in ndim: - - new_axes = list(self.axes) - - # multiple blocks that are reduced - if len(blocks) > 1: - new_axes[1] = axes[0] - - # reset the placement to the original - for b, sb in zip(blocks, self.blocks): - b.mgr_locs = sb.mgr_locs - - else: - new_axes[axis] = Index(np.concatenate([ax._values for ax in axes])) - - if transposed: - new_axes = new_axes[::-1] - blocks = [ - b.make_block(b.values.T, placement=np.arange(b.shape[1])) - for b in blocks - ] - - return type(self)(blocks, new_axes) - - # single block, i.e. ndim == {1} - values = concat_compat([b.values for b in blocks]) - - # compute the orderings of our original data - if len(self.blocks) > 1: - - indexer = np.empty(len(self.axes[0]), dtype=np.intp) - i = 0 - for b in self.blocks: - for j in b.mgr_locs: - indexer[j] = i - i = i + 1 - - values = values.take(indexer) + if transposed: + new_axes = new_axes[::-1] + blocks = [ + b.make_block(b.values.T, placement=np.arange(b.shape[1])) + for b in blocks + ] - return SingleBlockManager( - make_block(values, ndim=1, placement=np.arange(len(values))), axes[0] - ) + return type(self)(blocks, new_axes) def isna(self, func) -> BlockManager: return self.apply("apply", func=func) From 4dc6d0f57748322728a424b9154864ab4a020d54 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 7 Feb 2021 09:28:42 -0800 Subject: [PATCH 2/2] annotate --- pandas/core/frame.py | 6 ++---- pandas/core/internals/blocks.py | 19 +++++++++++++------ pandas/core/internals/managers.py | 14 +++++++------- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f51a747a07c62..477dbd4c1570e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9408,6 +9408,7 @@ def quantile( ) return res.iloc[0] + q = Index(q, dtype=np.float64) data = self._get_numeric_data() if numeric_only else self axis = self._get_axis_number(axis) is_transposed = axis == 1 @@ -9426,10 +9427,7 @@ def quantile( qs=q, axis=1, interpolation=interpolation, transposed=is_transposed ) - if result.ndim == 2: - result = self._constructor(result) - else: - result = self._constructor_sliced(result, name=q) + result = self._constructor(result) if is_transposed: result = result.T diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b27eb79ace121..ffa52f3be4a57 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -82,7 +82,7 @@ from pandas.core.nanops import nanpercentile if TYPE_CHECKING: - from pandas import Index + from pandas import Float64Index, Index from pandas.core.arrays._mixins import NDArrayBackedExtensionArray @@ -1383,15 +1383,20 @@ def _unstack(self, unstacker, fill_value, new_placement): blocks = [make_block(new_values, placement=new_placement)] return blocks, mask - def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block: + def quantile( + self, qs: Float64Index, interpolation="linear", axis: int = 0 + ) -> Block: """ compute the quantiles of the Parameters ---------- - qs: a scalar or list of the quantiles to be computed - interpolation: type of interpolation, default 'linear' - axis: axis to compute, default 0 + qs : Float64Index + List of the quantiles to be computed. + interpolation : str, default 'linear' + Type of interpolation. + axis : int, default 0 + Axis to compute. Returns ------- @@ -2175,7 +2180,9 @@ def fillna( value, limit=limit, inplace=inplace, downcast=downcast ) - def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block: + def quantile( + self, qs: Float64Index, interpolation="linear", axis: int = 0 + ) -> Block: assert axis == 1 # only ever called this way naive = self.values.view("M8[ns]") diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 85dbffbbc50f7..0aa97b4d6c0ed 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -440,10 +440,11 @@ def apply( def quantile( self, + *, + qs: Float64Index, axis: int = 0, transposed: bool = False, interpolation="linear", - qs=None, ) -> BlockManager: """ Iterate over blocks applying quantile reduction. @@ -470,14 +471,13 @@ def quantile( assert is_list_like(qs) # caller is responsible for this assert axis == 1 # only ever called this way - qs_axe = Float64Index(qs) new_axes = list(self.axes) - new_axes[1] = qs_axe + new_axes[1] = Float64Index(qs) - blocks = [] - for b in self.blocks: - block = b.quantile(axis=axis, qs=qs, interpolation=interpolation) - blocks.append(block) + blocks = [ + blk.quantile(axis=axis, qs=qs, interpolation=interpolation) + for blk in self.blocks + ] if transposed: new_axes = new_axes[::-1]