From c0a7bf9ecbe4ad00ec9a3ba979822ce484639c37 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 12:38:56 -0800 Subject: [PATCH 1/4] simplify quantile dispatching --- pandas/core/internals/blocks.py | 22 ++++--------- pandas/core/internals/managers.py | 51 +++++++++++++++++++------------ 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3b2c13af785d4..88586acdd14dc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1439,7 +1439,7 @@ def _unstack(self, unstacker_func, new_columns, n_rows, fill_value): blocks = [make_block(new_values, placement=new_placement)] return blocks, mask - def quantile(self, qs, interpolation='linear', axis=0, axes=None): + def quantile(self, qs, interpolation='linear', axis=0): """ compute the quantiles of the @@ -1448,12 +1448,10 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None): qs: a scalar or list of the quantiles to be computed interpolation: type of interpolation, default 'linear' axis: axis to compute, default 0 - axes : BlockManager.axes Returns ------- - tuple of (axis, block) - + Block """ kw = {'interpolation': interpolation} values = self.get_values() @@ -1492,10 +1490,8 @@ def _nanpercentile(values, q, axis, **kw): else: return np.percentile(values, q, axis=axis, **kw) - from pandas import Float64Index is_empty = values.shape[axis] == 0 if is_list_like(qs): - ax = Float64Index(qs) if is_empty: if self.ndim == 1: @@ -1515,12 +1511,6 @@ def _nanpercentile(values, q, axis, **kw): result = result.T else: - - if self.ndim == 1: - ax = Float64Index([qs]) - else: - ax = axes[0] - if is_empty: if self.ndim == 1: result = self._na_value @@ -1532,10 +1522,10 @@ def _nanpercentile(values, q, axis, **kw): ndim = getattr(result, 'ndim', None) or 0 result = self._try_coerce_result(result) if lib.is_scalar(result): - return ax, self.make_block_scalar(result) - return ax, make_block(result, - placement=np.arange(len(result)), - ndim=ndim) + return result # FIXME: doesn't match signature + return make_block(result, + placement=np.arange(len(result)), + ndim=ndim) def _replace_coerce(self, to_replace, value, inplace=True, regex=False, convert=False, mask=None): diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index eba49d18431ef..f54510b9b35c7 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -16,7 +16,7 @@ maybe_promote) from pandas.core.dtypes.common import ( _NS_DTYPE, is_datetimelike_v_numeric, is_extension_array_dtype, - is_extension_type, is_numeric_v_string_like, is_scalar) + is_extension_type, is_list_like, is_numeric_v_string_like, is_scalar) import pandas.core.dtypes.concat as _concat from pandas.core.dtypes.generic import ABCExtensionArray, ABCSeries from pandas.core.dtypes.missing import isna @@ -402,40 +402,62 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, bm._consolidate_inplace() return bm - def reduction(self, f, axis=0, consolidate=True, transposed=False, - **kwargs): + def quantile(self, axis=0, consolidate=True, transposed=False, + interpolation='linear', qs=None, numeric_only=None): """ - iterate over the blocks, collect and create a new block manager. + Iterate over blocks applying quantile reduction. This routine is intended for reduction type operations and will do inference on the generated blocks. Parameters ---------- - f: the callable or function name to operate on at the block level axis: reduction axis, default 0 consolidate: boolean, default True. Join together blocks having same dtype transposed: boolean, default False we are holding transposed data + interpolation : type of interpolation, default 'linear' + qs : a scalar or list of the quantiles to be computed + numeric_only : ignored Returns ------- Block Manager (new object) - """ if consolidate: self._consolidate_inplace() + def get_axe(block, qs, axes): + from pandas import Float64Index + if is_list_like(qs): + ax = Float64Index(qs) + elif block.ndim == 1: + ax = Float64Index([qs]) + else: + ax = axes[0] + return ax + axes, blocks = [], [] for b in self.blocks: - axe, block = getattr(b, f)(axis=axis, axes=self.axes, **kwargs) + block = b.quantile(axis=axis, qs=qs, interpolation=interpolation) + + axe = get_axe(b, qs, axes=self.axes) axes.append(axe) blocks.append(block) # note that some DatetimeTZ, Categorical are always ndim==1 - ndim = {b.ndim for b in blocks} + # we may cheat and have a scalar in `blocks`. + ndim = {np.ndim(b) for b in blocks} + if 0 in ndim: + # we cheated and returned a scalar instead of a dummy block; + # this is only reached in the Series case + assert len(blocks) == 1, blocks + assert lib.is_scalar(blocks[0]), blocks[0] + assert len(self.blocks) == 1, self.blocks + assert self.blocks[0].ndim == 1, self.blocks[0] # i.e. Series + return blocks[0] if 2 in ndim: @@ -461,15 +483,7 @@ def reduction(self, f, axis=0, consolidate=True, transposed=False, return self.__class__(blocks, new_axes) - # 0 ndim - if 0 in ndim and 1 not in ndim: - values = np.array([b.values for b in blocks]) - if len(values) == 1: - return values.item() - blocks = [make_block(values, ndim=1)] - axes = Index([ax[0] for ax in axes]) - - # single block + # single block, i.e. ndim == {1} values = _concat._concat_compat([b.values for b in blocks]) # compute the orderings of our original data @@ -496,9 +510,6 @@ def isna(self, func, **kwargs): def where(self, **kwargs): return self.apply('where', **kwargs) - def quantile(self, **kwargs): - return self.reduction('quantile', **kwargs) - def setitem(self, **kwargs): return self.apply('setitem', **kwargs) From 1ad4970bf7b840de6980b83738a2e194016c7e0e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 12:39:36 -0800 Subject: [PATCH 2/4] remove ScalarBlock, make_block_scalar --- pandas/core/internals/__init__.py | 3 +-- pandas/core/internals/blocks.py | 29 ----------------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 7d6aa6a42efc2..7878613a8b1b1 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -5,8 +5,7 @@ make_block, # io.pytables, io.packers FloatBlock, IntBlock, ComplexBlock, BoolBlock, ObjectBlock, TimeDeltaBlock, DatetimeBlock, DatetimeTZBlock, - CategoricalBlock, ExtensionBlock, ScalarBlock, - Block) + CategoricalBlock, ExtensionBlock, Block) from .managers import ( # noqa:F401 BlockManager, SingleBlockManager, create_block_manager_from_arrays, create_block_manager_from_blocks, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 88586acdd14dc..29949cc91cbe1 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -222,12 +222,6 @@ def make_block(self, values, placement=None, ndim=None): return make_block(values, placement=placement, ndim=ndim) - def make_block_scalar(self, values): - """ - Create a ScalarBlock - """ - return ScalarBlock(values) - def make_block_same_class(self, values, placement=None, ndim=None, dtype=None): """ Wrap given values in a block of same type as self. """ @@ -1565,29 +1559,6 @@ def _replace_coerce(self, to_replace, value, inplace=True, regex=False, return self -class ScalarBlock(Block): - """ - a scalar compat Block - """ - __slots__ = ['_mgr_locs', 'values', 'ndim'] - - def __init__(self, values): - self.ndim = 0 - self.mgr_locs = [0] - self.values = values - - @property - def dtype(self): - return type(self.values) - - @property - def shape(self): - return tuple([0]) - - def __len__(self): - return 0 - - class NonConsolidatableMixIn(object): """ hold methods for the nonconsolidatable blocks """ _can_consolidate = False From 6d224c072bf175a50f712f053b1fc1654aa0142c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 15:21:23 -0800 Subject: [PATCH 3/4] cln --- pandas/core/internals/blocks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index d5ae22bf6f82e..f3a85d731b9eb 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2676,7 +2676,7 @@ def convert(self, *args, **kwargs): if args: raise NotImplementedError - by_item = True if 'by_item' not in kwargs else kwargs['by_item'] + by_item = kwargs.get('by_item', True) new_inputs = ['coerce', 'datetime', 'numeric', 'timedelta'] new_style = False From 3c8ef9e42b7947708c1297bfd1bed35bc8cd4827 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 3 Jan 2019 18:05:47 -0800 Subject: [PATCH 4/4] dispatch Series.quantile to DataFrame.quantile --- pandas/core/internals/blocks.py | 10 +++++----- pandas/core/internals/managers.py | 16 ++++++---------- pandas/core/series.py | 12 ++++++++++-- pandas/tests/resample/test_base.py | 2 +- 4 files changed, 22 insertions(+), 18 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 772ca2cd5d340..f88114e1c9e20 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1462,13 +1462,15 @@ def quantile(self, qs, interpolation='linear', axis=0): else: # create the array of na_values # 2d len(values) * len(qs) - result = np.repeat(np.array([self._na_value] * len(qs)), + result = np.repeat(np.array([self.fill_value] * len(qs)), len(values)).reshape(len(values), len(qs)) else: - mask = isna(self.values) + # asarray needed for Sparse, see GH#24600 + # TODO: Why self.values and not values? + mask = np.asarray(isna(self.values)) result = nanpercentile(values, np.array(qs) * 100, - axis=axis, na_value=self._na_value, + axis=axis, na_value=self.fill_value, mask=mask, ndim=self.ndim, interpolation=interpolation) @@ -1484,8 +1486,6 @@ def quantile(self, qs, interpolation='linear', axis=0): ndim = getattr(result, 'ndim', None) or 0 result = self._try_coerce_result(result) - if lib.is_scalar(result): - return result # FIXME: doesn't match signature return make_block(result, placement=np.arange(len(result)), ndim=ndim) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index f54510b9b35c7..ab033ff4c1c4b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -425,6 +425,10 @@ def quantile(self, axis=0, consolidate=True, transposed=False, Block Manager (new object) """ + # Series dispatches to DataFrame for quantile, which allows us to + # simplify some of the code here and in the blocks + assert self.ndim >= 2 + if consolidate: self._consolidate_inplace() @@ -448,16 +452,8 @@ def get_axe(block, qs, axes): blocks.append(block) # note that some DatetimeTZ, Categorical are always ndim==1 - # we may cheat and have a scalar in `blocks`. - ndim = {np.ndim(b) for b in blocks} - if 0 in ndim: - # we cheated and returned a scalar instead of a dummy block; - # this is only reached in the Series case - assert len(blocks) == 1, blocks - assert lib.is_scalar(blocks[0]), blocks[0] - assert len(self.blocks) == 1, self.blocks - assert self.blocks[0].ndim == 1, self.blocks[0] # i.e. Series - return blocks[0] + ndim = {b.ndim for b in blocks} + assert 0 not in ndim, ndim if 2 in ndim: diff --git a/pandas/core/series.py b/pandas/core/series.py index 46ff04fdd31ae..de34227cda28a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1987,15 +1987,23 @@ def quantile(self, q=0.5, interpolation='linear'): self._check_percentile(q) - result = self._data.quantile(qs=q, interpolation=interpolation) + # We dispatch to DataFrame so that core.internals only has to worry + # about 2D cases. + df = self.to_frame() + + result = df.quantile(q=q, interpolation=interpolation, + numeric_only=False) + if result.ndim == 2: + result = result.iloc[:, 0] if is_list_like(q): + result.name = self.name return self._constructor(result, index=Float64Index(q), name=self.name) else: # scalar - return result + return result.iloc[0] def corr(self, other, method='pearson', min_periods=None): """ diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py index 31199dc01b659..0efd48c25ad62 100644 --- a/pandas/tests/resample/test_base.py +++ b/pandas/tests/resample/test_base.py @@ -218,5 +218,5 @@ def test_resample_quantile_all_ts(series): q = 0.75 freq = 'H' result = s.resample(freq).quantile(q) - expected = s.resample(freq).agg(lambda x: x.quantile(q)) + expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name) tm.assert_series_equal(result, expected)