From 647a3939c89f236033c06afe120c8e879bf1f853 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 10:26:16 -0700 Subject: [PATCH 1/2] DOC: suppress warnings from CategoricalBlock deprecation --- doc/source/user_guide/io.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index cf153ddd2cbbd..3b7a6037a9715 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5240,6 +5240,7 @@ Write to a feather file. Read from a feather file. .. ipython:: python + :okwarning: result = pd.read_feather("example.feather") result @@ -5323,6 +5324,7 @@ Write to a parquet file. Read from a parquet file. .. ipython:: python + :okwarning: result = pd.read_parquet("example_fp.parquet", engine="fastparquet") result = pd.read_parquet("example_pa.parquet", engine="pyarrow") From e25e0d9cece5bd463d2f866c779ab3f525d38f90 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Mar 2021 17:34:11 -0700 Subject: [PATCH 2/2] CLN: remove unreachable quantile code --- pandas/core/array_algos/quantile.py | 63 ++++++++------------------ pandas/core/internals/array_manager.py | 4 +- pandas/core/internals/blocks.py | 2 +- pandas/core/nanops.py | 49 +++++++------------- 4 files changed, 40 insertions(+), 78 deletions(-) diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py index eb96c14286715..5f9e67a484d24 100644 --- a/pandas/core/array_algos/quantile.py +++ b/pandas/core/array_algos/quantile.py @@ -4,13 +4,9 @@ import numpy as np -from pandas._libs import lib from pandas._typing import ArrayLike -from pandas.core.dtypes.common import ( - is_list_like, - is_sparse, -) +from pandas.core.dtypes.common import is_sparse from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, @@ -22,16 +18,15 @@ from pandas.core.arrays import ExtensionArray -def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> ArrayLike: +def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> ArrayLike: """ Compute the quantiles of the given values for each quantile in `qs`. Parameters ---------- values : np.ndarray or ExtensionArray - qs : a scalar or list of the quantiles to be computed + qs : np.ndarray[float64] interpolation : str - axis : int Returns ------- @@ -40,18 +35,17 @@ def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> Arr if isinstance(values, np.ndarray): fill_value = na_value_for_dtype(values.dtype, compat=False) mask = isna(values) - return quantile_with_mask(values, mask, fill_value, qs, interpolation, axis) + return _quantile_with_mask(values, mask, fill_value, qs, interpolation) else: - return quantile_ea_compat(values, qs, interpolation, axis) + return _quantile_ea_compat(values, qs, interpolation) -def quantile_with_mask( +def _quantile_with_mask( values: np.ndarray, mask: np.ndarray, fill_value, - qs, + qs: np.ndarray, interpolation: str, - axis: int, ) -> np.ndarray: """ Compute the quantiles of the given values for each quantile in `qs`. @@ -66,11 +60,9 @@ def quantile_with_mask( fill_value : Scalar The value to interpret fill NA entries with For ExtensionArray, this is _values_for_factorize()[1] - qs : a scalar or list of the quantiles to be computed + qs : np.ndarray[float64] interpolation : str Type of interpolation - axis : int - Axis along which to compute quantiles. Returns ------- @@ -80,12 +72,12 @@ def quantile_with_mask( ----- Assumes values is already 2D. For ExtensionArray this means np.atleast_2d has been called on _values_for_factorize()[0] + + Quantile is computed along axis=1. """ - is_empty = values.shape[axis] == 0 - orig_scalar = not is_list_like(qs) - if orig_scalar: - # make list-like, unpack later - qs = [qs] + assert values.ndim == 2 + + is_empty = values.shape[1] == 0 if is_empty: # create the array of na_values @@ -97,29 +89,22 @@ def quantile_with_mask( result = nanpercentile( values, np.array(qs) * 100, - axis=axis, na_value=fill_value, mask=mask, - ndim=values.ndim, interpolation=interpolation, ) result = np.array(result, copy=False) result = result.T - if orig_scalar: - assert result.shape[-1] == 1, result.shape - result = result[..., 0] - result = lib.item_from_zerodim(result) - return result -def quantile_ea_compat( - values: ExtensionArray, qs, interpolation: str, axis: int +def _quantile_ea_compat( + values: ExtensionArray, qs: np.ndarray, interpolation: str ) -> ExtensionArray: """ - ExtensionArray compatibility layer for quantile_with_mask. + ExtensionArray compatibility layer for _quantile_with_mask. We pretend that an ExtensionArray with shape (N,) is actually (1, N,) for compatibility with non-EA code. @@ -127,9 +112,8 @@ def quantile_ea_compat( Parameters ---------- values : ExtensionArray - qs : a scalar or list of the quantiles to be computed + qs : np.ndarray[float64] interpolation: str - axis : int Returns ------- @@ -145,19 +129,12 @@ def quantile_ea_compat( arr, fill_value = values._values_for_factorize() arr = np.atleast_2d(arr) - result = quantile_with_mask(arr, mask, fill_value, qs, interpolation, axis) + result = _quantile_with_mask(arr, mask, fill_value, qs, interpolation) if not is_sparse(orig.dtype): # shape[0] should be 1 as long as EAs are 1D - - if result.ndim == 1: - # i.e. qs was originally a scalar - assert result.shape == (1,), result.shape - result = type(orig)._from_factorized(result, orig) - - else: - assert result.shape == (1, len(qs)), result.shape - result = type(orig)._from_factorized(result[0], orig) + assert result.shape == (1, len(qs)), result.shape + result = type(orig)._from_factorized(result[0], orig) # error: Incompatible return value type (got "ndarray", expected "ExtensionArray") return result # type: ignore[return-value] diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 34b3d83c066c2..40d7a49a1d6be 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -512,7 +512,9 @@ def quantile( arrs = [ensure_block_shape(x, 2) for x in self.arrays] assert axis == 1 - new_arrs = [quantile_compat(x, qs, interpolation, axis=axis) for x in arrs] + new_arrs = [ + quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs + ] for i, arr in enumerate(new_arrs): if arr.ndim == 2: assert arr.shape[0] == 1, arr.shape diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 174ea8760b0db..8cb5c6c56006e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1426,7 +1426,7 @@ def quantile( assert axis == 1 # only ever called this way assert is_list_like(qs) # caller is responsible for this - result = quantile_compat(self.values, qs, interpolation, axis) + result = quantile_compat(self.values, np.asarray(qs._values), interpolation) return new_block(result, placement=self._mgr_locs, ndim=2) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 45f275664b206..2aadf5fc07f87 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1648,7 +1648,7 @@ def f(x, y): def _nanpercentile_1d( - values: np.ndarray, mask: np.ndarray, q, na_value: Scalar, interpolation + values: np.ndarray, mask: np.ndarray, q: np.ndarray, na_value: Scalar, interpolation ) -> Union[Scalar, np.ndarray]: """ Wrapper for np.percentile that skips missing values, specialized to @@ -1659,7 +1659,7 @@ def _nanpercentile_1d( values : array over which to find quantiles mask : ndarray[bool] locations in values that should be considered missing - q : scalar or array of quantile indices to find + q : np.ndarray[float64] of quantile indices to find na_value : scalar value to return for empty or all-null values interpolation : str @@ -1672,22 +1672,17 @@ def _nanpercentile_1d( values = values[~mask] if len(values) == 0: - if lib.is_scalar(q): - return na_value - else: - return np.array([na_value] * len(q), dtype=values.dtype) + return np.array([na_value] * len(q), dtype=values.dtype) return np.percentile(values, q, interpolation=interpolation) def nanpercentile( values: np.ndarray, - q, + q: np.ndarray, *, - axis: int, na_value, mask: np.ndarray, - ndim: int, interpolation, ): """ @@ -1695,29 +1690,26 @@ def nanpercentile( Parameters ---------- - values : array over which to find quantiles - q : scalar or array of quantile indices to find - axis : {0, 1} + values : np.ndarray[ndim=2] over which to find quantiles + q : np.ndarray[float64] of quantile indices to find na_value : scalar value to return for empty or all-null values mask : ndarray[bool] locations in values that should be considered missing - ndim : {1, 2} interpolation : str Returns ------- quantiles : scalar or array """ + if values.dtype.kind in ["m", "M"]: # need to cast to integer to avoid rounding errors in numpy result = nanpercentile( values.view("i8"), q=q, - axis=axis, na_value=na_value.view("i8"), mask=mask, - ndim=ndim, interpolation=interpolation, ) @@ -1726,25 +1718,16 @@ def nanpercentile( return result.astype(values.dtype) if not lib.is_scalar(mask) and mask.any(): - if ndim == 1: - return _nanpercentile_1d( - values, mask, q, na_value, interpolation=interpolation - ) - else: - # for nonconsolidatable blocks mask is 1D, but values 2D - if mask.ndim < values.ndim: - mask = mask.reshape(values.shape) - if axis == 0: - values = values.T - mask = mask.T - result = [ - _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation) - for (val, m) in zip(list(values), list(mask)) - ] - result = np.array(result, dtype=values.dtype, copy=False).T - return result + # Caller is responsible for ensuring mask shape match + assert mask.shape == values.shape + result = [ + _nanpercentile_1d(val, m, q, na_value, interpolation=interpolation) + for (val, m) in zip(list(values), list(mask)) + ] + result = np.array(result, dtype=values.dtype, copy=False).T + return result else: - return np.percentile(values, q, axis=axis, interpolation=interpolation) + return np.percentile(values, q, axis=1, interpolation=interpolation) def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike: