Skip to content

CLN: remove unreachable quantile code #40547

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/user_guide/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5240,6 +5240,7 @@ Write to a feather file.
Read from a feather file.

.. ipython:: python
:okwarning:

result = pd.read_feather("example.feather")
result
Expand Down Expand Up @@ -5323,6 +5324,7 @@ Write to a parquet file.
Read from a parquet file.

.. ipython:: python
:okwarning:

result = pd.read_parquet("example_fp.parquet", engine="fastparquet")
result = pd.read_parquet("example_pa.parquet", engine="pyarrow")
Expand Down
63 changes: 20 additions & 43 deletions pandas/core/array_algos/quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,9 @@

import numpy as np

from pandas._libs import lib
from pandas._typing import ArrayLike

from pandas.core.dtypes.common import (
is_list_like,
is_sparse,
)
from pandas.core.dtypes.common import is_sparse
from pandas.core.dtypes.missing import (
isna,
na_value_for_dtype,
Expand All @@ -22,16 +18,15 @@
from pandas.core.arrays import ExtensionArray


def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> ArrayLike:
def quantile_compat(values: ArrayLike, qs: np.ndarray, interpolation: str) -> ArrayLike:
"""
Compute the quantiles of the given values for each quantile in `qs`.

Parameters
----------
values : np.ndarray or ExtensionArray
qs : a scalar or list of the quantiles to be computed
qs : np.ndarray[float64]
interpolation : str
axis : int

Returns
-------
Expand All @@ -40,18 +35,17 @@ def quantile_compat(values: ArrayLike, qs, interpolation: str, axis: int) -> Arr
if isinstance(values, np.ndarray):
fill_value = na_value_for_dtype(values.dtype, compat=False)
mask = isna(values)
return quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)
return _quantile_with_mask(values, mask, fill_value, qs, interpolation)
else:
return quantile_ea_compat(values, qs, interpolation, axis)
return _quantile_ea_compat(values, qs, interpolation)


def quantile_with_mask(
def _quantile_with_mask(
values: np.ndarray,
mask: np.ndarray,
fill_value,
qs,
qs: np.ndarray,
interpolation: str,
axis: int,
) -> np.ndarray:
"""
Compute the quantiles of the given values for each quantile in `qs`.
Expand All @@ -66,11 +60,9 @@ def quantile_with_mask(
fill_value : Scalar
The value used to fill NA entries in the result.
For ExtensionArray, this is _values_for_factorize()[1]
qs : a scalar or list of the quantiles to be computed
qs : np.ndarray[float64]
interpolation : str
Type of interpolation
axis : int
Axis along which to compute quantiles.

Returns
-------
Expand All @@ -80,12 +72,12 @@ def quantile_with_mask(
-----
Assumes values is already 2D. For ExtensionArray this means np.atleast_2d
has been called on _values_for_factorize()[0]

Quantile is computed along axis=1.
"""
is_empty = values.shape[axis] == 0
orig_scalar = not is_list_like(qs)
if orig_scalar:
# make list-like, unpack later
qs = [qs]
assert values.ndim == 2

is_empty = values.shape[1] == 0

if is_empty:
# create the array of na_values
Expand All @@ -97,39 +89,31 @@ def quantile_with_mask(
result = nanpercentile(
values,
np.array(qs) * 100,
axis=axis,
na_value=fill_value,
mask=mask,
ndim=values.ndim,
interpolation=interpolation,
)

result = np.array(result, copy=False)
result = result.T

if orig_scalar:
assert result.shape[-1] == 1, result.shape
result = result[..., 0]
result = lib.item_from_zerodim(result)

return result


def quantile_ea_compat(
values: ExtensionArray, qs, interpolation: str, axis: int
def _quantile_ea_compat(
values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
"""
ExtensionArray compatibility layer for quantile_with_mask.
ExtensionArray compatibility layer for _quantile_with_mask.

We pretend that an ExtensionArray with shape (N,) is actually (1, N,)
for compatibility with non-EA code.

Parameters
----------
values : ExtensionArray
qs : a scalar or list of the quantiles to be computed
qs : np.ndarray[float64]
interpolation : str
axis : int

Returns
-------
Expand All @@ -145,19 +129,12 @@ def quantile_ea_compat(
arr, fill_value = values._values_for_factorize()
arr = np.atleast_2d(arr)

result = quantile_with_mask(arr, mask, fill_value, qs, interpolation, axis)
result = _quantile_with_mask(arr, mask, fill_value, qs, interpolation)

if not is_sparse(orig.dtype):
# shape[0] should be 1 as long as EAs are 1D

if result.ndim == 1:
# i.e. qs was originally a scalar
assert result.shape == (1,), result.shape
result = type(orig)._from_factorized(result, orig)

else:
assert result.shape == (1, len(qs)), result.shape
result = type(orig)._from_factorized(result[0], orig)
assert result.shape == (1, len(qs)), result.shape
result = type(orig)._from_factorized(result[0], orig)

# error: Incompatible return value type (got "ndarray", expected "ExtensionArray")
return result # type: ignore[return-value]
4 changes: 3 additions & 1 deletion pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,9 @@ def quantile(

arrs = [ensure_block_shape(x, 2) for x in self.arrays]
assert axis == 1
new_arrs = [quantile_compat(x, qs, interpolation, axis=axis) for x in arrs]
new_arrs = [
quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs
]
for i, arr in enumerate(new_arrs):
if arr.ndim == 2:
assert arr.shape[0] == 1, arr.shape
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1426,7 +1426,7 @@ def quantile(
assert axis == 1 # only ever called this way
assert is_list_like(qs) # caller is responsible for this

result = quantile_compat(self.values, qs, interpolation, axis)
result = quantile_compat(self.values, np.asarray(qs._values), interpolation)

return new_block(result, placement=self._mgr_locs, ndim=2)

Expand Down
49 changes: 16 additions & 33 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1648,7 +1648,7 @@ def f(x, y):


def _nanpercentile_1d(
values: np.ndarray, mask: np.ndarray, q, na_value: Scalar, interpolation
values: np.ndarray, mask: np.ndarray, q: np.ndarray, na_value: Scalar, interpolation
) -> Union[Scalar, np.ndarray]:
"""
Wrapper for np.percentile that skips missing values, specialized to
Expand All @@ -1659,7 +1659,7 @@ def _nanpercentile_1d(
values : array over which to find quantiles
mask : ndarray[bool]
locations in values that should be considered missing
q : scalar or array of quantile indices to find
q : np.ndarray[float64] of quantile indices to find
na_value : scalar
value to return for empty or all-null values
interpolation : str
Expand All @@ -1672,52 +1672,44 @@ def _nanpercentile_1d(
values = values[~mask]

if len(values) == 0:
if lib.is_scalar(q):
return na_value
else:
return np.array([na_value] * len(q), dtype=values.dtype)
return np.array([na_value] * len(q), dtype=values.dtype)

return np.percentile(values, q, interpolation=interpolation)


def nanpercentile(
values: np.ndarray,
q,
q: np.ndarray,
*,
axis: int,
na_value,
mask: np.ndarray,
ndim: int,
interpolation,
):
"""
Wrapper for np.percentile that skips missing values.

Parameters
----------
values : array over which to find quantiles
q : scalar or array of quantile indices to find
axis : {0, 1}
values : np.ndarray[ndim=2] over which to find quantiles
q : np.ndarray[float64] of quantile indices to find
na_value : scalar
value to return for empty or all-null values
mask : ndarray[bool]
locations in values that should be considered missing
ndim : {1, 2}
interpolation : str

Returns
-------
quantiles : scalar or array
"""

if values.dtype.kind in ["m", "M"]:
# need to cast to integer to avoid rounding errors in numpy
result = nanpercentile(
values.view("i8"),
q=q,
axis=axis,
na_value=na_value.view("i8"),
mask=mask,
ndim=ndim,
interpolation=interpolation,
)

Expand All @@ -1726,25 +1718,16 @@ def nanpercentile(
return result.astype(values.dtype)

if not lib.is_scalar(mask) and mask.any():
if ndim == 1:
return _nanpercentile_1d(
values, mask, q, na_value, interpolation=interpolation
)
else:
# for nonconsolidatable blocks mask is 1D, but values 2D
if mask.ndim < values.ndim:
mask = mask.reshape(values.shape)
if axis == 0:
values = values.T
mask = mask.T
result = [
_nanpercentile_1d(val, m, q, na_value, interpolation=interpolation)
for (val, m) in zip(list(values), list(mask))
]
result = np.array(result, dtype=values.dtype, copy=False).T
return result
# Caller is responsible for ensuring that mask.shape matches values.shape
assert mask.shape == values.shape
result = [
_nanpercentile_1d(val, m, q, na_value, interpolation=interpolation)
for (val, m) in zip(list(values), list(mask))
]
result = np.array(result, dtype=values.dtype, copy=False).T
return result
else:
return np.percentile(values, q, axis=axis, interpolation=interpolation)
return np.percentile(values, q, axis=1, interpolation=interpolation)


def na_accum_func(values: ArrayLike, accum_func, *, skipna: bool) -> ArrayLike:
Expand Down