Skip to content

CLN: simplify mask_missing #38127

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 28, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 22 additions & 34 deletions pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,65 +7,53 @@
import numpy as np

from pandas._libs import algos, lib
from pandas._typing import DtypeObj
from pandas._typing import ArrayLike, DtypeObj
from pandas.compat._optional import import_optional_dependency

from pandas.core.dtypes.cast import infer_dtype_from_array
from pandas.core.dtypes.common import (
ensure_float64,
is_integer_dtype,
is_numeric_v_string_like,
is_scalar,
needs_i8_conversion,
)
from pandas.core.dtypes.missing import isna


def mask_missing(arr, values_to_mask):
def mask_missing(arr: ArrayLike, values_to_mask) -> np.ndarray:
"""
Return a masking array of same size/shape as arr
with entries equaling any member of values_to_mask set to True
"""
dtype, values_to_mask = infer_dtype_from_array(values_to_mask)

try:
values_to_mask = np.array(values_to_mask, dtype=dtype)
Parameters
----------
arr : ArrayLike
values_to_mask: list, tuple, or scalar

except Exception:
values_to_mask = np.array(values_to_mask, dtype=object)
Returns
-------
np.ndarray[bool]
"""
# When called from Block.replace/replace_list, values_to_mask is a scalar
# known to be holdable by arr.
# When called from Series._single_replace, values_to_mask is tuple or list
dtype, values_to_mask = infer_dtype_from_array(values_to_mask)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, this is very confusing; we ought to simplify and tighten up the signature here (so that it is called consistently), but that is clearly for a follow-on.

values_to_mask = np.array(values_to_mask, dtype=dtype)

na_mask = isna(values_to_mask)
nonna = values_to_mask[~na_mask]

mask = None
# GH 21977
mask = np.zeros(arr.shape, dtype=bool)
for x in nonna:
if mask is None:
if is_numeric_v_string_like(arr, x):
# GH#29553 prevent numpy deprecation warnings
mask = False
else:
mask = arr == x

# if x is a string and arr is not, then we get False and we must
# expand the mask to size arr.shape
if is_scalar(mask):
mask = np.zeros(arr.shape, dtype=bool)
if is_numeric_v_string_like(arr, x):
# GH#29553 prevent numpy deprecation warnings
pass
else:
if is_numeric_v_string_like(arr, x):
# GH#29553 prevent numpy deprecation warnings
mask |= False
else:
mask |= arr == x
mask |= arr == x

if na_mask.any():
if mask is None:
mask = isna(arr)
else:
mask |= isna(arr)

# GH 21977
if mask is None:
mask = np.zeros(arr.shape, dtype=bool)
mask |= isna(arr)

return mask

Expand Down