|
| 1 | +""" |
| 2 | +Methods used by Block.replace and related methods. |
| 3 | +""" |
| 4 | +import operator |
| 5 | +import re |
| 6 | +from typing import Optional, Pattern, Union |
| 7 | + |
| 8 | +import numpy as np |
| 9 | + |
| 10 | +from pandas._typing import ArrayLike, Scalar |
| 11 | + |
| 12 | +from pandas.core.dtypes.common import ( |
| 13 | + is_datetimelike_v_numeric, |
| 14 | + is_numeric_v_string_like, |
| 15 | + is_scalar, |
| 16 | +) |
| 17 | +from pandas.core.dtypes.missing import isna |
| 18 | + |
| 19 | + |
def compare_or_regex_search(
    a: ArrayLike,
    b: Union[Scalar, Pattern],
    regex: bool = False,
    mask: Optional[ArrayLike] = None,
) -> Union[ArrayLike, bool]:
    """
    Compare array_like (or scalar) values against a scalar or regex pattern.

    Calls operator.eq or re.search, depending on the regex argument. If regex
    is True, an element-wise regex search is performed; elements that are not
    strings never match.

    Parameters
    ----------
    a : array_like
        Values to test.
    b : scalar or regex pattern
        Value to compare against, or pattern to search for.
    regex : bool, default False
        If True use ``re.search`` element-wise instead of ``==``.
    mask : array_like or None (default)
        Boolean mask selecting the elements of ``a`` that are compared.
        If None, ``a`` is an ndarray and ``b`` is not, the non-NA positions
        of ``a`` are used. If no mask is available, ``a`` is compared as-is.

    Returns
    -------
    mask : array_like of bool
        When a mask is in effect and the comparison yields an ndarray, the
        result has the shape of the mask and positions excluded by it are
        False. A plain ``False`` is returned for a non-ndarray datetime-like
        vs. numeric comparison.

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison produced a scalar, i.e. the
        two operands cannot be compared element-wise.
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raise TypeError if comparing the ndarray ``a`` with ``b`` gave a scalar.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            type_names = [f"ndarray(dtype={a.dtype})", type(b).__name__]

            raise TypeError(
                f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}"
            )

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False,
            # Without an explicit output type np.vectorize raises on size-0
            # input, which happens when every element has been masked out.
            otypes=[bool],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
        mask = np.reshape(~(isna(a)), a.shape)

    # Only index when there really is a mask: ``a[None]`` would not select
    # anything, it would prepend a new axis and change the result's shape.
    values = a
    if mask is not None and isinstance(a, np.ndarray):
        values = a[mask]

    if is_numeric_v_string_like(values, b):
        # GH#29553 avoid deprecation warnings from numpy
        # Use the shape of the unmasked input so that the all-False result
        # lines up with ``a`` even when NA positions were stripped above.
        return np.zeros(a.shape, dtype=bool)

    elif is_datetimelike_v_numeric(values, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, values, b)
        return False

    result = op(values)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, values, b)
    return result
0 commit comments