diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 584e21e87390d..21a470d9b0bce 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -452,8 +452,8 @@ Indexing Missing ^^^^^^^ - - Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`). +- Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ```` was raising a ``TypeError``. The method now handles this by ignoring ```` values when doing the comparison for the replacement (:issue:`32621`) - Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ```` for all ``False`` or all ``True`` values using the nulllable boolean dtype and with ``skipna=False`` (:issue:`33253`) MultiIndex diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index bfb16b48d832c..c4d45a788e723 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -8,7 +8,7 @@ import numpy as np from pandas._libs import Timedelta, Timestamp, internals as libinternals, lib -from pandas._typing import ArrayLike, DtypeObj, Label +from pandas._typing import ArrayLike, DtypeObj, Label, Scalar from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -1905,7 +1905,9 @@ def _merge_blocks( return blocks -def _compare_or_regex_search(a, b, regex=False): +def _compare_or_regex_search( + a: Union[ArrayLike, Scalar], b: Union[ArrayLike, Scalar], regex: bool = False +) -> Union[ArrayLike, bool]: """ Compare two array_like inputs of the same shape or two scalar values @@ -1922,35 +1924,67 @@ def _compare_or_regex_search(a, b, regex=False): ------- mask : array_like of bool """ + + def _check_comparison_types( + result: Union[ArrayLike, bool], + a: Union[ArrayLike, Scalar], + b: Union[ArrayLike, Scalar], + ) -> Union[ArrayLike, bool]: + """ + Raises an error if the two arrays (a,b) cannot be compared. + Otherwise, returns the comparison result as expected. + """ + if is_scalar(result) and ( + isinstance(a, np.ndarray) or isinstance(b, np.ndarray) + ): + type_names = [type(a).__name__, type(b).__name__] + + if isinstance(a, np.ndarray): + type_names[0] = f"ndarray(dtype={a.dtype})" + + if isinstance(b, np.ndarray): + type_names[1] = f"ndarray(dtype={b.dtype})" + + raise TypeError( + f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" + ) + return result + if not regex: op = lambda x: operator.eq(x, b) else: op = np.vectorize( - lambda x: bool(re.search(b, x)) if isinstance(x, str) else False + lambda x: bool(re.search(b, x)) + if isinstance(x, str) and isinstance(b, str) + else False ) - is_a_array = isinstance(a, np.ndarray) - is_b_array = isinstance(b, np.ndarray) + # GH#32621 use mask to avoid comparing to NAs + if isinstance(a, np.ndarray) and not isinstance(b, np.ndarray): + mask = np.reshape(~(isna(a)), a.shape) + elif isinstance(b, np.ndarray) and not isinstance(a, np.ndarray): + mask = np.reshape(~(isna(b)), b.shape) + elif isinstance(a, np.ndarray) and isinstance(b, np.ndarray): + mask = ~(isna(a) | isna(b)) + if isinstance(a, np.ndarray): + a = a[mask] + if isinstance(b, np.ndarray): + b = b[mask] if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): # GH#29553 avoid deprecation warnings from numpy - result = False - else: - result = op(a) - - if is_scalar(result) and (is_a_array or is_b_array): - type_names = [type(a).__name__, type(b).__name__] + return _check_comparison_types(False, a, b) - if is_a_array: - type_names[0] = f"ndarray(dtype={a.dtype})" + result = op(a) - if is_b_array: - type_names[1] = f"ndarray(dtype={b.dtype})" + if isinstance(result, np.ndarray): + # The shape of the mask can differ to that of the result + # since we may compare only a subset of a's or b's elements + tmp = np.zeros(mask.shape, dtype=np.bool) + tmp[mask] = result + result = tmp - raise TypeError( - f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" - ) - return result + return _check_comparison_types(result, a, b) def _fast_count_smallints(arr: np.ndarray) -> np.ndarray: diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index bea8cb8b105e7..685457aff6341 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -241,6 +241,13 @@ def test_replace2(self): assert (ser[6:10] == -1).all() assert (ser[20:30] == -1).all() + def test_replace_with_dictlike_and_string_dtype(self): + # GH 32621 + s = pd.Series(["one", "two", np.nan], dtype="string") + expected = pd.Series(["1", "2", np.nan]) + result = s.replace({"one": "1", "two": "2"}) + tm.assert_series_equal(expected, result) + def test_replace_with_empty_dictlike(self): # GH 15289 s = pd.Series(list("abcd"))