|
1 | 1 | from collections import defaultdict
|
2 | 2 | import itertools
|
3 |
| -import operator |
4 |
| -import re |
5 | 3 | from typing import (
|
6 | 4 | Any,
|
7 | 5 | DefaultDict,
|
8 | 6 | Dict,
|
9 | 7 | List,
|
10 | 8 | Optional,
|
11 |
| - Pattern, |
12 | 9 | Sequence,
|
13 | 10 | Tuple,
|
14 | 11 | TypeVar,
|
|
19 | 16 | import numpy as np
|
20 | 17 |
|
21 | 18 | from pandas._libs import internals as libinternals, lib
|
22 |
| -from pandas._typing import ArrayLike, DtypeObj, Label, Scalar |
| 19 | +from pandas._typing import ArrayLike, DtypeObj, Label |
23 | 20 | from pandas.util._validators import validate_bool_kwarg
|
24 | 21 |
|
25 | 22 | from pandas.core.dtypes.cast import (
|
|
29 | 26 | )
|
30 | 27 | from pandas.core.dtypes.common import (
|
31 | 28 | DT64NS_DTYPE,
|
32 |
| - is_datetimelike_v_numeric, |
33 | 29 | is_dtype_equal,
|
34 | 30 | is_extension_array_dtype,
|
35 | 31 | is_list_like,
|
36 |
| - is_numeric_v_string_like, |
37 |
| - is_scalar, |
38 | 32 | )
|
39 | 33 | from pandas.core.dtypes.concat import concat_compat
|
40 | 34 | from pandas.core.dtypes.dtypes import ExtensionDtype
|
|
44 | 38 | import pandas.core.algorithms as algos
|
45 | 39 | from pandas.core.arrays.sparse import SparseDtype
|
46 | 40 | from pandas.core.base import PandasObject
|
47 |
| -import pandas.core.common as com |
48 | 41 | from pandas.core.construction import extract_array
|
49 | 42 | from pandas.core.indexers import maybe_convert_indices
|
50 | 43 | from pandas.core.indexes.api import Index, ensure_index
|
@@ -628,31 +621,10 @@ def replace_list(
|
628 | 621 | """ do a list replace """
|
629 | 622 | inplace = validate_bool_kwarg(inplace, "inplace")
|
630 | 623 |
|
631 |
| - # figure out our mask a priori to avoid repeated replacements |
632 |
| - values = self.as_array() |
633 |
| - |
634 |
| - def comp(s: Scalar, mask: np.ndarray, regex: bool = False): |
635 |
| - """ |
636 |
| - Generate a bool array by performing an equality check, or by performing |
637 |
| - element-wise regular expression matching |
638 |
| - """ |
639 |
| - if isna(s): |
640 |
| - return ~mask |
641 |
| - |
642 |
| - s = com.maybe_box_datetimelike(s) |
643 |
| - return _compare_or_regex_search(values, s, regex, mask) |
644 |
| - |
645 |
| - # Calculate the mask once, prior to the call of comp |
646 |
| - # in order to avoid repeating the same computations |
647 |
| - mask = ~isna(values) |
648 |
| - |
649 |
| - masks = [comp(s, mask, regex) for s in src_list] |
650 |
| - |
651 | 624 | bm = self.apply(
|
652 | 625 | "_replace_list",
|
653 | 626 | src_list=src_list,
|
654 | 627 | dest_list=dest_list,
|
655 |
| - masks=masks, |
656 | 628 | inplace=inplace,
|
657 | 629 | regex=regex,
|
658 | 630 | )
|
@@ -1900,80 +1872,6 @@ def _merge_blocks(
|
1900 | 1872 | return blocks
|
1901 | 1873 |
|
1902 | 1874 |
|
1903 |
| -def _compare_or_regex_search( |
1904 |
| - a: ArrayLike, |
1905 |
| - b: Union[Scalar, Pattern], |
1906 |
| - regex: bool = False, |
1907 |
| - mask: Optional[ArrayLike] = None, |
1908 |
| -) -> Union[ArrayLike, bool]: |
1909 |
| - """ |
1910 |
| - Compare two array_like inputs of the same shape or two scalar values |
1911 |
| -
|
1912 |
| - Calls operator.eq or re.search, depending on regex argument. If regex is |
1913 |
| - True, perform an element-wise regex matching. |
1914 |
| -
|
1915 |
| - Parameters |
1916 |
| - ---------- |
1917 |
| - a : array_like |
1918 |
| - b : scalar or regex pattern |
1919 |
| - regex : bool, default False |
1920 |
| - mask : array_like or None (default) |
1921 |
| -
|
1922 |
| - Returns |
1923 |
| - ------- |
1924 |
| - mask : array_like of bool |
1925 |
| - """ |
1926 |
| - |
1927 |
| - def _check_comparison_types( |
1928 |
| - result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern] |
1929 |
| - ): |
1930 |
| - """ |
1931 |
| - Raises an error if the two arrays (a,b) cannot be compared. |
1932 |
| - Otherwise, returns the comparison result as expected. |
1933 |
| - """ |
1934 |
| - if is_scalar(result) and isinstance(a, np.ndarray): |
1935 |
| - type_names = [type(a).__name__, type(b).__name__] |
1936 |
| - |
1937 |
| - if isinstance(a, np.ndarray): |
1938 |
| - type_names[0] = f"ndarray(dtype={a.dtype})" |
1939 |
| - |
1940 |
| - raise TypeError( |
1941 |
| - f"Cannot compare types {repr(type_names[0])} and {repr(type_names[1])}" |
1942 |
| - ) |
1943 |
| - |
1944 |
| - if not regex: |
1945 |
| - op = lambda x: operator.eq(x, b) |
1946 |
| - else: |
1947 |
| - op = np.vectorize( |
1948 |
| - lambda x: bool(re.search(b, x)) |
1949 |
| - if isinstance(x, str) and isinstance(b, (str, Pattern)) |
1950 |
| - else False |
1951 |
| - ) |
1952 |
| - |
1953 |
| - # GH#32621 use mask to avoid comparing to NAs |
1954 |
| - if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray): |
1955 |
| - mask = np.reshape(~(isna(a)), a.shape) |
1956 |
| - if isinstance(a, np.ndarray): |
1957 |
| - a = a[mask] |
1958 |
| - |
1959 |
| - if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b): |
1960 |
| - # GH#29553 avoid deprecation warnings from numpy |
1961 |
| - _check_comparison_types(False, a, b) |
1962 |
| - return False |
1963 |
| - |
1964 |
| - result = op(a) |
1965 |
| - |
1966 |
| - if isinstance(result, np.ndarray) and mask is not None: |
1967 |
| - # The shape of the mask can differ from that of the result |
1968 |
| - # since we may compare only a subset of a's or b's elements |
1969 |
| - tmp = np.zeros(mask.shape, dtype=np.bool_) |
1970 |
| - tmp[mask] = result |
1971 |
| - result = tmp |
1972 |
| - |
1973 |
| - _check_comparison_types(result, a, b) |
1974 |
| - return result |
1975 |
| - |
1976 |
| - |
1977 | 1875 | def _fast_count_smallints(arr: np.ndarray) -> np.ndarray:
|
1978 | 1876 | """Faster version of set(arr) for sequences of small numbers."""
|
1979 | 1877 | counts = np.bincount(arr.astype(np.int_))
|
|
0 commit comments