From b286a0e566edca8045ce6dc0cf02fce8f3726ec6 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 22 Nov 2020 18:59:33 -0800 Subject: [PATCH 1/2] REF: Implement isin on DTA instead of DTI --- pandas/core/algorithms.py | 6 ++-- pandas/core/arrays/datetimelike.py | 51 ++++++++++++++++++++++++++- pandas/core/indexes/base.py | 2 +- pandas/core/indexes/datetimelike.py | 53 ----------------------------- pandas/core/indexes/numeric.py | 7 ---- pandas/core/series.py | 2 +- 6 files changed, 54 insertions(+), 67 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index be091314e6c25..595f4f5d42f2e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -431,10 +431,8 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray: return cast("Categorical", comps).isin(values) if needs_i8_conversion(comps): - # Dispatch to DatetimeLikeIndexMixin.isin - from pandas import Index - - return Index(comps).isin(values) + # Dispatch to DatetimeLikeArrayMixin.isin + return array(comps).isin(values) comps, dtype = _ensure_data(comps) values, _ = _ensure_data(values, dtype=dtype) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 3b419f8d1da2a..6fcb2f50b31c3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -62,7 +62,7 @@ from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna from pandas.core import nanops, ops -from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts +from pandas.core.algorithms import checked_add_with_arr, isin, unique1d, value_counts from pandas.core.arraylike import OpsMixin from pandas.core.arrays._mixins import NDArrayBackedExtensionArray import pandas.core.common as com @@ -697,6 +697,55 @@ def map(self, mapper): return Index(self).map(mapper).array + def isin(self, values) -> np.ndarray: + """ + Compute boolean array of whether each value is found in the + passed set of values. + + Parameters + ---------- + values : set or sequence of values + + Returns + ------- + ndarray[bool] + """ + if not hasattr(values, "dtype"): + values = np.asarray(values) + + if values.dtype.kind in ["f", "i", "u", "c"]: + # TODO: de-duplicate with equals, validate_comparison_value + return np.zeros(self.shape, dtype=bool) + + if not isinstance(values, type(self)): + inferrable = [ + "timedelta", + "timedelta64", + "datetime", + "datetime64", + "date", + "period", + ] + if values.dtype == object: + inferred = lib.infer_dtype(values, skipna=False) + if inferred not in inferrable: + if "mixed" in inferred: + return isin(self.astype(object), values) + return np.zeros(self.shape, dtype=bool) + + try: + values = type(self)._from_sequence(values) + except ValueError: + return isin(self.astype(object), values) + + try: + self._check_compatible_with(values) + except (TypeError, ValueError): + # Includes tzawareness mismatch and IncompatibleFrequencyError + return np.zeros(self.shape, dtype=bool) + + return isin(self.asi8, values.asi8) + # ------------------------------------------------------------------ # Null Handling diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7658230d9e1dd..44a7829f21360 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5120,7 +5120,7 @@ def isin(self, values, level=None): """ if level is not None: self._validate_index_level(level) - return algos.isin(self, values) + return algos.isin(self._values, values) def _get_string_slice(self, key: str_t): # this is for partial string indexing, diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c30abb144cea5..2d402082848ff 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -24,7 +24,6 @@ from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCIndex, ABCSeries -from pandas.core import algorithms from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin from pandas.core.base import IndexOpsMixin @@ -505,58 +504,6 @@ def _partial_date_slice( __truediv__ = make_wrapped_arith_op("__truediv__") __rtruediv__ = make_wrapped_arith_op("__rtruediv__") - def isin(self, values, level=None): - """ - Compute boolean array of whether each index value is found in the - passed set of values. - - Parameters - ---------- - values : set or sequence of values - - Returns - ------- - is_contained : ndarray (boolean dtype) - """ - if level is not None: - self._validate_index_level(level) - - if not hasattr(values, "dtype"): - values = np.asarray(values) - - if values.dtype.kind in ["f", "i", "u", "c"]: - # TODO: de-duplicate with equals, validate_comparison_value - return np.zeros(self.shape, dtype=bool) - - if not isinstance(values, type(self)): - inferrable = [ - "timedelta", - "timedelta64", - "datetime", - "datetime64", - "date", - "period", - ] - if values.dtype == object: - inferred = lib.infer_dtype(values, skipna=False) - if inferred not in inferrable: - if "mixed" in inferred: - return self.astype(object).isin(values) - return np.zeros(self.shape, dtype=bool) - - try: - values = type(self)(values) - except ValueError: - return self.astype(object).isin(values) - - try: - self._data._check_compatible_with(values) - except (TypeError, ValueError): - # Includes tzawareness mismatch and IncompatibleFrequencyError - return np.zeros(self.shape, dtype=bool) - - return algorithms.isin(self.asi8, values.asi8) - def shift(self, periods=1, freq=None): """ Shift index by desired number of time frequency increments. diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 24aaf5885fe0e..7778b1e264cd8 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -27,7 +27,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna -from pandas.core import algorithms import pandas.core.common as com from pandas.core.indexes.base import Index, maybe_extract_name @@ -434,12 +433,6 @@ def __contains__(self, other: Any) -> bool: def is_unique(self) -> bool: return super().is_unique and self._nan_idxs.size < 2 - @doc(Index.isin) - def isin(self, values, level=None): - if level is not None: - self._validate_index_level(level) - return algorithms.isin(np.array(self), values) - def _can_union_without_object_cast(self, other) -> bool: # See GH#26778, further casting may occur in NumericIndex._union return is_numeric_dtype(other.dtype) diff --git a/pandas/core/series.py b/pandas/core/series.py index d59e72a04209c..4c3ad38c8a922 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4691,7 +4691,7 @@ def isin(self, values) -> "Series": 5 False Name: animal, dtype: bool """ - result = algorithms.isin(self, values) + result = algorithms.isin(self._values, values) return self._constructor(result, index=self.index).__finalize__( self, method="isin" ) From f61447f24f08e2e00522322a4f02c569fc98c239 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 23 Nov 2020 08:16:11 -0800 Subject: [PATCH 2/2] fix for doctest --- pandas/core/arrays/datetimelike.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6fcb2f50b31c3..c482eae35b313 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -729,9 +729,13 @@ def isin(self, values) -> np.ndarray: if values.dtype == object: inferred = lib.infer_dtype(values, skipna=False) if inferred not in inferrable: - if "mixed" in inferred: + if inferred == "string": + pass + + elif "mixed" in inferred: return isin(self.astype(object), values) - return np.zeros(self.shape, dtype=bool) + else: + return np.zeros(self.shape, dtype=bool) try: values = type(self)._from_sequence(values)