From d63b9425714ea08c5321141eb155d03a44df6237 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 7 Sep 2020 19:36:58 -0700 Subject: [PATCH 1/2] REF: share more EA methods --- pandas/core/arrays/_mixins.py | 11 ++++- pandas/core/arrays/categorical.py | 72 ++---------------------------- pandas/core/arrays/datetimelike.py | 38 +++------------- pandas/core/arrays/numpy_.py | 17 +++---- 4 files changed, 26 insertions(+), 112 deletions(-) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 8b79f8ce66756..e9d8671b69c78 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -6,7 +6,7 @@ from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly, doc -from pandas.core.algorithms import searchsorted, take, unique +from pandas.core.algorithms import take, unique from pandas.core.array_algos.transforms import shift from pandas.core.arrays.base import ExtensionArray @@ -102,6 +102,9 @@ def T(self: _T) -> _T: # ------------------------------------------------------------------------ + def _values_for_argsort(self): + return self._ndarray + def copy(self: _T) -> _T: new_data = self._ndarray.copy() return self._from_backing_data(new_data) @@ -135,7 +138,11 @@ def _concat_same_type(cls, to_concat, axis: int = 0): @doc(ExtensionArray.searchsorted) def searchsorted(self, value, side="left", sorter=None): - return searchsorted(self._ndarray, value, side=side, sorter=sorter) + value = self._validate_searchsorted_value(value) + return self._ndarray.searchsorted(value, side=side, sorter=sorter) + + def _validate_searchsorted_value(self, value): + return value @doc(ExtensionArray.shift) def shift(self, periods=1, fill_value=None, axis=0): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b732db4c66003..64bda2b970793 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -12,7 +12,7 @@ from pandas._libs import NaT, algos as libalgos, hashtable as htable, lib from pandas._typing import ArrayLike, Dtype, Ordered, Scalar from pandas.compat.numpy import function as nv -from pandas.util._decorators import cache_readonly, deprecate_kwarg, doc +from pandas.util._decorators import cache_readonly, deprecate_kwarg from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core.dtypes.cast import ( @@ -45,12 +45,7 @@ import pandas.core.algorithms as algorithms from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d from pandas.core.arrays._mixins import NDArrayBackedExtensionArray -from pandas.core.base import ( - ExtensionArray, - NoNewAttributesMixin, - PandasObject, - _shared_docs, -) +from pandas.core.base import ExtensionArray, NoNewAttributesMixin, PandasObject import pandas.core.common as com from pandas.core.construction import array, extract_array, sanitize_array from pandas.core.indexers import check_array_indexer, deprecate_ndim_indexing @@ -1323,11 +1318,6 @@ def memory_usage(self, deep=False): """ return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) - @doc(_shared_docs["searchsorted"], klass="Categorical") - def searchsorted(self, value, side="left", sorter=None): - value = self._validate_searchsorted_value(value) - return self.codes.searchsorted(value, side=side, sorter=sorter) - def isna(self): """ Detect missing values @@ -1436,62 +1426,6 @@ def check_for_ordered(self, op): "Categorical to an ordered one\n" ) - def _values_for_argsort(self): - return self._codes - - def argsort(self, ascending=True, kind="quicksort", **kwargs): - """ - Return the indices that would sort the Categorical. - - .. versionchanged:: 0.25.0 - - Changed to sort missing values at the end. - - Parameters - ---------- - ascending : bool, default True - Whether the indices should result in an ascending - or descending sort. - kind : {'quicksort', 'mergesort', 'heapsort'}, optional - Sorting algorithm. - **kwargs: - passed through to :func:`numpy.argsort`. - - Returns - ------- - numpy.array - - See Also - -------- - numpy.ndarray.argsort - - Notes - ----- - While an ordering is applied to the category values, arg-sorting - in this context refers more to organizing and grouping together - based on matching category values. Thus, this function can be - called on an unordered Categorical instance unlike the functions - 'Categorical.min' and 'Categorical.max'. - - Examples - -------- - >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort() - array([2, 0, 1, 3]) - - >>> cat = pd.Categorical(['b', 'b', 'a', 'c'], - ... categories=['c', 'b', 'a'], - ... ordered=True) - >>> cat.argsort() - array([3, 0, 1, 2]) - - Missing values are placed at the end - - >>> cat = pd.Categorical([2, None, 1]) - >>> cat.argsort() - array([2, 0, 1]) - """ - return super().argsort(ascending=ascending, kind=kind, **kwargs) - def sort_values( self, inplace: bool = False, ascending: bool = True, na_position: str = "last" ): @@ -1885,7 +1819,7 @@ def __getitem__(self, key): if result.ndim > 1: deprecate_ndim_indexing(result) return result - return self._constructor(result, dtype=self.dtype, fastpath=True) + return self._from_backing_data(result) def __setitem__(self, key, value): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6477b94a823ce..d61507b33541b 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -544,15 +544,18 @@ def __getitem__(self, key): result = self._ndarray[key] if self.ndim == 1: return self._box_func(result) - return self._simple_new(result, dtype=self.dtype) + return self._from_backing_data(result) key = self._validate_getitem_key(key) result = self._ndarray[key] if lib.is_scalar(result): return self._box_func(result) + result = self._from_backing_data(result) + freq = self._get_getitem_freq(key) - return self._simple_new(result, dtype=self.dtype, freq=freq) + result._freq = freq + return result def _validate_getitem_key(self, key): if com.is_bool_indexer(key): @@ -713,9 +716,6 @@ def _values_for_factorize(self): def _from_factorized(cls, values, original): return cls(values, dtype=original.dtype) - def _values_for_argsort(self): - return self._ndarray - # ------------------------------------------------------------------ # Validation Methods # TODO: try to de-duplicate these, ensure identical behavior @@ -916,34 +916,6 @@ def _unbox(self, other) -> Union[np.int64, np.ndarray]: # These are not part of the EA API, but we implement them because # pandas assumes they're there. - def searchsorted(self, value, side="left", sorter=None): - """ - Find indices where elements should be inserted to maintain order. - - Find the indices into a sorted array `self` such that, if the - corresponding elements in `value` were inserted before the indices, - the order of `self` would be preserved. - - Parameters - ---------- - value : array_like - Values to insert into `self`. - side : {'left', 'right'}, optional - If 'left', the index of the first suitable location found is given. - If 'right', return the last such index. If there is no suitable - index, return either 0 or N (where N is the length of `self`). - sorter : 1-D array_like, optional - Optional array of integer indices that sort `self` into ascending - order. They are typically the result of ``np.argsort``. - - Returns - ------- - indices : array of ints - Array of insertion points with the same shape as `value`. - """ - value = self._validate_searchsorted_value(value) - return self._data.searchsorted(value, side=side, sorter=sorter) - def value_counts(self, dropna=False): """ Return a Series containing counts of unique values. diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 588d68514649a..d3fa87d5ea7ff 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -260,15 +260,19 @@ def __getitem__(self, item): return result def __setitem__(self, key, value) -> None: - value = extract_array(value, extract_numpy=True) + key = self._validate_setitem_key(key) + value = self._validate_setitem_value(value) + self._ndarray[key] = value - key = check_array_indexer(self, key) - scalar_value = lib.is_scalar(value) + def _validate_setitem_value(self, value): + value = extract_array(value, extract_numpy=True) - if not scalar_value: + if not lib.is_scalar(value): value = np.asarray(value, dtype=self._ndarray.dtype) + return value - self._ndarray[key] = value + def _validate_setitem_key(self, key): + return check_array_indexer(self, key) def isna(self) -> np.ndarray: return isna(self._ndarray) @@ -308,9 +312,6 @@ def _validate_fill_value(self, fill_value): fill_value = self.dtype.na_value return fill_value - def _values_for_argsort(self) -> np.ndarray: - return self._ndarray - def _values_for_factorize(self) -> Tuple[np.ndarray, int]: return self._ndarray, -1 From cbee215fc5a40228eec9f5c9ce5f15832ba05143 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Sep 2020 18:43:54 -0700 Subject: [PATCH 2/2] restore Categorical.argsort docstring --- pandas/core/arrays/categorical.py | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 99797a7bd2743..66d917b07305c 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1418,6 +1418,59 @@ def check_for_ordered(self, op): "Categorical to an ordered one\n" ) + def argsort(self, ascending=True, kind="quicksort", **kwargs): + """ + Return the indices that would sort the Categorical. + + .. versionchanged:: 0.25.0 + + Changed to sort missing values at the end. + + Parameters + ---------- + ascending : bool, default True + Whether the indices should result in an ascending + or descending sort. + kind : {'quicksort', 'mergesort', 'heapsort'}, optional + Sorting algorithm. + **kwargs: + passed through to :func:`numpy.argsort`. + + Returns + ------- + numpy.array + + See Also + -------- + numpy.ndarray.argsort + + Notes + ----- + While an ordering is applied to the category values, arg-sorting + in this context refers more to organizing and grouping together + based on matching category values. Thus, this function can be + called on an unordered Categorical instance unlike the functions + 'Categorical.min' and 'Categorical.max'. + + Examples + -------- + >>> pd.Categorical(['b', 'b', 'a', 'c']).argsort() + array([2, 0, 1, 3]) + + >>> cat = pd.Categorical(['b', 'b', 'a', 'c'], + ... categories=['c', 'b', 'a'], + ... ordered=True) + >>> cat.argsort() + array([3, 0, 1, 2]) + + Missing values are placed at the end + + >>> cat = pd.Categorical([2, None, 1]) + >>> cat.argsort() + array([2, 0, 1]) + """ + return super().argsort(ascending=ascending, kind=kind, **kwargs) + def sort_values( self, inplace: bool = False, ascending: bool = True, na_position: str = "last" ):