
Commit d236e9b

jbrockmendel authored and pull[bot] committed
REF: share more EA methods (#36154)
1 parent 7c9adf3 commit d236e9b
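
The refactor moves shared implementations of _concat_same_type, searchsorted, and shift onto the NDArrayBackedExtensionArray mixin, so Categorical, the datetime-like arrays, and PandasArray stop carrying their own copies. A minimal sketch of the backing-ndarray pattern the mixin relies on (illustrative only; the classes below are hypothetical, not the pandas source):

import numpy as np

class NDArrayBackedMixin:
    # Subclasses expose their data as a plain ndarray and know how to
    # rewrap an ndarray of the same kind into their own type.
    _ndarray: np.ndarray

    def _from_backing_data(self, arr: np.ndarray):
        raise NotImplementedError

    # A single shared implementation serves every backing-ndarray subclass.
    def searchsorted(self, value, side="left", sorter=None):
        return self._ndarray.searchsorted(value, side=side, sorter=sorter)

    @classmethod
    def _concat_same_type(cls, to_concat, axis=0):
        new_values = np.concatenate([x._ndarray for x in to_concat], axis=axis)
        return to_concat[0]._from_backing_data(new_values)

class IntBackedArray(NDArrayBackedMixin):
    def __init__(self, values):
        self._ndarray = np.asarray(values, dtype=np.int64)

    def _from_backing_data(self, arr):
        return type(self)(arr)

a = IntBackedArray([1, 3, 5])
b = IntBackedArray([7, 9])
print(IntBackedArray._concat_same_type([a, b])._ndarray)  # [1 3 5 7 9]
print(a.searchsorted(4))  # 2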

4 files changed: +45 −154 lines

pandas/core/arrays/_mixins.py

+31 −2
@@ -4,9 +4,10 @@
 
 from pandas.compat.numpy import function as nv
 from pandas.errors import AbstractMethodError
-from pandas.util._decorators import cache_readonly
+from pandas.util._decorators import cache_readonly, doc
 
-from pandas.core.algorithms import take, unique
+from pandas.core.algorithms import searchsorted, take, unique
+from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays.base import ExtensionArray
 
 _T = TypeVar("_T", bound="NDArrayBackedExtensionArray")
@@ -120,3 +121,31 @@ def repeat(self: _T, repeats, axis=None) -> _T:
     def unique(self: _T) -> _T:
         new_data = unique(self._ndarray)
         return self._from_backing_data(new_data)
+
+    @classmethod
+    @doc(ExtensionArray._concat_same_type)
+    def _concat_same_type(cls, to_concat, axis: int = 0):
+        dtypes = {str(x.dtype) for x in to_concat}
+        if len(dtypes) != 1:
+            raise ValueError("to_concat must have the same dtype (tz)", dtypes)
+
+        new_values = [x._ndarray for x in to_concat]
+        new_values = np.concatenate(new_values, axis=axis)
+        return to_concat[0]._from_backing_data(new_values)
+
+    @doc(ExtensionArray.searchsorted)
+    def searchsorted(self, value, side="left", sorter=None):
+        return searchsorted(self._ndarray, value, side=side, sorter=sorter)
+
+    @doc(ExtensionArray.shift)
+    def shift(self, periods=1, fill_value=None, axis=0):
+
+        fill_value = self._validate_shift_value(fill_value)
+        new_values = shift(self._ndarray, periods, axis, fill_value)
+
+        return self._from_backing_data(new_values)
+
+    def _validate_shift_value(self, fill_value):
+        # TODO: after deprecation in datetimelikearraymixin is enforced,
+        #  we can remove this and use validate_fill_value directly
+        return self._validate_fill_value(fill_value)
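
For context, the shift helper imported here from pandas.core.array_algos.transforms shifts along an axis and fills the vacated positions with fill_value rather than wrapping around. A rough NumPy illustration of that semantics for the 1-D case (a sketch of the behavior, not the pandas implementation):

import numpy as np

def shift_1d(values: np.ndarray, periods: int, fill_value) -> np.ndarray:
    # Shift forward (positive periods) or backward (negative periods) and
    # fill the newly exposed slots; fill_value is assumed already validated.
    result = np.empty_like(values)
    if periods > 0:
        result[:periods] = fill_value
        result[periods:] = values[:-periods]
    elif periods < 0:
        result[periods:] = fill_value
        result[:periods] = values[-periods:]
    else:
        result[:] = values
    return result

print(shift_1d(np.array([10, 20, 30, 40]), 1, -1))   # [-1 10 20 30]
print(shift_1d(np.array([10, 20, 30, 40]), -2, -1))  # [30 40 -1 -1]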

pandas/core/arrays/categorical.py

+6 −120
@@ -44,8 +44,7 @@
 from pandas.core.accessor import PandasDelegate, delegate_names
 import pandas.core.algorithms as algorithms
 from pandas.core.algorithms import _get_data_algo, factorize, take_1d, unique1d
-from pandas.core.array_algos.transforms import shift
-from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
+from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
 from pandas.core.base import (
     ExtensionArray,
     NoNewAttributesMixin,
@@ -1193,35 +1192,6 @@ def map(self, mapper):
     __le__ = _cat_compare_op(operator.le)
     __ge__ = _cat_compare_op(operator.ge)
 
-    def shift(self, periods, fill_value=None):
-        """
-        Shift Categorical by desired number of periods.
-
-        Parameters
-        ----------
-        periods : int
-            Number of periods to move, can be positive or negative
-        fill_value : object, optional
-            The scalar value to use for newly introduced missing values.
-
-            .. versionadded:: 0.24.0
-
-        Returns
-        -------
-        shifted : Categorical
-        """
-        # since categoricals always have ndim == 1, an axis parameter
-        # doesn't make any sense here.
-        codes = self.codes
-        if codes.ndim > 1:
-            raise NotImplementedError("Categorical with ndim > 1.")
-
-        fill_value = self._validate_fill_value(fill_value)
-
-        codes = shift(codes, periods, axis=0, fill_value=fill_value)
-
-        return self._constructor(codes, dtype=self.dtype, fastpath=True)
-
     def _validate_fill_value(self, fill_value):
         """
         Convert a user-facing fill_value to a representation to use with our
@@ -1383,20 +1353,6 @@ def notna(self):
 
     notnull = notna
 
-    def dropna(self):
-        """
-        Return the Categorical without null values.
-
-        Missing values (-1 in .codes) are detected.
-
-        Returns
-        -------
-        valid : Categorical
-        """
-        result = self[self.notna()]
-
-        return result
-
     def value_counts(self, dropna=True):
         """
         Return a Series containing counts of each category.
@@ -1749,81 +1705,6 @@ def fillna(self, value=None, method=None, limit=None):
 
         return self._constructor(codes, dtype=self.dtype, fastpath=True)
 
-    def take(self: _T, indexer, allow_fill: bool = False, fill_value=None) -> _T:
-        """
-        Take elements from the Categorical.
-
-        Parameters
-        ----------
-        indexer : sequence of int
-            The indices in `self` to take. The meaning of negative values in
-            `indexer` depends on the value of `allow_fill`.
-        allow_fill : bool, default False
-            How to handle negative values in `indexer`.
-
-            * False: negative values in `indices` indicate positional indices
-              from the right. This is similar to
-              :func:`numpy.take`.
-
-            * True: negative values in `indices` indicate missing values
-              (the default). These values are set to `fill_value`. Any other
-              other negative values raise a ``ValueError``.
-
-            .. versionchanged:: 1.0.0
-
-               Default value changed from ``True`` to ``False``.
-
-        fill_value : object
-            The value to use for `indices` that are missing (-1), when
-            ``allow_fill=True``. This should be the category, i.e. a value
-            in ``self.categories``, not a code.
-
-        Returns
-        -------
-        Categorical
-            This Categorical will have the same categories and ordered as
-            `self`.
-
-        See Also
-        --------
-        Series.take : Similar method for Series.
-        numpy.ndarray.take : Similar method for NumPy arrays.
-
-        Examples
-        --------
-        >>> cat = pd.Categorical(['a', 'a', 'b'])
-        >>> cat
-        ['a', 'a', 'b']
-        Categories (2, object): ['a', 'b']
-
-        Specify ``allow_fill==False`` to have negative indices mean indexing
-        from the right.
-
-        >>> cat.take([0, -1, -2], allow_fill=False)
-        ['a', 'b', 'a']
-        Categories (2, object): ['a', 'b']
-
-        With ``allow_fill=True``, indices equal to ``-1`` mean "missing"
-        values that should be filled with the `fill_value`, which is
-        ``np.nan`` by default.
-
-        >>> cat.take([0, -1, -1], allow_fill=True)
-        ['a', NaN, NaN]
-        Categories (2, object): ['a', 'b']
-
-        The fill value can be specified.
-
-        >>> cat.take([0, -1, -1], allow_fill=True, fill_value='a')
-        ['a', 'a', 'a']
-        Categories (2, object): ['a', 'b']
-
-        Specifying a fill value that's not in ``self.categories``
-        will raise a ``ValueError``.
-        """
-        return NDArrayBackedExtensionArray.take(
-            self, indexer, allow_fill=allow_fill, fill_value=fill_value
-        )
-
     # ------------------------------------------------------------------
     # NDArrayBackedExtensionArray compat
 
@@ -1861,6 +1742,9 @@ def __contains__(self, key) -> bool:
 
         return contains(self, key, container=self._codes)
 
+    # ------------------------------------------------------------------
+    # Rendering Methods
+
     def _tidy_repr(self, max_vals=10, footer=True) -> str:
         """
         a short repr displaying only max_vals and an optional (but default
@@ -1959,6 +1843,8 @@ def __repr__(self) -> str:
 
         return result
 
+    # ------------------------------------------------------------------
+
     def _maybe_coerce_indexer(self, indexer):
         """
         return an indexer coerced to the codes dtype
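
Categorical.take, shift, and dropna are now inherited from the shared base classes rather than re-declared here, and the user-facing behavior is intended to be unchanged. The doctest from the removed take docstring still describes the expected result, for example:

>>> import pandas as pd
>>> cat = pd.Categorical(['a', 'a', 'b'])
>>> cat.take([0, -1, -1], allow_fill=True, fill_value='a')
['a', 'a', 'a']
Categories (2, object): ['a', 'b']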

pandas/core/arrays/datetimelike.py

+7 −21
@@ -54,9 +54,8 @@
 
 from pandas.core import missing, nanops, ops
 from pandas.core.algorithms import checked_add_with_arr, unique1d, value_counts
-from pandas.core.array_algos.transforms import shift
 from pandas.core.arrays._mixins import _T, NDArrayBackedExtensionArray
-from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
+from pandas.core.arrays.base import ExtensionOpsMixin
 import pandas.core.common as com
 from pandas.core.construction import array, extract_array
 from pandas.core.indexers import check_array_indexer
@@ -672,18 +671,11 @@ def view(self, dtype=None):
 
     @classmethod
     def _concat_same_type(cls, to_concat, axis: int = 0):
-
-        # do not pass tz to set because tzlocal cannot be hashed
-        dtypes = {str(x.dtype) for x in to_concat}
-        if len(dtypes) != 1:
-            raise ValueError("to_concat must have the same dtype (tz)", dtypes)
+        new_obj = super()._concat_same_type(to_concat, axis)
 
         obj = to_concat[0]
         dtype = obj.dtype
 
-        i8values = [x.asi8 for x in to_concat]
-        values = np.concatenate(i8values, axis=axis)
-
         new_freq = None
         if is_period_dtype(dtype):
             new_freq = obj.freq
@@ -697,11 +689,13 @@ def _concat_same_type(cls, to_concat, axis: int = 0):
             if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
                 new_freq = obj.freq
 
-        return cls._simple_new(values, dtype=dtype, freq=new_freq)
+        new_obj._freq = new_freq
+        return new_obj
 
     def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
-        values = self.asi8.copy()
-        return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)
+        new_obj = super().copy()
+        new_obj._freq = self.freq
+        return new_obj
 
     def _values_for_factorize(self):
         return self.asi8, iNaT
@@ -713,14 +707,6 @@ def _from_factorized(cls, values, original):
     def _values_for_argsort(self):
         return self._data
 
-    @Appender(ExtensionArray.shift.__doc__)
-    def shift(self, periods=1, fill_value=None, axis=0):
-
-        fill_value = self._validate_shift_value(fill_value)
-        new_values = shift(self._data, periods, axis, fill_value)
-
-        return type(self)._simple_new(new_values, dtype=self.dtype)
-
     # ------------------------------------------------------------------
     # Validation Methods
     #  TODO: try to de-duplicate these, ensure identical behavior
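
The datetime-like overrides now follow a delegate-then-restore pattern: call the shared mixin implementation via super(), then reattach the frequency that only these arrays track (the new_obj._freq assignments above). A generic sketch of that pattern (class names here are hypothetical, not the pandas classes):

import numpy as np

class BackedArray:
    def __init__(self, values):
        self._ndarray = np.asarray(values)

    def copy(self):
        # Shared implementation: copy the backing ndarray and rewrap it.
        return type(self)(self._ndarray.copy())

class FreqBackedArray(BackedArray):
    def __init__(self, values, freq=None):
        super().__init__(values)
        self._freq = freq

    def copy(self):
        # Delegate to the shared copy, then restore subclass-only state,
        # mirroring new_obj._freq = self.freq in the diff above.
        new_obj = super().copy()
        new_obj._freq = self._freq
        return new_obj

arr = FreqBackedArray(np.arange(3), freq="D")
dup = arr.copy()
print(dup._freq, dup._ndarray is not arr._ndarray)  # D True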

pandas/core/arrays/numpy_.py

+1 −11
@@ -7,7 +7,6 @@
 from pandas._libs import lib
 from pandas._typing import Scalar
 from pandas.compat.numpy import function as nv
-from pandas.util._decorators import doc
 from pandas.util._validators import validate_fillna_kwargs
 
 from pandas.core.dtypes.dtypes import ExtensionDtype
@@ -16,10 +15,9 @@
 
 from pandas import compat
 from pandas.core import nanops, ops
-from pandas.core.algorithms import searchsorted
 from pandas.core.array_algos import masked_reductions
 from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
-from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
+from pandas.core.arrays.base import ExtensionOpsMixin
 from pandas.core.construction import extract_array
 from pandas.core.indexers import check_array_indexer
 from pandas.core.missing import backfill_1d, pad_1d
@@ -189,10 +187,6 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False) -> "PandasArray
     def _from_factorized(cls, values, original) -> "PandasArray":
         return cls(values)
 
-    @classmethod
-    def _concat_same_type(cls, to_concat) -> "PandasArray":
-        return cls(np.concatenate(to_concat))
-
     def _from_backing_data(self, arr: np.ndarray) -> "PandasArray":
         return type(self)(arr)
 
@@ -423,10 +417,6 @@ def to_numpy(
 
         return result
 
-    @doc(ExtensionArray.searchsorted)
-    def searchsorted(self, value, side="left", sorter=None):
-        return searchsorted(self.to_numpy(), value, side=side, sorter=sorter)
-
     # ------------------------------------------------------------------------
     # Ops
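
With its own searchsorted and _concat_same_type removed, PandasArray picks up the mixin versions, which work on self._ndarray directly instead of round-tripping through to_numpy(). A quick illustrative check (outputs assumed for pandas of this era):

import numpy as np
import pandas as pd

arr = pd.arrays.PandasArray(np.array([1, 3, 5, 7]))

# searchsorted is now the inherited mixin method backed by the ndarray.
print(arr.searchsorted(5))       # 2
print(arr.searchsorted([2, 6]))  # [1 3]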

0 commit comments
