|
27 | 27 | from pandas.core.dtypes.common import (
|
28 | 28 | ensure_int64,
|
29 | 29 | ensure_object,
|
30 |
| - ensure_platform_int, |
31 | 30 | is_categorical_dtype,
|
32 | 31 | is_datetime64_dtype,
|
33 | 32 | is_dict_like,
|
|
51 | 50 | from pandas.core.accessor import PandasDelegate, delegate_names
|
52 | 51 | import pandas.core.algorithms as algorithms
|
53 | 52 | from pandas.core.algorithms import _get_data_algo, factorize, take, take_1d, unique1d
|
| 53 | +from pandas.core.array_algos.transforms import shift |
54 | 54 | from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
|
55 | 55 | from pandas.core.base import NoNewAttributesMixin, PandasObject, _shared_docs
|
56 | 56 | import pandas.core.common as com
|
@@ -199,17 +199,6 @@ def contains(cat, key, container):
|
199 | 199 | return any(loc_ in container for loc_ in loc)
|
200 | 200 |
|
201 | 201 |
|
202 |
| -_codes_doc = """ |
203 |
| -The category codes of this categorical. |
204 |
| -
|
205 |
| -Level codes are an array if integer which are the positions of the real |
206 |
| -values in the categories array. |
207 |
| -
|
208 |
| -There is not setter, use the other categorical methods and the normal item |
209 |
| -setter to change values in the categorical. |
210 |
| -""" |
211 |
| - |
212 |
| - |
213 | 202 | class Categorical(ExtensionArray, PandasObject):
|
214 | 203 | """
|
215 | 204 | Represent a categorical variable in classic R / S-plus fashion.
|
@@ -652,27 +641,26 @@ def from_codes(cls, codes, categories=None, ordered=None, dtype=None):
|
652 | 641 |
|
653 | 642 | return cls(codes, dtype=dtype, fastpath=True)
|
654 | 643 |
|
655 |
| - def _get_codes(self): |
| 644 | + @property |
| 645 | + def codes(self) -> np.ndarray: |
656 | 646 | """
|
657 |
| - Get the codes. |
| 647 | + The category codes of this categorical. |
| 648 | +
|
| 649 | + Codes are an array of integers which are the positions of the actual |
| 650 | + values in the categories array. |
| 651 | +
|
| 652 | + There is no setter, use the other categorical methods and the normal item |
| 653 | + setter to change values in the categorical. |
658 | 654 |
|
659 | 655 | Returns
|
660 | 656 | -------
|
661 |
| - codes : integer array view |
662 |
| - A non writable view of the `codes` array. |
| 657 | + ndarray[int] |
| 658 | + A non-writable view of the `codes` array. |
663 | 659 | """
|
664 | 660 | v = self._codes.view()
|
665 | 661 | v.flags.writeable = False
|
666 | 662 | return v
|
667 | 663 |
|
668 |
| - def _set_codes(self, codes): |
669 |
| - """ |
670 |
| - Not settable by the user directly |
671 |
| - """ |
672 |
| - raise ValueError("cannot set Categorical codes directly") |
673 |
| - |
674 |
| - codes = property(fget=_get_codes, fset=_set_codes, doc=_codes_doc) |
675 |
| - |
676 | 664 | def _set_categories(self, categories, fastpath=False):
|
677 | 665 | """
|
678 | 666 | Sets new categories inplace
|
@@ -1241,23 +1229,41 @@ def shift(self, periods, fill_value=None):
|
1241 | 1229 | codes = self.codes
|
1242 | 1230 | if codes.ndim > 1:
|
1243 | 1231 | raise NotImplementedError("Categorical with ndim > 1.")
|
1244 |
| - if np.prod(codes.shape) and (periods != 0): |
1245 |
| - codes = np.roll(codes, ensure_platform_int(periods), axis=0) |
1246 |
| - if isna(fill_value): |
1247 |
| - fill_value = -1 |
1248 |
| - elif fill_value in self.categories: |
1249 |
| - fill_value = self.categories.get_loc(fill_value) |
1250 |
| - else: |
1251 |
| - raise ValueError( |
1252 |
| - f"'fill_value={fill_value}' is not present " |
1253 |
| - "in this Categorical's categories" |
1254 |
| - ) |
1255 |
| - if periods > 0: |
1256 |
| - codes[:periods] = fill_value |
1257 |
| - else: |
1258 |
| - codes[periods:] = fill_value |
1259 | 1232 |
|
1260 |
| - return self.from_codes(codes, dtype=self.dtype) |
| 1233 | + fill_value = self._validate_fill_value(fill_value) |
| 1234 | + |
| 1235 | + codes = shift(codes.copy(), periods, axis=0, fill_value=fill_value) |
| 1236 | + |
| 1237 | + return self._constructor(codes, dtype=self.dtype, fastpath=True) |
| 1238 | + |
| 1239 | + def _validate_fill_value(self, fill_value): |
| 1240 | + """ |
| 1241 | + Convert a user-facing fill_value to a representation to use with our |
| 1242 | + underlying ndarray, raising ValueError if this is not possible. |
| 1243 | +
|
| 1244 | + Parameters |
| 1245 | + ---------- |
| 1246 | + fill_value : object |
| 1247 | +
|
| 1248 | + Returns |
| 1249 | + ------- |
| 1250 | + fill_value : int |
| 1251 | +
|
| 1252 | + Raises |
| 1253 | + ------ |
| 1254 | + ValueError |
| 1255 | + """ |
| 1256 | + |
| 1257 | + if isna(fill_value): |
| 1258 | + fill_value = -1 |
| 1259 | + elif fill_value in self.categories: |
| 1260 | + fill_value = self.categories.get_loc(fill_value) |
| 1261 | + else: |
| 1262 | + raise ValueError( |
| 1263 | + f"'fill_value={fill_value}' is not present " |
| 1264 | + "in this Categorical's categories" |
| 1265 | + ) |
| 1266 | + return fill_value |
1261 | 1267 |
|
1262 | 1268 | def __array__(self, dtype=None) -> np.ndarray:
|
1263 | 1269 | """
|
@@ -1835,24 +1841,12 @@ def take(self, indexer, allow_fill: bool = False, fill_value=None):
|
1835 | 1841 | """
|
1836 | 1842 | indexer = np.asarray(indexer, dtype=np.intp)
|
1837 | 1843 |
|
1838 |
| - dtype = self.dtype |
1839 |
| - |
1840 |
| - if isna(fill_value): |
1841 |
| - fill_value = -1 |
1842 |
| - elif allow_fill: |
| 1844 | + if allow_fill: |
1843 | 1845 | # convert user-provided `fill_value` to codes
|
1844 |
| - if fill_value in self.categories: |
1845 |
| - fill_value = self.categories.get_loc(fill_value) |
1846 |
| - else: |
1847 |
| - msg = ( |
1848 |
| - f"'fill_value' ('{fill_value}') is not in this " |
1849 |
| - "Categorical's categories." |
1850 |
| - ) |
1851 |
| - raise TypeError(msg) |
| 1846 | + fill_value = self._validate_fill_value(fill_value) |
1852 | 1847 |
|
1853 | 1848 | codes = take(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value)
|
1854 |
| - result = type(self).from_codes(codes, dtype=dtype) |
1855 |
| - return result |
| 1849 | + return self._constructor(codes, dtype=self.dtype, fastpath=True) |
1856 | 1850 |
|
1857 | 1851 | def take_nd(self, indexer, allow_fill: bool = False, fill_value=None):
|
1858 | 1852 | # GH#27745 deprecate alias that other EAs don't have
|
|
0 commit comments