Skip to content

Commit c1730ef

Browse files
committed
support CategoricalIndex
raise KeyError when accessing invalid elements; setting elements not in the categories is equivalent to .append() (which coerces to an Index)
1 parent 8d2818e commit c1730ef

File tree

10 files changed

+915
-186
lines changed

10 files changed

+915
-186
lines changed

pandas/core/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
from pandas.core.categorical import Categorical
99
from pandas.core.groupby import Grouper
1010
from pandas.core.format import set_eng_float_format
11-
from pandas.core.index import Index, Int64Index, Float64Index, MultiIndex
11+
from pandas.core.index import Index, CategoricalIndex, Int64Index, Float64Index, MultiIndex
1212

1313
from pandas.core.series import Series, TimeSeries
1414
from pandas.core.frame import DataFrame

pandas/core/categorical.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
import pandas.core.common as com
1515
from pandas.util.decorators import cache_readonly
1616

17-
from pandas.core.common import (CategoricalDtype, ABCSeries, isnull, notnull,
17+
from pandas.core.common import (CategoricalDtype, ABCSeries, ABCCategoricalIndex, isnull, notnull,
1818
is_categorical_dtype, is_integer_dtype, is_object_dtype,
1919
_possibly_infer_to_datetimelike, get_dtype_kinds,
2020
is_list_like, is_sequence, is_null_slice, is_bool,
@@ -79,7 +79,7 @@ def f(self, other):
7979

8080
def maybe_to_categorical(array):
    """Unwrap a Series or CategoricalIndex to its ``.values``.

    Any other input is handed back untouched.
    """
    is_wrapper = isinstance(array, (ABCSeries, ABCCategoricalIndex))
    # only the pandas containers named above are unwrapped
    return array.values if is_wrapper else array
8585

@@ -233,6 +233,10 @@ def __init__(self, values, categories=None, ordered=False, name=None, fastpath=F
233233
cat = values
234234
if isinstance(values, ABCSeries):
235235
cat = values.values
236+
if isinstance(values, ABCCategoricalIndex):
237+
ordered = values.ordered
238+
cat = values.values
239+
236240
if categories is None:
237241
categories = cat.categories
238242
values = values.__array__()
@@ -302,11 +306,31 @@ def copy(self):
302306
return Categorical(values=self._codes.copy(),categories=self.categories,
303307
name=self.name, ordered=self.ordered, fastpath=True)
304308

309+
def astype(self, dtype):
    """Coerce this Categorical to another dtype.

    Parameters
    ----------
    dtype : dtype-like
        Target dtype; only categorical and object targets are accepted.

    Returns
    -------
    self, unchanged, when ``dtype`` is categorical; otherwise a new
    object-dtype ``numpy.ndarray`` built from this Categorical.

    Raises
    ------
    TypeError
        If ``dtype`` is neither categorical nor object.
    """
    if is_categorical_dtype(dtype):
        return self
    elif is_object_dtype(dtype):
        # Force the object dtype explicitly: a plain ``np.array(self)``
        # lets numpy pick the dtype of the converted values (e.g. an
        # integer dtype), so the caller would not actually get the
        # object array that was asked for.
        return np.array(self, dtype=object)

    raise TypeError('Astype a Categorical to anything other than '
                    'categorical or object is not supported')
318+
305319
@cache_readonly
def ndim(self):
    """Dimensionality of this Categorical, read from its codes array."""
    codes = self._codes
    return codes.ndim
309323

324+
@cache_readonly
def size(self):
    """Number of elements in this Categorical (the value of ``len(self)``)."""
    n_elements = len(self)
    return n_elements
328+
329+
@cache_readonly
def itemsize(self):
    """Item size of a single category, taken from ``self.categories.itemsize``."""
    cats = self.categories
    return cats.itemsize
333+
310334
def reshape(self, new_shape, **kwargs):
311335
""" compat with .reshape """
312336
return self

pandas/core/common.py

+18
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ def _check(cls, inst):
7272
ABCDatetimeIndex = create_pandas_abc_type("ABCDatetimeIndex", "_typ", ("datetimeindex",))
7373
ABCTimedeltaIndex = create_pandas_abc_type("ABCTimedeltaIndex", "_typ", ("timedeltaindex",))
7474
ABCPeriodIndex = create_pandas_abc_type("ABCPeriodIndex", "_typ", ("periodindex",))
75+
ABCCategoricalIndex = create_pandas_abc_type("ABCCategoricalIndex", "_typ", ("categoricalindex",))
7576
ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",))
7677
ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",))
7778
ABCPanel = create_pandas_abc_type("ABCPanel", "_typ", ("panel",))
@@ -2438,9 +2439,26 @@ def _get_dtype_type(arr_or_dtype):
24382439
return np.dtype(arr_or_dtype).type
24392440
elif isinstance(arr_or_dtype, CategoricalDtype):
24402441
return CategoricalDtypeType
2442+
elif isinstance(arr_or_dtype, compat.string_types):
2443+
if is_categorical_dtype(arr_or_dtype):
2444+
return CategoricalDtypeType
2445+
return _get_dtype_type(np.dtype(arr_or_dtype))
24412446
return arr_or_dtype.dtype.type
24422447

24432448

2449+
def is_dtypes_equal(source, target):
    """Return True if ``source`` and ``target`` resolve to the same dtype type.

    Both arguments are first normalized with ``_get_dtype_type`` and the
    resulting types are then compared with ``==``.

    Parameters
    ----------
    source, target : anything accepted by ``_get_dtype_type``

    Returns
    -------
    bool
        Result of the comparison; False if the comparison itself raises.
    """
    source = _get_dtype_type(source)
    target = _get_dtype_type(target)

    try:
        return source == target
    except Exception:
        # invalid comparison
        # object == category will hit this
        # (``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit are not swallowed here)
        return False
2461+
24442462
def is_any_int_dtype(arr_or_dtype):
24452463
tipo = _get_dtype_type(arr_or_dtype)
24462464
return issubclass(tipo, np.integer)

0 commit comments

Comments
 (0)