-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
WIP: generalize categorical to N-dimensions #8012
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -269,10 +269,23 @@ def __init__(self, values, levels=None, ordered=None, name=None, fastpath=False, | |
self.levels = levels | ||
self.name = name | ||
|
||
def _replace_codes(self, codes): | ||
""" | ||
Returns a new Categorical with replaced codes but the same levels and | ||
metadata | ||
|
||
If codes is a scalar, just return that level. | ||
""" | ||
codes = np.asarray(codes) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This will be a problem for Categoricals with only one value ( There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I will add a test to make sure but I don't think so. A 1d array or list with one element is not a scalar. |
||
if np.isscalar(codes): | ||
return self.levels[codes] | ||
else: | ||
return Categorical(codes, levels=self.levels, ordered=self.ordered, | ||
name=self.name, fastpath=True) | ||
|
||
def copy(self): | ||
""" Copy constructor. """ | ||
return Categorical(values=self._codes.copy(),levels=self.levels, | ||
name=self.name, ordered=self.ordered, fastpath=True) | ||
return self._replace_codes(self._codes.copy()) | ||
|
||
@classmethod | ||
def from_array(cls, data): | ||
|
@@ -431,8 +444,19 @@ def shape(self): | |
------- | ||
shape : tuple | ||
""" | ||
return self._codes.shape | ||
|
||
return tuple([len(self._codes)]) | ||
@property | ||
def size(self): | ||
"""Size of the Categorical. | ||
|
||
For internal compatibility with numpy arrays. | ||
|
||
Returns | ||
------- | ||
size : int | ||
""" | ||
return self._codes.size | ||
|
||
def __array__(self, dtype=None): | ||
""" The numpy array interface. | ||
|
@@ -442,11 +466,12 @@ def __array__(self, dtype=None): | |
values : numpy array | ||
A numpy array of the same dtype as categorical.levels.dtype | ||
""" | ||
return com.take_1d(self.levels.values, self._codes) | ||
return com.take_1d( | ||
self.levels.values, self._codes.ravel()).reshape(self.shape) | ||
|
||
@property | ||
def T(self): | ||
return self | ||
return self._replace_codes(self._codes.T) | ||
|
||
def get_values(self): | ||
""" Return the values. | ||
|
@@ -558,7 +583,7 @@ def ravel(self, order='C'): | |
------- | ||
raveled : numpy array | ||
""" | ||
return np.array(self) | ||
return np.array(self._replace_codes(self._codes.ravel(order=order))) | ||
|
||
def view(self): | ||
"""Return a view of myself. | ||
|
@@ -628,9 +653,8 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None): | |
if allow_fill and fill_value is None: | ||
fill_value = np.nan | ||
|
||
values = com.take_1d(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value) | ||
result = Categorical(values=values, levels=self.levels, ordered=self.ordered, | ||
name=self.name, fastpath=True) | ||
codes = com.take_1d(self._codes, indexer, allow_fill=allow_fill, fill_value=fill_value) | ||
result = self._replace_codes(codes) | ||
return result | ||
|
||
take = take_nd | ||
|
@@ -646,8 +670,7 @@ def _slice(self, slicer): | |
slicer = slicer[1] | ||
|
||
_codes = self._codes[slicer] | ||
return Categorical(values=_codes,levels=self.levels, ordered=self.ordered, | ||
name=self.name, fastpath=True) | ||
return self._replace_codes(_codes) | ||
|
||
def __len__(self): | ||
return len(self._codes) | ||
|
@@ -738,15 +761,11 @@ def __unicode__(self): | |
|
||
def __getitem__(self, key): | ||
""" Return an item. """ | ||
if isinstance(key, (int, np.integer)): | ||
i = self._codes[key] | ||
if i == -1: | ||
return np.nan | ||
else: | ||
return self.levels[i] | ||
else: | ||
return Categorical(values=self._codes[key], levels=self.levels, | ||
ordered=self.ordered, fastpath=True) | ||
return self._replace_codes(self._codes[key]) | ||
# if np.isscalar(codes): | ||
# return self.levels[codes] | ||
# else: | ||
# return self._replace_codes(codes) | ||
|
||
def __setitem__(self, key, value): | ||
""" Item assignment. | ||
|
@@ -760,40 +779,22 @@ def __setitem__(self, key, value): | |
|
||
""" | ||
|
||
# require identical level set | ||
if isinstance(value, Categorical): | ||
# require identical level set | ||
if not value.levels.equals(self.levels): | ||
raise ValueError("cannot set a Categorical with another, without identical levels") | ||
|
||
rvalue = value if com.is_list_like(value) else [value] | ||
to_add = Index(rvalue)-self.levels | ||
if len(to_add): | ||
raise ValueError("cannot setitem on a Categorical with a new level," | ||
" set the levels first") | ||
|
||
# set by position | ||
if isinstance(key, (int, np.integer)): | ||
pass | ||
|
||
# tuple of indexers | ||
elif isinstance(key, tuple): | ||
|
||
# only allow 1 dimensional slicing, but can | ||
# in a 2-d case be passed (slice(None),....) | ||
if len(key) == 2: | ||
if not _is_null_slice(key[0]): | ||
raise AssertionError("invalid slicing for a 1-ndim categorical") | ||
key = key[1] | ||
elif len(key) == 1: | ||
key = key[0] | ||
else: | ||
raise AssertionError("invalid slicing for a 1-ndim categorical") | ||
# we can safely assign codes directly | ||
self._codes[key] = value.codes | ||
|
||
else: | ||
key = self._codes[key] | ||
|
||
lindexer = self.levels.get_indexer(rvalue) | ||
self._codes[key] = lindexer | ||
value = np.asarray(value) | ||
flat_value = value.ravel() | ||
to_add = Index(flat_value) - self.levels | ||
if len(to_add): | ||
raise ValueError("cannot setitem on a Categorical with a new level," | ||
" set the levels first") | ||
lindexer = self.levels.get_indexer(flat_value) | ||
self._codes[key] = lindexer.reshape(value.shape) | ||
|
||
#### reduction ops #### | ||
def _reduce(self, op, axis=0, skipna=True, numeric_only=None, | ||
|
@@ -871,9 +872,8 @@ def mode(self): | |
|
||
import pandas.hashtable as htable | ||
good = self._codes != -1 | ||
result = Categorical(sorted(htable.mode_int64(com._ensure_int64(self._codes[good]))), | ||
levels=self.levels,ordered=self.ordered, name=self.name, | ||
fastpath=True) | ||
result = self._replace_codes( | ||
sorted(htable.mode_int64(com._ensure_int64(self._codes[good])))) | ||
return result | ||
|
||
def unique(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is more used like a `_constructor(...)`.