Skip to content

COMPAT: infer_dtype not handling categoricals (GH8974) #8975

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 3, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 35 additions & 9 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ def is_period(object val):
return util.is_period_object(val)

_TYPE_MAP = {
'categorical' : 'categorical',
'category' : 'categorical',
'int8': 'integer',
'int16': 'integer',
'int32': 'integer',
Expand Down Expand Up @@ -65,29 +67,53 @@ try:
except AttributeError:
pass

cdef _try_infer_map(v):
    """
    Look up the inferred-type label for ``v`` in ``_TYPE_MAP``.

    The dtype's ``name`` is tried first and its ``kind`` second; the mapped
    value for the first key found is returned.  Returns None when neither
    is a key of ``_TYPE_MAP``.
    """
    cdef:
        object key

    key = v.dtype.name
    if key not in _TYPE_MAP:
        # the name is unknown: fall back to the dtype kind
        key = v.dtype.kind
        if key not in _TYPE_MAP:
            return None
    return _TYPE_MAP[key]

def infer_dtype(object _values):
"""
we are coercing to an ndarray here
"""

cdef:
Py_ssize_t i, n
object val
ndarray values

if isinstance(_values, np.ndarray):
values = _values
elif hasattr(_values,'values'):
values = _values.values
elif hasattr(_values,'dtype'):

# this will handle ndarray-like
# e.g. categoricals
try:
values = getattr(_values, 'values', _values)
except:
val = _try_infer_map(_values)
if val is not None:
return val

# it's ndarray-like, but we can't handle it
raise ValueError("cannot infer type for {0}".format(type(_values)))

else:
if not isinstance(_values, list):
_values = list(_values)
values = list_to_object_array(_values)

values = getattr(values, 'values', values)

val_name = values.dtype.name
if val_name in _TYPE_MAP:
return _TYPE_MAP[val_name]
val_kind = values.dtype.kind
if val_kind in _TYPE_MAP:
return _TYPE_MAP[val_kind]
val = _try_infer_map(values)
if val is not None:
return val

if values.dtype != np.object_:
values = values.astype('O')
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/test_tseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -660,6 +660,24 @@ def test_object(self):
result = lib.infer_dtype(arr)
self.assertEqual(result, 'mixed')

def test_categorical(self):
    """
    infer_dtype returns 'categorical' for a Categorical and for a Series
    built from one (GH 8974).
    """
    from pandas import Categorical, Series

    cases = [
        # categories inferred from the values
        Categorical(list('abc')),
        # ordered, with explicit categories that include unused ones.
        # The categories must be the individual letters: a one-element
        # list ['cegfab'] would leave none of 'a', 'b', 'c' as a valid
        # category and turn every value into NaN.
        Categorical(list('abc'), categories=list('cegfab'), ordered=True),
    ]

    for arr in cases:
        # the bare Categorical
        result = lib.infer_dtype(arr)
        self.assertEqual(result, 'categorical')

        # the same data wrapped in a Series
        result = lib.infer_dtype(Series(arr))
        self.assertEqual(result, 'categorical')

class TestMoments(tm.TestCase):
pass

Expand Down