From 077b99e17a090901636844612cec581282607ae7 Mon Sep 17 00:00:00 2001 From: Stephan Hoyer Date: Tue, 10 Mar 2015 17:01:55 -0700 Subject: [PATCH] TST/DOC: Fix tests and docs for .cat raising AttributeError if invalid Follow-up on GH9617 Fixes GH8814 -- is it fair to say that ``hasattr(s, 'cat')`` is probably the best solution we're going to come up with for checking for categorical data? --- doc/source/categorical.rst | 8 ++++++++ pandas/core/generic.py | 9 +++++---- pandas/core/series.py | 1 + pandas/tests/test_categorical.py | 10 +++++----- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 91cfa77bc618c..8eb235d46e6ed 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -766,6 +766,14 @@ Dtype comparisons work: dtype == np.str_ np.str_ == dtype +To check if a Series contains Categorical data, with pandas 0.16 or later, use +``hasattr(s, 'cat')``: + +.. ipython:: python + + hasattr(Series(['a'], dtype='category'), 'cat') + hasattr(Series(['a']), 'cat') + Using `numpy` functions on a `Series` of type ``category`` should not work as `Categoricals` are not numeric data (even in the case that ``.categories`` is numeric). diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c29e97b423fe1..3db531c946d01 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -78,10 +78,11 @@ class NDFrame(PandasObject): copy : boolean, default False """ _internal_names = ['_data', '_cacher', '_item_cache', '_cache', - 'is_copy', 'dt', 'cat', 'str', '_subtyp', '_index', + 'is_copy', '_subtyp', '_index', '_default_kind', '_default_fill_value', '__array_struct__','__array_interface__'] _internal_names_set = set(_internal_names) + _accessors = frozenset([]) _metadata = [] is_copy = None @@ -1957,9 +1958,9 @@ def __getattr__(self, name): # Note: obj.x will always call obj.__getattribute__('x') prior to # calling obj.__getattr__('x'). - if name in self._internal_names_set: - return object.__getattribute__(self, name) - elif name in self._metadata: + if (name in self._internal_names_set + or name in self._metadata + or name in self._accessors): return object.__getattribute__(self, name) else: if name in self._info_axis: diff --git a/pandas/core/series.py b/pandas/core/series.py index a83a6291e6c81..d34657f0dc256 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -113,6 +113,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): Copy input data """ _metadata = ['name'] + _accessors = frozenset(['dt', 'cat', 'str']) _allow_index_ops = True def __init__(self, data=None, index=None, dtype=None, name=None, diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index a77f3984105cc..d27fb39ec65e9 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1354,13 +1354,13 @@ def test_sequence_like(self): def test_series_delegations(self): # invalid accessor - self.assertRaises(TypeError, lambda : Series([1,2,3]).cat) - tm.assertRaisesRegexp(TypeError, + self.assertRaises(AttributeError, lambda : Series([1,2,3]).cat) + tm.assertRaisesRegexp(AttributeError, r"Can only use .cat accessor with a 'category' dtype", lambda : Series([1,2,3]).cat) - self.assertRaises(TypeError, lambda : Series(['a','b','c']).cat) - self.assertRaises(TypeError, lambda : Series(np.arange(5.)).cat) - self.assertRaises(TypeError, lambda : Series([Timestamp('20130101')]).cat) + self.assertRaises(AttributeError, lambda : Series(['a','b','c']).cat) + self.assertRaises(AttributeError, lambda : Series(np.arange(5.)).cat) + self.assertRaises(AttributeError, lambda : Series([Timestamp('20130101')]).cat) # Series should delegate calls to '.categories', '.codes', '.ordered' and the # methods '.set_categories()' 'drop_unused_categories()' to the categorical