From 28b7bded23f99b07004c40275a628560e5eb7b76 Mon Sep 17 00:00:00 2001 From: Jan Schulz Date: Fri, 13 Nov 2015 00:34:01 +0100 Subject: [PATCH 1/4] TST: Allow for more than one accessor on a Series `.dt` and `.str` can be available when the Series is of type category, which makes `.cat` available. --- pandas/core/series.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index cab231e8fb09c..29e9a81d19cd6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2704,12 +2704,10 @@ def _dir_deletions(self): def _dir_additions(self): rv = set() - # these accessors are mutually exclusive, so break loop when one exists for accessor in self._accessors: try: getattr(self, accessor) rv.add(accessor) - break except AttributeError: pass return rv From 149feeff11284124badc355ffa264b01151b7465 Mon Sep 17 00:00:00 2001 From: Jan Schulz Date: Thu, 12 Nov 2015 14:15:26 +0100 Subject: [PATCH 2/4] Make .str available for Series of type category with strings If a series is of type category and the underlying Categorical has categories of type string, then make it possible to use the `.str` accessor on such a series. The string methods work on the categories (and are therefore fast if we have only a few categories), but return a Series with a dtype other than category (boolean, string,...), so that it is no different if we use `.str` on a series of type string or of type category. 
--- pandas/core/strings.py | 126 +++++++++++++++++++------------ pandas/tests/test_categorical.py | 72 ++++++++++++++++++ 2 files changed, 148 insertions(+), 50 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index f1ff7e2178a04..a8907ac192707 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2,7 +2,7 @@ from pandas.compat import zip from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, is_list_like, - is_categorical_dtype, is_object_dtype) + is_categorical_dtype, is_object_dtype, take_1d) import pandas.compat as compat from pandas.core.base import AccessorProperty, NoNewAttributesMixin from pandas.util.decorators import Appender, deprecate_kwarg @@ -1003,7 +1003,7 @@ def str_encode(arr, encoding, errors="strict"): def _noarg_wrapper(f, docstring=None, **kargs): def wrapper(self): - result = _na_map(f, self.series, **kargs) + result = _na_map(f, self._data, **kargs) return self._wrap_result(result) wrapper.__name__ = f.__name__ @@ -1017,15 +1017,15 @@ def wrapper(self): def _pat_wrapper(f, flags=False, na=False, **kwargs): def wrapper1(self, pat): - result = f(self.series, pat) + result = f(self._data, pat) return self._wrap_result(result) def wrapper2(self, pat, flags=0, **kwargs): - result = f(self.series, pat, flags=flags, **kwargs) + result = f(self._data, pat, flags=flags, **kwargs) return self._wrap_result(result) def wrapper3(self, pat, na=np.nan): - result = f(self.series, pat, na=na) + result = f(self._data, pat, na=na) return self._wrap_result(result) wrapper = wrapper3 if na else wrapper2 if flags else wrapper1 @@ -1059,8 +1059,11 @@ class StringMethods(NoNewAttributesMixin): >>> s.str.replace('_', '') """ - def __init__(self, series): - self.series = series + def __init__(self, data): + self._is_categorical = is_categorical_dtype(data) + self._data = data.cat.categories if self._is_categorical else data + # save orig to blow up categoricals to the right type + self._orig = data 
self._freeze() def __getitem__(self, key): @@ -1078,7 +1081,15 @@ def __iter__(self): i += 1 g = self.get(i) - def _wrap_result(self, result, **kwargs): + def _wrap_result(self, result, use_codes=True, name=None): + + # for category, we do the stuff on the categories, so blow it up + # to the full series again + # But for some operations, we have to do the stuff on the full values, + # so make it possible to skip this step as the method already did this before + # the transformation... + if use_codes and self._is_categorical: + result = take_1d(result, self._orig.cat.codes) # leave as it is to keep extract and get_dummies results # can be merged to _wrap_result_expand in v0.17 @@ -1088,29 +1099,34 @@ def _wrap_result(self, result, **kwargs): if not hasattr(result, 'ndim'): return result - name = kwargs.get('name') or getattr(result, 'name', None) or self.series.name + name = name or getattr(result, 'name', None) or self._orig.name if result.ndim == 1: - if isinstance(self.series, Index): + if isinstance(self._orig, Index): # if result is a boolean np.array, return the np.array # instead of wrapping it into a boolean Index (GH 8875) if is_bool_dtype(result): return result return Index(result, name=name) - return Series(result, index=self.series.index, name=name) + return Series(result, index=self._orig.index, name=name) else: assert result.ndim < 3 - return DataFrame(result, index=self.series.index) + return DataFrame(result, index=self._orig.index) def _wrap_result_expand(self, result, expand=False): if not isinstance(expand, bool): raise ValueError("expand must be True or False") + # for category, we do the stuff on the categories, so blow it up + # to the full series again + if self._is_categorical: + result = take_1d(result, self._orig.cat.codes) + from pandas.core.index import Index, MultiIndex if not hasattr(result, 'ndim'): return result - if isinstance(self.series, Index): + if isinstance(self._orig, Index): name = getattr(result, 'name', None) # if result 
is a boolean np.array, return the np.array # instead of wrapping it into a boolean Index (GH 8875) @@ -1123,36 +1139,38 @@ def _wrap_result_expand(self, result, expand=False): else: return Index(result, name=name) else: - index = self.series.index + index = self._orig.index if expand: def cons_row(x): if is_list_like(x): return x else: return [ x ] - cons = self.series._constructor_expanddim + cons = self._orig._constructor_expanddim data = [cons_row(x) for x in result] return cons(data, index=index) else: name = getattr(result, 'name', None) - cons = self.series._constructor + cons = self._orig._constructor return cons(result, name=name, index=index) @copy(str_cat) def cat(self, others=None, sep=None, na_rep=None): - result = str_cat(self.series, others=others, sep=sep, na_rep=na_rep) - return self._wrap_result(result) + data = self._orig if self._is_categorical else self._data + result = str_cat(data, others=others, sep=sep, na_rep=na_rep) + return self._wrap_result(result, use_codes=(not self._is_categorical)) + @deprecate_kwarg('return_type', 'expand', mapping={'series': False, 'frame': True}) @copy(str_split) def split(self, pat=None, n=-1, expand=False): - result = str_split(self.series, pat, n=n) + result = str_split(self._data, pat, n=n) return self._wrap_result_expand(result, expand=expand) @copy(str_rsplit) def rsplit(self, pat=None, n=-1, expand=False): - result = str_rsplit(self.series, pat, n=n) + result = str_rsplit(self._data, pat, n=n) return self._wrap_result_expand(result, expand=expand) _shared_docs['str_partition'] = (""" @@ -1203,7 +1221,7 @@ def rsplit(self, pat=None, n=-1, expand=False): 'also': 'rpartition : Split the string at the last occurrence of `sep`'}) def partition(self, pat=' ', expand=True): f = lambda x: x.partition(pat) - result = _na_map(f, self.series) + result = _na_map(f, self._data) return self._wrap_result_expand(result, expand=expand) @Appender(_shared_docs['str_partition'] % {'side': 'last', @@ -1211,45 +1229,45 @@ def 
partition(self, pat=' ', expand=True): 'also': 'partition : Split the string at the first occurrence of `sep`'}) def rpartition(self, pat=' ', expand=True): f = lambda x: x.rpartition(pat) - result = _na_map(f, self.series) + result = _na_map(f, self._data) return self._wrap_result_expand(result, expand=expand) @copy(str_get) def get(self, i): - result = str_get(self.series, i) + result = str_get(self._data, i) return self._wrap_result(result) @copy(str_join) def join(self, sep): - result = str_join(self.series, sep) + result = str_join(self._data, sep) return self._wrap_result(result) @copy(str_contains) def contains(self, pat, case=True, flags=0, na=np.nan, regex=True): - result = str_contains(self.series, pat, case=case, flags=flags, + result = str_contains(self._data, pat, case=case, flags=flags, na=na, regex=regex) return self._wrap_result(result) @copy(str_match) def match(self, pat, case=True, flags=0, na=np.nan, as_indexer=False): - result = str_match(self.series, pat, case=case, flags=flags, + result = str_match(self._data, pat, case=case, flags=flags, na=na, as_indexer=as_indexer) return self._wrap_result(result) @copy(str_replace) def replace(self, pat, repl, n=-1, case=True, flags=0): - result = str_replace(self.series, pat, repl, n=n, case=case, + result = str_replace(self._data, pat, repl, n=n, case=case, flags=flags) return self._wrap_result(result) @copy(str_repeat) def repeat(self, repeats): - result = str_repeat(self.series, repeats) + result = str_repeat(self._data, repeats) return self._wrap_result(result) @copy(str_pad) def pad(self, width, side='left', fillchar=' '): - result = str_pad(self.series, width, side=side, fillchar=fillchar) + result = str_pad(self._data, width, side=side, fillchar=fillchar) return self._wrap_result(result) _shared_docs['str_pad'] = (""" @@ -1297,27 +1315,27 @@ def zfill(self, width): ------- filled : Series/Index of objects """ - result = str_pad(self.series, width, side='left', fillchar='0') + result = 
str_pad(self._data, width, side='left', fillchar='0') return self._wrap_result(result) @copy(str_slice) def slice(self, start=None, stop=None, step=None): - result = str_slice(self.series, start, stop, step) + result = str_slice(self._data, start, stop, step) return self._wrap_result(result) @copy(str_slice_replace) def slice_replace(self, start=None, stop=None, repl=None): - result = str_slice_replace(self.series, start, stop, repl) + result = str_slice_replace(self._data, start, stop, repl) return self._wrap_result(result) @copy(str_decode) def decode(self, encoding, errors="strict"): - result = str_decode(self.series, encoding, errors) + result = str_decode(self._data, encoding, errors) return self._wrap_result(result) @copy(str_encode) def encode(self, encoding, errors="strict"): - result = str_encode(self.series, encoding, errors) + result = str_encode(self._data, encoding, errors) return self._wrap_result(result) _shared_docs['str_strip'] = (""" @@ -1332,34 +1350,37 @@ def encode(self, encoding, errors="strict"): @Appender(_shared_docs['str_strip'] % dict(side='left and right sides', method='strip')) def strip(self, to_strip=None): - result = str_strip(self.series, to_strip, side='both') + result = str_strip(self._data, to_strip, side='both') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='left side', method='lstrip')) def lstrip(self, to_strip=None): - result = str_strip(self.series, to_strip, side='left') + result = str_strip(self._data, to_strip, side='left') return self._wrap_result(result) @Appender(_shared_docs['str_strip'] % dict(side='right side', method='rstrip')) def rstrip(self, to_strip=None): - result = str_strip(self.series, to_strip, side='right') + result = str_strip(self._data, to_strip, side='right') return self._wrap_result(result) @copy(str_wrap) def wrap(self, width, **kwargs): - result = str_wrap(self.series, width, **kwargs) + result = str_wrap(self._data, width, **kwargs) return 
self._wrap_result(result) @copy(str_get_dummies) def get_dummies(self, sep='|'): - result = str_get_dummies(self.series, sep) - return self._wrap_result(result) + # we need to cast to Series of strings as only that has all + # methods available for making the dummies... + data = self._orig.astype(str) if self._is_categorical else self._data + result = str_get_dummies(data, sep) + return self._wrap_result(result, use_codes=(not self._is_categorical)) @copy(str_translate) def translate(self, table, deletechars=None): - result = str_translate(self.series, table, deletechars) + result = str_translate(self._data, table, deletechars) return self._wrap_result(result) count = _pat_wrapper(str_count, flags=True) @@ -1369,7 +1390,7 @@ def translate(self, table, deletechars=None): @copy(str_extract) def extract(self, pat, flags=0): - result, name = str_extract(self.series, pat, flags=flags) + result, name = str_extract(self._data, pat, flags=flags) return self._wrap_result(result, name=name) _shared_docs['find'] = (""" @@ -1398,13 +1419,13 @@ def extract(self, pat, flags=0): @Appender(_shared_docs['find'] % dict(side='lowest', method='find', also='rfind : Return highest indexes in each strings')) def find(self, sub, start=0, end=None): - result = str_find(self.series, sub, start=start, end=end, side='left') + result = str_find(self._data, sub, start=start, end=end, side='left') return self._wrap_result(result) @Appender(_shared_docs['find'] % dict(side='highest', method='rfind', also='find : Return lowest indexes in each strings')) def rfind(self, sub, start=0, end=None): - result = str_find(self.series, sub, start=start, end=end, side='right') + result = str_find(self._data, sub, start=start, end=end, side='right') return self._wrap_result(result) def normalize(self, form): @@ -1423,7 +1444,7 @@ def normalize(self, form): """ import unicodedata f = lambda x: unicodedata.normalize(form, compat.u_safe(x)) - result = _na_map(f, self.series) + result = _na_map(f, self._data) 
return self._wrap_result(result) _shared_docs['index'] = (""" @@ -1453,13 +1474,13 @@ def normalize(self, form): @Appender(_shared_docs['index'] % dict(side='lowest', similar='find', method='index', also='rindex : Return highest indexes in each strings')) def index(self, sub, start=0, end=None): - result = str_index(self.series, sub, start=start, end=end, side='left') + result = str_index(self._data, sub, start=start, end=end, side='left') return self._wrap_result(result) @Appender(_shared_docs['index'] % dict(side='highest', similar='rfind', method='rindex', also='index : Return lowest indexes in each strings')) def rindex(self, sub, start=0, end=None): - result = str_index(self.series, sub, start=start, end=end, side='right') + result = str_index(self._data, sub, start=start, end=end, side='right') return self._wrap_result(result) _shared_docs['len'] = (""" @@ -1553,9 +1574,14 @@ class StringAccessorMixin(object): def _make_str_accessor(self): from pandas.core.series import Series from pandas.core.index import Index - if isinstance(self, Series) and not is_object_dtype(self.dtype): - # this really should exclude all series with any non-string values, - # but that isn't practical for performance reasons until we have a + if isinstance(self, Series) and not( + (is_categorical_dtype(self.dtype) and + is_object_dtype(self.values.categories)) or + (is_object_dtype(self.dtype))): + # it's neither a string series not a categorical series with strings + # inside the categories. 
+ # this really should exclude all series with any non-string values (instead of test + # for object dtype), but that isn't practical for performance reasons until we have a # str dtype (GH 9343) raise AttributeError("Can only use .str accessor with string " "values, which use np.object_ dtype in " diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 0da4d0e68621d..beab5754d30c3 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3640,6 +3640,78 @@ def test_cat_accessor_no_new_attributes(self): with tm.assertRaisesRegexp(AttributeError, "You cannot add any new attribute"): c.cat.xlabel = "a" + def test_str_accessor_api_for_categorical(self): + # https://github.com/pydata/pandas/issues/10661 + from pandas.core.strings import StringMethods + s = Series(list('aabb')) + s = s + " " + s + c = s.astype('category') + self.assertIsInstance(c.str, StringMethods) + + # str functions, which need special arguments + special_func_defs = [ + ('cat', (list("zyxw"),), {"sep": ","}), + ('center', (10,), {}), + ('contains', ("a",), {}), + ('count', ("a",), {}), + ('decode', ("UTF-8",), {}), + ('encode', ("UTF-8",), {}), + ('endswith', ("a",), {}), + ('extract', ("([a-z]*) ",), {}), + ('find', ("a",), {}), + ('findall', ("a",), {}), + ('index', (" ",), {}), + ('ljust', (10,), {}), + ('match', ("a"), {}), # deprecated... 
+ ('normalize', ("NFC",), {}), + ('pad', (10,), {}), + ('partition', (" ",), {"expand": False}), # not default + ('partition', (" ",), {"expand": True}), # default + ('repeat', (3,), {}), + ('replace', ("a", "z"), {}), + ('rfind', ("a",), {}), + ('rindex', (" ",), {}), + ('rjust', (10,), {}), + ('rpartition', (" ",), {"expand": False}), # not default + ('rpartition', (" ",), {"expand": True}), # default + ('slice', (0,1), {}), + ('slice_replace', (0,1,"z"), {}), + ('split', (" ",), {"expand":False}), #default + ('split', (" ",), {"expand":True}), # not default + ('startswith', ("a",), {}), + ('wrap', (2,), {}), + ('zfill', (10,), {}) + ] + _special_func_names = [f[0] for f in special_func_defs] + + # * get, join: they need a individual elements of type lists, but + # we can't make a categorical with lists as individual categories. + # -> `s.str.split(" ").astype("category")` will error! + # * `translate` has different interfaces for py2 vs. py3 + _ignore_names = ["get", "join", "translate"] + + str_func_names = [f for f in dir(s.str) if not (f.startswith("_") or + f in _special_func_names or + f in _ignore_names)] + + func_defs = [(f, (), {}) for f in str_func_names] + func_defs.extend(special_func_defs) + + + for func, args, kwargs in func_defs: + res = getattr(c.str, func)(*args, **kwargs) + exp = getattr(s.str, func)(*args, **kwargs) + + if isinstance(res, pd.DataFrame): + tm.assert_frame_equal(res, exp) + else: + tm.assert_series_equal(res, exp) + + invalid = Series([1,2,3]).astype('category') + with tm.assertRaisesRegexp(AttributeError, "Can only use .str accessor with string"): + invalid.str + self.assertFalse(hasattr(invalid, 'str')) + def test_pickle_v0_14_1(self): # we have the name warning From a7c65ed6b74227a9d4a49097d2da28c876e85c1f Mon Sep 17 00:00:00 2001 From: Jan Schulz Date: Mon, 16 Nov 2015 16:33:18 +0100 Subject: [PATCH 3/4] Make .dt available for Series of type category with datetimes If a series is a type category and the underlying Categorical 
has categories of type datetime, then make it possible to use the .dt assessor on such a series. The string methods work on the categories (and therefore fast if we have only a few categories), but return a Series with a dtype other than category (integer,...), so that it is no different if we use .dt on a series of type datetime or of type category. --- pandas/tests/test_categorical.py | 75 ++++++++++++++++++++++++++++++++ pandas/tests/test_series.py | 12 ++++- pandas/tseries/common.py | 30 +++++++++---- 3 files changed, 107 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index beab5754d30c3..ac2f9e77c3674 100755 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -3712,6 +3712,81 @@ def test_str_accessor_api_for_categorical(self): invalid.str self.assertFalse(hasattr(invalid, 'str')) + def test_dt_accessor_api_for_categorical(self): + # https://github.com/pydata/pandas/issues/10661 + from pandas.tseries.common import Properties + from pandas.tseries.index import date_range, DatetimeIndex + from pandas.tseries.period import period_range, PeriodIndex + from pandas.tseries.tdi import timedelta_range, TimedeltaIndex + + s_dr = Series(date_range('1/1/2015', periods=5, tz="MET")) + c_dr = s_dr.astype("category") + + s_pr = Series(period_range('1/1/2015', freq='D', periods=5)) + c_pr = s_pr.astype("category") + + s_tdr = Series(timedelta_range('1 days','10 days')) + c_tdr = s_tdr.astype("category") + + test_data = [ + ("Datetime", DatetimeIndex._datetimelike_ops, s_dr, c_dr), + ("Period", PeriodIndex._datetimelike_ops, s_pr, c_pr), + ("Timedelta", TimedeltaIndex._datetimelike_ops, s_tdr, c_tdr)] + + self.assertIsInstance(c_dr.dt, Properties) + + special_func_defs = [ + ('strftime', ("%Y-%m-%d",), {}), + ('tz_convert', ("EST",), {}), + #('tz_localize', ("UTC",), {}), + ] + _special_func_names = [f[0] for f in special_func_defs] + + # the series is already localized + 
_ignore_names = ['tz_localize'] + + for name, attr_names, s, c in test_data: + func_names = [f for f in dir(s.dt) if not (f.startswith("_") or + f in attr_names or + f in _special_func_names or + f in _ignore_names)] + + func_defs = [(f, (), {}) for f in func_names] + for f_def in special_func_defs: + if f_def[0] in dir(s.dt): + func_defs.append(f_def) + + for func, args, kwargs in func_defs: + res = getattr(c.dt, func)(*args, **kwargs) + exp = getattr(s.dt, func)(*args, **kwargs) + + if isinstance(res, pd.DataFrame): + tm.assert_frame_equal(res, exp) + elif isinstance(res, pd.Series): + tm.assert_series_equal(res, exp) + else: + tm.assert_numpy_array_equal(res, exp) + + for attr in attr_names: + try: + res = getattr(c.dt, attr) + exp = getattr(s.dt, attr) + except Exception as e: + print(name, attr) + raise e + + if isinstance(res, pd.DataFrame): + tm.assert_frame_equal(res, exp) + elif isinstance(res, pd.Series): + tm.assert_series_equal(res, exp) + else: + tm.assert_numpy_array_equal(res, exp) + + invalid = Series([1,2,3]).astype('category') + with tm.assertRaisesRegexp(AttributeError, "Can only use .dt accessor with datetimelike"): + invalid.dt + self.assertFalse(hasattr(invalid, 'str')) + def test_pickle_v0_14_1(self): # we have the name warning diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f30481ee17f75..9be0784c709bc 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -362,12 +362,20 @@ def test_tab_completion(self): self.assertTrue('str' not in dir(s)) self.assertTrue('cat' not in dir(s)) - # similiarly for .cat + # similiarly for .cat, but with the twist that str and dt should be there + # if the categories are of that type + # first cat and str s = Series(list('abbcd'), dtype="category") self.assertTrue('cat' in dir(s)) - self.assertTrue('str' not in dir(s)) + self.assertTrue('str' in dir(s)) # as it is a string categorical self.assertTrue('dt' not in dir(s)) + # similar to cat and str + s = 
Series(date_range('1/1/2015', periods=5)).astype("category") + self.assertTrue('cat' in dir(s)) + self.assertTrue('str' not in dir(s)) + self.assertTrue('dt' in dir(s)) # as it is a datetime categorical + def test_binop_maybe_preserve_name(self): # names match, preserve result = self.ts * self.ts diff --git a/pandas/tseries/common.py b/pandas/tseries/common.py index 171f72d37cdd8..31b5281aa86a6 100644 --- a/pandas/tseries/common.py +++ b/pandas/tseries/common.py @@ -10,8 +10,8 @@ from pandas.core.common import (_NS_DTYPE, _TD_DTYPE, is_period_arraylike, is_datetime_arraylike, is_integer_dtype, is_list_like, is_datetime64_dtype, is_datetime64tz_dtype, - is_timedelta64_dtype, - get_dtype_kinds) + is_timedelta64_dtype, is_categorical_dtype, + get_dtype_kinds, take_1d) def is_datetimelike(data): """ return a boolean if we can be successfully converted to a datetimelike """ @@ -45,26 +45,36 @@ def maybe_to_datetimelike(data, copy=False): raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data))) index = data.index + name = data.name + orig = data if is_categorical_dtype(data) else None + if orig is not None: + data = orig.values.categories + if is_datetime64_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name) + return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=name, + orig=orig) elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'), index, name=data.name) + return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer', ambiguous='infer'), + index, data.name, orig=orig) elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index, name=data.name) + return TimedeltaProperties(TimedeltaIndex(data, copy=copy, freq='infer'), index, + name=name, orig=orig) else: if is_period_arraylike(data): - 
return PeriodProperties(PeriodIndex(data, copy=copy), index, name=data.name) + return PeriodProperties(PeriodIndex(data, copy=copy), index, name=name, orig=orig) if is_datetime_arraylike(data): - return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, name=data.name) + return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'), index, + name=name, orig=orig) raise TypeError("cannot convert an object of type {0} to a datetimelike index".format(type(data))) class Properties(PandasDelegate, NoNewAttributesMixin): - def __init__(self, values, index, name): + def __init__(self, values, index, name, orig=None): self.values = values self.index = index self.name = name + self.orig = orig self._freeze() def _delegate_property_get(self, name): @@ -79,6 +89,10 @@ def _delegate_property_get(self, name): elif not is_list_like(result): return result + # blow up if we operate on categories + if self.orig is not None: + result = take_1d(result, self.orig.cat.codes) + # return the result as a Series, which is by definition a copy result = Series(result, index=self.index, name=self.name) From 8020bf54d5c1849e70899967b350c5209fe16d5a Mon Sep 17 00:00:00 2001 From: Jan Schulz Date: Fri, 13 Nov 2015 00:39:43 +0100 Subject: [PATCH 4/4] DOC: whatsnew and docs for multiple accessors Also add some docs in text.rst to mention the performance gains when using ``s_cat.str`` vs ``s.str``. --- doc/source/categorical.rst | 44 +++++++++++++++++++++++++++++++++ doc/source/text.rst | 16 ++++++++++++ doc/source/whatsnew/v0.17.1.txt | 2 ++ 3 files changed, 62 insertions(+) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 4ba52694980d3..6207366b96f63 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -515,6 +515,50 @@ To get a single value `Series` of type ``category`` pass in a list with a single df.loc[["h"],"cats"] +String and datetime accessors +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. 
versionadded:: 0.17.1 + +The accessors ``.dt`` and ``.str`` will work if the ``s.cat.categories`` are of an appropriate +type: + + +.. ipython:: python + + str_s = pd.Series(list('aabb')) + str_cat = str_s.astype('category') + str_cat.str.contains("a") + + date_s = pd.Series(date_range('1/1/2015', periods=5)) + date_cat = date_s.astype('category') + date_cat.dt.day + +.. note:: + + The returned ``Series`` (or ``DataFrame``) is of the same type as if you used the + ``.str.`` / ``.dt.`` on a ``Series`` of that type (and not of + type ``category``!). + +That means, that the returned values from methods and properties on the accessors of a +``Series`` and the returned values from methods and properties on the accessors of this +``Series`` transformed to one of type `category` will be equal: + +.. ipython:: python + + ret_s = str_s.str.contains("a") + ret_cat = str_cat.str.contains("a") + ret_s.dtype == ret_cat.dtype + ret_s == ret_cat + +.. note:: + + The work is done on the ``categories`` and then a new ``Series`` is constructed. This has + some performance implication if you have a ``Series`` of type string, where lots of elements + are repeated (i.e. the number of unique elements in the ``Series`` is a lot smaller than the + length of the ``Series``). In this case it can be faster to convert the original ``Series`` + to one of type ``category`` and use ``.str.`` or ``.dt.`` on that. + Setting ~~~~~~~ diff --git a/doc/source/text.rst b/doc/source/text.rst index ee4f96b41c7de..68ac82a5383c2 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -63,6 +63,22 @@ and replacing any remaining whitespaces with underscores: df.columns = df.columns.str.strip().str.lower().str.replace(' ', '_') df +.. note:: + + If you do a lot of string munging and have a ``Series`` where lots of elements are repeated + (i.e. 
the number of unique elements in the ``Series`` is a lot smaller than the length of the + ``Series``), it can be faster to convert the original ``Series`` to one of type + ``category`` and then use ``.str.`` or ``.dt.`` on that. The + performance difference comes from the fact that, for ``Series`` of type ``category``, the + string operations are done on the ``.categories`` and not on each element of the + ``Series``. Please note that a ``Series`` of type ``category`` with string ``.categories`` has + some limitations in comparison of ``Series`` of type string (e.g. you can't add strings to + each other: ``s + " " + s`` won't work if ``s`` is a ``Series`` of type ``category``). Also, + ``.str`` methods which operate on elements of type ``list`` are not available on such a + ``Series``. If you are interested in having these performance gains on all string ``Series``, + please look at `this bug report `_. + + Splitting and Replacing Strings ------------------------------- diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 046791d4287c9..b8702034cd464 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -65,6 +65,8 @@ Enhancements pd.Index([1, np.nan, 3]).fillna(2) +- Series of type ``"category"`` now make ``.str.<...>`` and ``.dt.<...>`` accessor methods / properties available, if the categories are of that type. (:issue:`10661`) + - ``pivot_table`` now has a ``margins_name`` argument so you can use something other than the default of 'All' (:issue:`3335`) .. _whatsnew_0171.api: