API: fix str-accessor on CategoricalIndex (pandas-dev#24005)

h-vetinari · Pingviinituutti · commit dc3e92a2d6ef · 2019-02-28T10:26:56.000+02:00
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1256,6 +1256,7 @@ Categorical
 - Bug in :meth:`Categorical.take` with a user-provided ``fill_value`` not encoding the ``fill_value``, which could result in a ``ValueError``, incorrect results, or a segmentation fault (:issue:`23296`).
 - In meth:`Series.unstack`, specifying a ``fill_value`` not present in the categories now raises a ``TypeError`` rather than ignoring the ``fill_value`` (:issue:`23284`)
 - Bug when resampling :meth:`Dataframe.resample()` and aggregating on categorical data, the categorical dtype was getting lost. (:issue:`23227`)
+- Bug in many methods of the ``.str``-accessor, which always failed on calling the ``CategoricalIndex.str`` constructor (:issue:`23555`, :issue:`23556`)
 
 Datetimelike
 ^^^^^^^^^^^^
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
@@ -15,7 +15,7 @@
 from pandas.core.dtypes.common import (
     ensure_object, is_bool_dtype, is_categorical_dtype, is_integer,
     is_list_like, is_object_dtype, is_re, is_scalar, is_string_like)
-from pandas.core.dtypes.generic import ABCIndex, ABCSeries
+from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
 from pandas.core.dtypes.missing import isna
 
 from pandas.core.algorithms import take_1d
@@ -931,7 +931,7 @@ def str_extractall(arr, pat, flags=0):
     if regex.groups == 0:
         raise ValueError("pattern contains no capture groups")
 
-    if isinstance(arr, ABCIndex):
+    if isinstance(arr, ABCIndexClass):
         arr = arr.to_series().reset_index(drop=True)
 
     names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
@@ -1854,15 +1854,16 @@ def __iter__(self):
     def _wrap_result(self, result, use_codes=True,
                      name=None, expand=None, fill_value=np.nan):
 
-        from pandas.core.index import Index, MultiIndex
+        from pandas import Index, Series, MultiIndex
 
         # for category, we do the stuff on the categories, so blow it up
         # to the full series again
         # But for some operations, we have to do the stuff on the full values,
         # so make it possible to skip this step as the method already did this
         # before the transformation...
         if use_codes and self._is_categorical:
-            result = take_1d(result, self._orig.cat.codes,
+            # if self._orig is a CategoricalIndex, there is no .cat-accessor
+            result = take_1d(result, Series(self._orig, copy=False).cat.codes,
                              fill_value=fill_value)
 
         if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'):
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py
@@ -245,9 +245,6 @@ def test_api_per_method(self, box, dtype,
                 and inferred_dtype in ['boolean', 'date', 'time']):
             pytest.xfail(reason='Inferring incorrectly because of NaNs; '
                          'solved by GH 23167')
-        if box == Index and dtype == 'category':
-            pytest.xfail(reason='Broken methods on CategoricalIndex; '
-                         'see GH 23556')
 
         t = box(values, dtype=dtype)  # explicit dtype to avoid casting
         method = getattr(t.str, method_name)
@@ -264,6 +261,7 @@ def test_api_per_method(self, box, dtype,
                          + ['mixed', 'mixed-integer'] * mixed_allowed)
 
         if inferred_dtype in allowed_types:
+            # xref GH 23555, GH 23556
             method(*args, **kwargs)  # works!
         else:
             # GH 23011, GH 23163