Skip to content

Commit 5bc4ef8

Browse files
committed
Finish tests; add coverage for categorical; xfail where necessary
1 parent d88b23c commit 5bc4ef8

File tree

2 files changed

+183
-134
lines changed

2 files changed

+183
-134
lines changed

pandas/core/strings.py

+18 -17
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import numpy as np
33

44
from pandas.compat import zip
5-
from pandas.core.dtypes.generic import ABCSeries, ABCIndex, ABCMultiIndex
5+
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass, ABCMultiIndex
66
from pandas.core.dtypes.missing import isna
77
from pandas.core.dtypes.common import (
88
ensure_object,
@@ -942,7 +942,7 @@ def str_extractall(arr, pat, flags=0):
942942
if regex.groups == 0:
943943
raise ValueError("pattern contains no capture groups")
944944

945-
if isinstance(arr, ABCIndex):
945+
if isinstance(arr, ABCIndexClass):
946946
arr = arr.to_series().reset_index(drop=True)
947947

948948
names = dict(zip(regex.groupindex.values(), regex.groupindex.keys()))
@@ -1757,8 +1757,8 @@ def forbid_nonstring_types(forbidden, name=None):
17571757
make it explicit which (inferred) types are disallowed by the method.
17581758
17591759
:meth:`StringMethods.__init__` allows the *union* of types its different
1760-
methods allow (after skipping NaNs; see :meth:`StringMethods._validate`):
1761-
['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer'].
1760+
methods allow (after skipping NaNs; see :meth:`StringMethods._validate`),
1761+
namely: ['string', 'unicode', 'empty', 'bytes', 'mixed', 'mixed-integer'].
17621762
17631763
The default string types ['string', 'unicode', 'empty'] are allowed for all
17641764
methods. For the additional types ['bytes', 'mixed', 'mixed-integer'], each
@@ -1783,7 +1783,7 @@ def forbid_nonstring_types(forbidden, name=None):
17831783
17841784
Raises
17851785
------
1786-
AttributeError
1786+
TypeError
17871787
If the inferred type of the underlying data is in `forbidden`.
17881788
"""
17891789

@@ -1800,11 +1800,11 @@ def _forbid_nonstring_types(func):
18001800

18011801
@wraps(func)
18021802
def wrapper(self, *args, **kwargs):
1803-
if self._inferred_type not in allowed_types:
1804-
msg = ("Cannot use .str.{name} with values of inferred type "
1805-
"{inf_type!r}.".format(name=func_name,
1806-
inf_type=self._inferred_type))
1807-
raise AttributeError(msg)
1803+
if self._inferred_dtype not in allowed_types:
1804+
msg = ('Cannot use .str.{name} with values of inferred dtype '
1805+
'{inf_type!r}.'.format(name=func_name,
1806+
inf_type=self._inferred_dtype))
1807+
raise TypeError(msg)
18081808
return func(self, *args, **kwargs)
18091809
wrapper.__name__ = func_name
18101810
return wrapper
@@ -1877,7 +1877,7 @@ class StringMethods(NoNewAttributesMixin):
18771877
"""
18781878

18791879
def __init__(self, data):
1880-
self._inferred_type = self._validate(data)
1880+
self._inferred_dtype = self._validate(data)
18811881
self._is_categorical = is_categorical_dtype(data)
18821882

18831883
# .values.categories works for both Series/Index
@@ -1899,10 +1899,10 @@ def _validate(data):
18991899
values = getattr(data, 'values', data) # Series / Index
19001900
values = getattr(values, 'categories', values) # categorical / normal
19011901

1902-
# missing values obfuscates type inference -> skip
1903-
inferred_type = lib.infer_dtype(values, skipna=True)
1902+
# missing values obfuscate type inference -> skip
1903+
inferred_dtype = lib.infer_dtype(values, skipna=True)
19041904

1905-
if inferred_type not in allowed_types:
1905+
if inferred_dtype not in allowed_types:
19061906
# this is a "first line of defence" and just checks that the type
19071907
# is in the *union* of the allowed types over all methods below;
19081908
# this restriction is then refined on a per-method basis using the
@@ -1913,7 +1913,7 @@ def _validate(data):
19131913
# have a str dtype (GH 9343 / 13877)
19141914
raise AttributeError("Can only use .str accessor with string "
19151915
"values!")
1916-
return inferred_type
1916+
return inferred_dtype
19171917

19181918
def __getitem__(self, key):
19191919
if isinstance(key, slice):
@@ -1932,15 +1932,16 @@ def __iter__(self):
19321932
def _wrap_result(self, result, use_codes=True,
19331933
name=None, expand=None):
19341934

1935-
from pandas.core.index import Index, MultiIndex
1935+
from pandas import Index, Series, MultiIndex
19361936

19371937
# for category, we do the stuff on the categories, so blow it up
19381938
# to the full series again
19391939
# But for some operations, we have to do the stuff on the full values,
19401940
# so make it possible to skip this step as the method already did this
19411941
# before the transformation...
19421942
if use_codes and self._is_categorical:
1943-
result = take_1d(result, self._orig.cat.codes)
1943+
# if self._orig is a CategoricalIndex, there is no .cat-accessor
1944+
result = take_1d(result, Series(self._orig).cat.codes)
19441945

19451946
if not hasattr(result, 'ndim') or not hasattr(result, 'dtype'):
19461947
return result

0 commit comments

Comments
 (0)