Skip to content

Commit 173514f

Browse files
committed
Review (jreback)
1 parent 9775742 commit 173514f

File tree

1 file changed

+8
-13
lines changed

1 file changed

+8
-13
lines changed

pandas/core/strings.py

+8-13
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,6 @@
88
ensure_object,
99
is_bool_dtype,
1010
is_categorical_dtype,
11-
is_object_dtype,
1211
is_string_like,
1312
is_list_like,
1413
is_scalar,
@@ -1815,8 +1814,6 @@ def __init__(self, data):
18151814

18161815
@staticmethod
18171816
def _validate(data):
1818-
from pandas.core.index import Index
1819-
18201817
if isinstance(data, ABCMultiIndex):
18211818
raise AttributeError('Can only use .str accessor with Index, '
18221819
'not MultiIndex')
@@ -1827,16 +1824,13 @@ def _validate(data):
18271824
if isinstance(data, ABCSeries):
18281825
allowed_types = allowed_types + ['bytes']
18291826

1830-
values = data if isinstance(data, Index) else data.values
1831-
if is_categorical_dtype(data.dtype):
1832-
inf_type = lib.infer_dtype(values.categories)
1833-
else:
1834-
inf_type = lib.infer_dtype(values)
1835-
1836-
all_na_obj = is_object_dtype(values.dtype) and data.isna().all()
1827+
data = data.dropna() # missing values mess up type inference
1828+
values = getattr(data, 'values', data) # Series / Index
1829+
values = getattr(values, 'categories', values) # categorical / normal
1830+
inferred_type = lib.infer_dtype(values)
18371831

18381832
# same for Series and Index (that is not MultiIndex)
1839-
if inf_type not in allowed_types and not all_na_obj:
1833+
if inferred_type not in allowed_types:
18401834
# it's neither a string series/index nor a categorical series/index
18411835
# with strings inside the categories.
18421836
# this really should exclude all series/index with any non-string
@@ -2275,9 +2269,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22752269

22762270
if isinstance(self._orig, Index):
22772271
# add dtype for case that result is all-NA
2278-
result = Index(result, dtype='object', name=self._orig.name)
2272+
result = Index(result, dtype=object, name=self._orig.name)
22792273
else: # Series
2280-
result = Series(result, index=data.index, name=self._orig.name)
2274+
result = Series(result, dtype=object, index=data.index,
2275+
name=self._orig.name)
22812276
return result
22822277

22832278
_shared_docs['str_split'] = ("""

0 commit comments

Comments
 (0)