8
8
ensure_object ,
9
9
is_bool_dtype ,
10
10
is_categorical_dtype ,
11
- is_object_dtype ,
12
11
is_string_like ,
13
12
is_list_like ,
14
13
is_scalar ,
@@ -1815,8 +1814,6 @@ def __init__(self, data):
1815
1814
1816
1815
@staticmethod
1817
1816
def _validate (data ):
1818
- from pandas .core .index import Index
1819
-
1820
1817
if isinstance (data , ABCMultiIndex ):
1821
1818
raise AttributeError ('Can only use .str accessor with Index, '
1822
1819
'not MultiIndex' )
@@ -1827,16 +1824,13 @@ def _validate(data):
1827
1824
if isinstance (data , ABCSeries ):
1828
1825
allowed_types = allowed_types + ['bytes' ]
1829
1826
1830
- values = data if isinstance (data , Index ) else data .values
1831
- if is_categorical_dtype (data .dtype ):
1832
- inf_type = lib .infer_dtype (values .categories )
1833
- else :
1834
- inf_type = lib .infer_dtype (values )
1835
-
1836
- all_na_obj = is_object_dtype (values .dtype ) and data .isna ().all ()
1827
+ data = data .dropna () # missing values mess up type inference
1828
+ values = getattr (data , 'values' , data ) # Series / Index
1829
+ values = getattr (values , 'categories' , values ) # categorical / normal
1830
+ inferred_type = lib .infer_dtype (values )
1837
1831
1838
1832
# same for Series and Index (that is not MultiIndex)
1839
- if inf_type not in allowed_types and not all_na_obj :
1833
+ if inferred_type not in allowed_types :
1840
1834
# it's neither a string series/index nor a categorical series/index
1841
1835
# with strings inside the categories.
1842
1836
# this really should exclude all series/index with any non-string
@@ -2275,9 +2269,10 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2275
2269
2276
2270
if isinstance (self ._orig , Index ):
2277
2271
# add dtype for case that result is all-NA
2278
- result = Index (result , dtype = ' object' , name = self ._orig .name )
2272
+ result = Index (result , dtype = object , name = self ._orig .name )
2279
2273
else : # Series
2280
- result = Series (result , index = data .index , name = self ._orig .name )
2274
+ result = Series (result , dtype = object , index = data .index ,
2275
+ name = self ._orig .name )
2281
2276
return result
2282
2277
2283
2278
_shared_docs ['str_split' ] = ("""
0 commit comments