@@ -1759,14 +1759,23 @@ def wrapper(self):
1759
1759
1760
1760
def _pat_wrapper (f , flags = False , na = False , ** kwargs ):
1761
1761
def wrapper1(self, pat):
    # Pattern-only variant: bytes-typed data is rejected before the
    # wrapped function ``f`` is invoked on the parent object.
    if self._inferred_type == 'bytes':
        raise AttributeError(
            "Cannot use .str.{} with 'bytes' values".format(f.__name__))
    outcome = f(self._parent, pat)
    return self._wrap_result(outcome)
1764
1767
1765
1768
def wrapper2(self, pat, flags=0, **kwargs):
    # Variant that forwards ``flags`` and any extra keyword arguments;
    # bytes-typed data is rejected before ``f`` is invoked.
    if self._inferred_type == 'bytes':
        raise AttributeError(
            "Cannot use .str.{} with 'bytes' values".format(f.__name__))
    outcome = f(self._parent, pat, flags=flags, **kwargs)
    return self._wrap_result(outcome)
1768
1774
1769
1775
def wrapper3(self, pat, na=np.nan):
    # Variant that forwards the ``na`` fill value; bytes-typed data is
    # rejected before ``f`` is invoked.
    if self._inferred_type == 'bytes':
        raise AttributeError(
            "Cannot use .str.{} with 'bytes' values".format(f.__name__))
    outcome = f(self._parent, pat, na=na)
    return self._wrap_result(outcome)
1772
1781
@@ -1803,7 +1812,7 @@ class StringMethods(NoNewAttributesMixin):
1803
1812
"""
1804
1813
1805
1814
def __init__ (self , data ):
1806
- self ._validate (data )
1815
+ self ._inferred_type = self . _validate (data )
1807
1816
self ._is_categorical = is_categorical_dtype (data )
1808
1817
1809
1818
# .values.categories works for both Series/Index
@@ -1818,18 +1827,18 @@ def _validate(data):
1818
1827
raise AttributeError ('Can only use .str accessor with Index, '
1819
1828
'not MultiIndex' )
1820
1829
1821
- # see src/inference .pyx which can contain string values
1830
+ # see _libs/lib .pyx for list of inferred types
1822
1831
allowed_types = ['string' , 'unicode' , 'empty' ,
1823
1832
'mixed' , 'mixed-integer' ]
1824
1833
if isinstance (data , ABCSeries ):
1834
+ # needed for str.decode
1825
1835
allowed_types = allowed_types + ['bytes' ]
1826
1836
1827
- data = data .dropna () # missing values mess up type inference
1828
1837
values = getattr (data , 'values' , data ) # Series / Index
1829
1838
values = getattr (values , 'categories' , values ) # categorical / normal
1830
- inferred_type = lib .infer_dtype (values )
1839
+ # missing values mess up type inference -> skip
1840
+ inferred_type = lib .infer_dtype (values , skipna = True )
1831
1841
1832
- # same for Series and Index (that is not MultiIndex)
1833
1842
if inferred_type not in allowed_types :
1834
1843
# it's neither a string series/index nor a categorical series/index
1835
1844
# with strings inside the categories.
@@ -1838,7 +1847,8 @@ def _validate(data):
1838
1847
# have a str dtype (GH 9343 / 13877)
1839
1848
raise AttributeError ("Can only use .str accessor with string "
1840
1849
"values (i.e. inferred_type is 'string', "
1841
- "'unicode' or 'mixed')" )
1850
+ "'unicode', 'mixed' or 'empty')" )
1851
+ return inferred_type
1842
1852
1843
1853
def __getitem__ (self , key ):
1844
1854
if isinstance (key , slice ):
@@ -2188,6 +2198,11 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2188
2198
"""
2189
2199
from pandas import Index , Series , concat
2190
2200
2201
+ if self ._inferred_type in ['mixed' , 'mixed-integer' , 'bytes' ]:
2202
+ raise AttributeError ("Can only use .str.cat with string values "
2203
+ "(i.e. inferred_type is 'string', 'unicode' "
2204
+ "'empty')" )
2205
+
2191
2206
if isinstance (others , compat .string_types ):
2192
2207
raise ValueError ("Did you mean to supply a `sep` keyword?" )
2193
2208
if sep is None :
@@ -2396,13 +2411,17 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2396
2411
'side' : 'beginning' ,
2397
2412
'method' : 'split' })
2398
2413
def split(self, pat=None, n=-1, expand=False):
    # bytes-typed data is refused before any splitting is attempted.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.split with 'bytes' values")
    pieces = str_split(self._parent, pat, n=n)
    return self._wrap_result(pieces, expand=expand)
2401
2418
2402
2419
@Appender (_shared_docs ['str_split' ] % {
2403
2420
'side' : 'end' ,
2404
2421
'method' : 'rsplit' })
2405
2422
def rsplit(self, pat=None, n=-1, expand=False):
    # bytes-typed data is refused before any splitting is attempted.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.rsplit with 'bytes' values")
    pieces = str_rsplit(self._parent, pat, n=n)
    return self._wrap_result(pieces, expand=expand)
2408
2427
@@ -2493,6 +2512,9 @@ def rsplit(self, pat=None, n=-1, expand=False):
2493
2512
'also' : 'rpartition : Split the string at the last occurrence of `sep`'
2494
2513
})
2495
2514
def partition(self, pat=' ', expand=True):
    # bytes-typed data is refused before any element is touched.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.partition with 'bytes' values")

    def _partition_one(value):
        # Per-element work: defer to the element's own ``partition``.
        return value.partition(pat)

    parts = _na_map(_partition_one, self._parent)
    return self._wrap_result(parts, expand=expand)
@@ -2504,6 +2526,9 @@ def partition(self, pat=' ', expand=True):
2504
2526
'also' : 'partition : Split the string at the first occurrence of `sep`'
2505
2527
})
2506
2528
def rpartition(self, pat=' ', expand=True):
    # bytes-typed data is refused before any element is touched.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.rpartition with 'bytes' values")

    def _rpartition_one(value):
        # Per-element work: defer to the element's own ``rpartition``.
        return value.rpartition(pat)

    parts = _na_map(_rpartition_one, self._parent)
    return self._wrap_result(parts, expand=expand)
@@ -2515,6 +2540,8 @@ def get(self, i):
2515
2540
2516
2541
@copy (str_join )
2517
2542
def join(self, sep):
    # bytes-typed data is refused before joining.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.join with 'bytes' values")
    joined = str_join(self._parent, sep)
    return self._wrap_result(joined)
2520
2547
@@ -2565,14 +2592,20 @@ def pad(self, width, side='left', fillchar=' '):
2565
2592
@Appender (_shared_docs ['str_pad' ] % dict (side = 'left and right' ,
2566
2593
method = 'center' ))
2567
2594
def center(self, width, fillchar=' '):
    # bytes-typed data is refused; otherwise this is ``pad`` on both sides.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.center with 'bytes' values")
    padded = self.pad(width, side='both', fillchar=fillchar)
    return padded
2569
2598
2570
2599
@Appender (_shared_docs ['str_pad' ] % dict (side = 'right' , method = 'ljust' ))
2571
2600
def ljust(self, width, fillchar=' '):
    # bytes-typed data is refused; otherwise this is ``pad`` on the right.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.ljust with 'bytes' values")
    padded = self.pad(width, side='right', fillchar=fillchar)
    return padded
2573
2604
2574
2605
@Appender (_shared_docs ['str_pad' ] % dict (side = 'left' , method = 'rjust' ))
2575
2606
def rjust(self, width, fillchar=' '):
    # bytes-typed data is refused; otherwise this is ``pad`` on the left.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.rjust with 'bytes' values")
    padded = self.pad(width, side='left', fillchar=fillchar)
    return padded
2577
2610
2578
2611
def zfill (self , width ):
@@ -2635,21 +2668,29 @@ def zfill(self, width):
2635
2668
4 NaN
2636
2669
dtype: object
2637
2670
"""
2671
+ if self ._inferred_type in ['bytes' ]:
2672
+ raise AttributeError ("Cannot use .str.zfill with 'bytes' values" )
2638
2673
result = str_pad (self ._parent , width , side = 'left' , fillchar = '0' )
2639
2674
return self ._wrap_result (result )
2640
2675
2641
2676
@copy (str_slice )
2642
2677
def slice(self, start=None, stop=None, step=None):
    # bytes-typed data is refused before slicing.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.slice with 'bytes' values")
    sliced = str_slice(self._parent, start, stop, step)
    return self._wrap_result(sliced)
2645
2682
2646
2683
@copy (str_slice_replace )
2647
2684
def slice_replace(self, start=None, stop=None, repl=None):
    # bytes-typed data is refused before any replacement is attempted.
    if self._inferred_type == 'bytes':
        raise AttributeError(
            "Cannot use .str.slice_replace with 'bytes' values")
    replaced = str_slice_replace(self._parent, start, stop, repl)
    return self._wrap_result(replaced)
2650
2690
2651
2691
@copy (str_decode )
2652
2692
def decode(self, encoding, errors="strict"):
    # Deliberately no bytes guard: decoding has to accept bytes input.
    decoded = str_decode(self._parent, encoding, errors)
    return self._wrap_result(decoded)
2655
2696
@@ -2724,28 +2765,39 @@ def encode(self, encoding, errors="strict"):
2724
2765
@Appender (_shared_docs ['str_strip' ] % dict (side = 'left and right sides' ,
2725
2766
method = 'strip' ))
2726
2767
def strip(self, to_strip=None):
    # bytes-typed data is refused; otherwise both sides are stripped.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.strip with 'bytes' values")
    stripped = str_strip(self._parent, to_strip, side='both')
    return self._wrap_result(stripped)
2729
2772
2730
2773
@Appender (_shared_docs ['str_strip' ] % dict (side = 'left side' ,
2731
2774
method = 'lstrip' ))
2732
2775
def lstrip(self, to_strip=None):
    # bytes-typed data is refused; otherwise the left side is stripped.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.lstrip with 'bytes' values")
    stripped = str_strip(self._parent, to_strip, side='left')
    return self._wrap_result(stripped)
2735
2780
2736
2781
@Appender (_shared_docs ['str_strip' ] % dict (side = 'right side' ,
2737
2782
method = 'rstrip' ))
2738
2783
def rstrip(self, to_strip=None):
    # bytes-typed data is refused; otherwise the right side is stripped.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.rstrip with 'bytes' values")
    stripped = str_strip(self._parent, to_strip, side='right')
    return self._wrap_result(stripped)
2741
2788
2742
2789
@copy (str_wrap )
2743
2790
def wrap(self, width, **kwargs):
    # bytes-typed data is refused; extra keyword arguments are passed
    # through to ``str_wrap`` unchanged.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.wrap with 'bytes' values")
    wrapped = str_wrap(self._parent, width, **kwargs)
    return self._wrap_result(wrapped)
2746
2795
2747
2796
@copy (str_get_dummies )
2748
2797
def get_dummies (self , sep = '|' ):
2798
+ if self ._inferred_type in ['bytes' ]:
2799
+ raise AttributeError ("Cannot use .str.get_dummies with "
2800
+ "'bytes' values" )
2749
2801
# we need to cast to Series of strings as only that has all
2750
2802
# methods available for making the dummies...
2751
2803
data = self ._orig .astype (str ) if self ._is_categorical else self ._parent
@@ -2755,6 +2807,9 @@ def get_dummies(self, sep='|'):
2755
2807
2756
2808
@copy (str_translate )
2757
2809
def translate(self, table, deletechars=None):
    # bytes-typed data is refused before translating.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.translate with 'bytes' values")
    translated = str_translate(self._parent, table, deletechars)
    return self._wrap_result(translated)
2760
2815
@@ -2765,10 +2820,15 @@ def translate(self, table, deletechars=None):
2765
2820
2766
2821
@copy (str_extract )
2767
2822
def extract(self, pat, flags=0, expand=True):
    # bytes-typed data is refused; note the accessor itself (not the
    # parent data) is handed to ``str_extract``.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.extract with 'bytes' values")
    extracted = str_extract(self, pat, flags=flags, expand=expand)
    return extracted
2769
2826
2770
2827
@copy (str_extractall )
2771
2828
def extractall(self, pat, flags=0):
    # bytes-typed data is refused; the original object (``_orig``) is
    # what gets passed to ``str_extractall``.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.extractall with 'bytes' values")
    matches = str_extractall(self._orig, pat, flags=flags)
    return matches
2773
2833
2774
2834
_shared_docs ['find' ] = ("""
@@ -2798,13 +2858,17 @@ def extractall(self, pat, flags=0):
2798
2858
dict (side = 'lowest' , method = 'find' ,
2799
2859
also = 'rfind : Return highest indexes in each strings' ))
2800
2860
def find(self, sub, start=0, end=None):
    # bytes-typed data is refused; otherwise search from the left.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.find with 'bytes' values")
    positions = str_find(self._parent, sub,
                         start=start, end=end, side='left')
    return self._wrap_result(positions)
2803
2865
2804
2866
@Appender (_shared_docs ['find' ] %
2805
2867
dict (side = 'highest' , method = 'rfind' ,
2806
2868
also = 'find : Return lowest indexes in each strings' ))
2807
2869
def rfind(self, sub, start=0, end=None):
    # bytes-typed data is refused; otherwise search from the right.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.rfind with 'bytes' values")
    positions = str_find(self._parent, sub,
                         start=start, end=end, side='right')
    return self._wrap_result(positions)
@@ -2824,6 +2888,9 @@ def normalize(self, form):
2824
2888
normalized : Series/Index of objects
2825
2889
"""
2826
2890
import unicodedata
2891
+ if self ._inferred_type in ['bytes' ]:
2892
+ raise AttributeError ("Cannot use .str.normalize with "
2893
+ "'bytes' values" )
2827
2894
f = lambda x : unicodedata .normalize (form , compat .u_safe (x ))
2828
2895
result = _na_map (f , self ._parent )
2829
2896
return self ._wrap_result (result )
@@ -2856,6 +2923,8 @@ def normalize(self, form):
2856
2923
dict (side = 'lowest' , similar = 'find' , method = 'index' ,
2857
2924
also = 'rindex : Return highest indexes in each strings' ))
2858
2925
def index(self, sub, start=0, end=None):
    # bytes-typed data is refused; otherwise look up from the left.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.index with 'bytes' values")
    positions = str_index(self._parent, sub,
                          start=start, end=end, side='left')
    return self._wrap_result(positions)
@@ -2864,6 +2933,8 @@ def index(self, sub, start=0, end=None):
2864
2933
dict (side = 'highest' , similar = 'rfind' , method = 'rindex' ,
2865
2934
also = 'index : Return lowest indexes in each strings' ))
2866
2935
def rindex(self, sub, start=0, end=None):
    # bytes-typed data is refused; otherwise look up from the right.
    if self._inferred_type == 'bytes':
        raise AttributeError("Cannot use .str.rindex with 'bytes' values")
    positions = str_index(self._parent, sub,
                          start=start, end=end, side='right')
    return self._wrap_result(positions)
0 commit comments