132
132
_default_encoding = "UTF-8"
133
133
134
134
135
- def _ensure_decoded (s ):
136
- """if we have bytes, decode them to unicode"""
137
- if isinstance (s , np .bytes_ ):
138
- s = s .decode ("UTF-8" )
139
- return s
140
-
141
-
142
135
def _ensure_encoding (encoding : str | None ) -> str :
143
136
# set the encoding if needed
144
137
if encoding is None :
@@ -1730,8 +1723,8 @@ def _create_storer(
1730
1723
if value is not None and not isinstance (value , (Series , DataFrame )):
1731
1724
raise TypeError ("value must be None, Series, or DataFrame" )
1732
1725
1733
- pt = _ensure_decoded ( getattr (group ._v_attrs , "pandas_type" , None ) )
1734
- tt = _ensure_decoded ( getattr (group ._v_attrs , "table_type" , None ) )
1726
+ pt = getattr (group ._v_attrs , "pandas_type" , None )
1727
+ tt = getattr (group ._v_attrs , "table_type" , None )
1735
1728
1736
1729
# infer the pt from the passed value
1737
1730
if pt is None :
@@ -1798,7 +1791,7 @@ def _create_storer(
1798
1791
"worm" : WORMTable ,
1799
1792
}
1800
1793
try :
1801
- cls = _TABLE_MAP [tt ]
1794
+ cls = _TABLE_MAP [tt ] # type: ignore[index]
1802
1795
except KeyError as err :
1803
1796
raise TypeError (
1804
1797
f"cannot properly create the storer for: [_TABLE_MAP] [group->"
@@ -2145,13 +2138,13 @@ def convert(
2145
2138
# preventing the original recarray from being freed
2146
2139
values = values [self .cname ].copy ()
2147
2140
2148
- val_kind = _ensure_decoded ( self .kind )
2141
+ val_kind = self .kind
2149
2142
values = _maybe_convert (values , val_kind , encoding , errors )
2150
2143
kwargs = {}
2151
- kwargs ["name" ] = _ensure_decoded ( self .index_name )
2144
+ kwargs ["name" ] = self .index_name
2152
2145
2153
2146
if self .freq is not None :
2154
- kwargs ["freq" ] = _ensure_decoded ( self .freq )
2147
+ kwargs ["freq" ] = self .freq
2155
2148
2156
2149
factory : type [Index | DatetimeIndex ] = Index
2157
2150
if lib .is_np_dtype (values .dtype , "M" ) or isinstance (
@@ -2210,7 +2203,7 @@ def maybe_set_size(self, min_itemsize=None) -> None:
2210
2203
min_itemsize can be an integer or a dict with this columns name
2211
2204
with an integer size
2212
2205
"""
2213
- if _ensure_decoded ( self .kind ) == "string" :
2206
+ if self .kind == "string" :
2214
2207
if isinstance (min_itemsize , dict ):
2215
2208
min_itemsize = min_itemsize .get (self .name )
2216
2209
@@ -2231,7 +2224,7 @@ def validate_and_set(self, handler: AppendableTable, append: bool) -> None:
2231
2224
def validate_col (self , itemsize = None ):
2232
2225
"""validate this column: return the compared against itemsize"""
2233
2226
# validate this column for string truncation (or reset to the max size)
2234
- if _ensure_decoded ( self .kind ) == "string" :
2227
+ if self .kind == "string" :
2235
2228
c = self .col
2236
2229
if c is not None :
2237
2230
if itemsize is None :
@@ -2561,14 +2554,14 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2561
2554
assert isinstance (converted , np .ndarray ) # for mypy
2562
2555
2563
2556
# use the meta if needed
2564
- meta = _ensure_decoded ( self .meta )
2557
+ meta = self .meta
2565
2558
metadata = self .metadata
2566
2559
ordered = self .ordered
2567
2560
tz = self .tz
2568
2561
2569
2562
assert dtype_name is not None
2570
2563
# convert to the correct dtype
2571
- dtype = _ensure_decoded ( dtype_name )
2564
+ dtype = dtype_name
2572
2565
2573
2566
# reverse converts
2574
2567
if dtype .startswith ("datetime64" ):
@@ -2618,7 +2611,7 @@ def convert(self, values: np.ndarray, nan_rep, encoding: str, errors: str):
2618
2611
converted = converted .astype ("O" , copy = False )
2619
2612
2620
2613
# convert nans / decode
2621
- if _ensure_decoded ( kind ) == "string" :
2614
+ if kind == "string" :
2622
2615
converted = _unconvert_string_array (
2623
2616
converted , nan_rep = nan_rep , encoding = encoding , errors = errors
2624
2617
)
@@ -2706,18 +2699,19 @@ def is_old_version(self) -> bool:
2706
2699
@property
2707
2700
def version (self ) -> tuple [int , int , int ]:
2708
2701
"""compute and set our version"""
2709
- version = _ensure_decoded (getattr (self .group ._v_attrs , "pandas_version" , None ))
2710
- try :
2711
- version = tuple (int (x ) for x in version .split ("." ))
2712
- if len (version ) == 2 :
2713
- version = version + (0 ,)
2714
- except AttributeError :
2715
- version = (0 , 0 , 0 )
2716
- return version
2702
+ version = getattr (self .group ._v_attrs , "pandas_version" , None )
2703
+ if isinstance (version , str ):
2704
+ version_tup = tuple (int (x ) for x in version .split ("." ))
2705
+ if len (version_tup ) == 2 :
2706
+ version_tup = version_tup + (0 ,)
2707
+ assert len (version_tup ) == 3 # needed for mypy
2708
+ return version_tup
2709
+ else :
2710
+ return (0 , 0 , 0 )
2717
2711
2718
2712
@property
2719
2713
def pandas_type (self ):
2720
- return _ensure_decoded ( getattr (self .group ._v_attrs , "pandas_type" , None ) )
2714
+ return getattr (self .group ._v_attrs , "pandas_type" , None )
2721
2715
2722
2716
def __repr__ (self ) -> str :
2723
2717
"""return a pretty representation of myself"""
@@ -2854,9 +2848,7 @@ def _alias_to_class(self, alias):
2854
2848
return self ._reverse_index_map .get (alias , Index )
2855
2849
2856
2850
def _get_index_factory (self , attrs ):
2857
- index_class = self ._alias_to_class (
2858
- _ensure_decoded (getattr (attrs , "index_class" , "" ))
2859
- )
2851
+ index_class = self ._alias_to_class (getattr (attrs , "index_class" , "" ))
2860
2852
2861
2853
factory : Callable
2862
2854
@@ -2892,12 +2884,7 @@ def f(values, freq=None, tz=None):
2892
2884
factory = TimedeltaIndex
2893
2885
2894
2886
if "tz" in attrs :
2895
- if isinstance (attrs ["tz" ], bytes ):
2896
- # created by python2
2897
- kwargs ["tz" ] = attrs ["tz" ].decode ("utf-8" )
2898
- else :
2899
- # created by python3
2900
- kwargs ["tz" ] = attrs ["tz" ]
2887
+ kwargs ["tz" ] = attrs ["tz" ]
2901
2888
assert index_class is DatetimeIndex # just checking
2902
2889
2903
2890
return factory , kwargs
@@ -2929,9 +2916,9 @@ def set_attrs(self) -> None:
2929
2916
def get_attrs (self ) -> None :
2930
2917
"""retrieve our attributes"""
2931
2918
self .encoding = _ensure_encoding (getattr (self .attrs , "encoding" , None ))
2932
- self .errors = _ensure_decoded ( getattr (self .attrs , "errors" , "strict" ) )
2919
+ self .errors = getattr (self .attrs , "errors" , "strict" )
2933
2920
for n in self .attributes :
2934
- setattr (self , n , _ensure_decoded ( getattr (self .attrs , n , None ) ))
2921
+ setattr (self , n , getattr (self .attrs , n , None ))
2935
2922
2936
2923
def write (self , obj , ** kwargs ) -> None :
2937
2924
self .set_attrs ()
@@ -2948,7 +2935,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
2948
2935
if isinstance (node , tables .VLArray ):
2949
2936
ret = node [0 ][start :stop ]
2950
2937
else :
2951
- dtype = _ensure_decoded ( getattr (attrs , "value_type" , None ) )
2938
+ dtype = getattr (attrs , "value_type" , None )
2952
2939
shape = getattr (attrs , "shape" , None )
2953
2940
2954
2941
if shape is not None :
@@ -2973,7 +2960,7 @@ def read_array(self, key: str, start: int | None = None, stop: int | None = None
2973
2960
def read_index (
2974
2961
self , key : str , start : int | None = None , stop : int | None = None
2975
2962
) -> Index :
2976
- variety = _ensure_decoded ( getattr (self .attrs , f"{ key } _variety" ) )
2963
+ variety = getattr (self .attrs , f"{ key } _variety" )
2977
2964
2978
2965
if variety == "multi" :
2979
2966
return self .read_multi_index (key , start = start , stop = stop )
@@ -3063,12 +3050,11 @@ def read_index_node(
3063
3050
# have written a sentinel. Here we replace it with the original.
3064
3051
if "shape" in node ._v_attrs and np .prod (node ._v_attrs .shape ) == 0 :
3065
3052
data = np .empty (node ._v_attrs .shape , dtype = node ._v_attrs .value_type )
3066
- kind = _ensure_decoded ( node ._v_attrs .kind )
3053
+ kind = node ._v_attrs .kind
3067
3054
name = None
3068
3055
3069
3056
if "name" in node ._v_attrs :
3070
3057
name = _ensure_str (node ._v_attrs .name )
3071
- name = _ensure_decoded (name )
3072
3058
3073
3059
attrs = node ._v_attrs
3074
3060
factory , kwargs = self ._get_index_factory (attrs )
@@ -3584,7 +3570,7 @@ def get_attrs(self) -> None:
3584
3570
self .info = getattr (self .attrs , "info" , None ) or {}
3585
3571
self .nan_rep = getattr (self .attrs , "nan_rep" , None )
3586
3572
self .encoding = _ensure_encoding (getattr (self .attrs , "encoding" , None ))
3587
- self .errors = _ensure_decoded ( getattr (self .attrs , "errors" , "strict" ) )
3573
+ self .errors = getattr (self .attrs , "errors" , "strict" )
3588
3574
self .levels : list [Hashable ] = getattr (self .attrs , "levels" , None ) or []
3589
3575
self .index_axes = [a for a in self .indexables if a .is_an_indexable ]
3590
3576
self .values_axes = [a for a in self .indexables if not a .is_an_indexable ]
@@ -4926,7 +4912,6 @@ def _set_tz(
4926
4912
name = None
4927
4913
values = values .ravel ()
4928
4914
4929
- tz = _ensure_decoded (tz )
4930
4915
values = DatetimeIndex (values , name = name )
4931
4916
values = values .tz_localize ("UTC" ).tz_convert (tz )
4932
4917
elif coerce :
@@ -5228,8 +5213,6 @@ def _dtype_to_kind(dtype_str: str) -> str:
5228
5213
"""
5229
5214
Find the "kind" string describing the given dtype name.
5230
5215
"""
5231
- dtype_str = _ensure_decoded (dtype_str )
5232
-
5233
5216
if dtype_str .startswith (("string" , "bytes" )):
5234
5217
kind = "string"
5235
5218
elif dtype_str .startswith ("float" ):
0 commit comments