156
156
157
157
@Appender (_read_stata_doc )
158
158
def read_stata (filepath_or_buffer , convert_dates = True ,
159
- convert_categoricals = True , encoding = 'latin-1' , index = None ,
159
+ convert_categoricals = True , encoding = None , index = None ,
160
160
convert_missing = False , preserve_dtypes = True , columns = None ,
161
161
order_categoricals = True , chunksize = None , iterator = False ):
162
162
@@ -821,11 +821,11 @@ def get_base_missing_value(cls, dtype):
821
821
class StataParser (object ):
822
822
_default_encoding = 'latin-1'
823
823
824
- def __init__ (self , encoding = 'latin-1' ):
825
-
826
- if encoding not in VALID_ENCODINGS :
827
- raise ValueError ('Unknown encoding. Only latin-1 and ascii '
828
- 'supported.' )
824
+ def __init__ (self , encoding ):
825
+ if encoding is not None :
826
+ if encoding not in VALID_ENCODINGS :
827
+ raise ValueError ('Unknown encoding. Only latin-1 and ascii '
828
+ 'supported.' )
829
829
830
830
self ._encoding = encoding
831
831
@@ -957,9 +957,10 @@ def __init__(self, path_or_buf, convert_dates=True,
957
957
self ._preserve_dtypes = preserve_dtypes
958
958
self ._columns = columns
959
959
self ._order_categoricals = order_categoricals
960
- if encoding not in VALID_ENCODINGS :
961
- raise ValueError ('Unknown encoding. Only latin-1 and ascii '
962
- 'supported.' )
960
+ if encoding is not None :
961
+ if encoding not in VALID_ENCODINGS :
962
+ raise ValueError ('Unknown encoding. Only latin-1 and ascii '
963
+ 'supported.' )
963
964
self ._encoding = encoding
964
965
self ._chunksize = chunksize
965
966
@@ -1373,7 +1374,8 @@ def _read_value_labels(self):
1373
1374
1374
1375
def _read_strls (self ):
1375
1376
self .path_or_buf .seek (self .seek_strls )
1376
- self .GSO = {0 : '' }
1377
+ # Wrap v_o in a string to allow uint64 values as keys on 32bit OS
1378
+ self .GSO = {'0' : '' }
1377
1379
while True :
1378
1380
if self .path_or_buf .read (3 ) != b'GSO' :
1379
1381
break
@@ -1398,7 +1400,8 @@ def _read_strls(self):
1398
1400
if self .format_version == 117 :
1399
1401
encoding = self ._encoding or self ._default_encoding
1400
1402
va = va [0 :- 1 ].decode (encoding )
1401
- self .GSO [v_o ] = va
1403
+ # Wrap v_o in a string to allow uint64 values as keys on 32bit OS
1404
+ self .GSO [str (v_o )] = va
1402
1405
1403
1406
# legacy
1404
1407
@Appender ('DEPRECATED: ' + _data_method_doc )
@@ -1634,7 +1637,8 @@ def _insert_strls(self, data):
1634
1637
for i , typ in enumerate (self .typlist ):
1635
1638
if typ != 'Q' :
1636
1639
continue
1637
- data .iloc [:, i ] = [self .GSO [k ] for k in data .iloc [:, i ]]
1640
+ # Wrap v_o in a string to allow uint64 values as keys on 32bit OS
1641
+ data .iloc [:, i ] = [self .GSO [str (k )] for k in data .iloc [:, i ]]
1638
1642
return data
1639
1643
1640
1644
def _do_select_columns (self , data , columns ):
0 commit comments