@@ -44,7 +44,7 @@ def read_stata(filepath_or_buffer, convert_dates=True,
44
44
Read value labels and convert columns to Categorical/Factor variables
45
45
encoding : string, None or encoding
46
46
Encoding used to parse the files. Note that Stata doesn't
47
- support unicode. None defaults to cp1252 .
47
+ support unicode. None defaults to iso-8859-1 .
48
48
index : identifier of index column
49
49
identifier of column that should be used as index of the DataFrame
50
50
convert_missing : boolean, defaults to False
@@ -683,7 +683,7 @@ def get_base_missing_value(cls, dtype):
683
683
684
684
685
685
class StataParser (object ):
686
- _default_encoding = 'cp1252 '
686
+ _default_encoding = 'iso-8859-1 '
687
687
688
688
def __init__ (self , encoding ):
689
689
self ._encoding = encoding
@@ -823,10 +823,10 @@ class StataReader(StataParser):
823
823
Path to .dta file or object implementing a binary read() functions
824
824
encoding : string, None or encoding
825
825
Encoding used to parse the files. Note that Stata doesn't
826
- support unicode. None defaults to cp1252 .
826
+ support unicode. None defaults to iso-8859-1 .
827
827
"""
828
828
829
- def __init__ (self , path_or_buf , encoding = 'cp1252 ' ):
829
+ def __init__ (self , path_or_buf , encoding = 'iso-8859-1 ' ):
830
830
super (StataReader , self ).__init__ (encoding )
831
831
self .col_sizes = ()
832
832
self ._has_string_data = False
@@ -841,7 +841,13 @@ def __init__(self, path_or_buf, encoding='cp1252'):
841
841
if isinstance (path_or_buf , (str , compat .text_type , bytes )):
842
842
self .path_or_buf = open (path_or_buf , 'rb' )
843
843
else :
844
- self .path_or_buf = path_or_buf
844
+ # Copy to BytesIO, and ensure no encoding
845
+ contents = path_or_buf .read ()
846
+ try :
847
+ contents = contents .encode (self ._default_encoding )
848
+ except :
849
+ pass
850
+ self .path_or_buf = BytesIO (contents )
845
851
846
852
self ._read_header ()
847
853
0 commit comments