@@ -851,23 +851,24 @@ def __init__(self, encoding):
851
851
float32_max = b'\xff \xff \xff \x7e '
852
852
float64_min = b'\xff \xff \xff \xff \xff \xff \xef \xff '
853
853
float64_max = b'\xff \xff \xff \xff \xff \xff \xdf \x7f '
854
- self .VALID_RANGE = \
855
- {
856
- 'b' : (- 127 , 100 ),
857
- 'h' : (- 32767 , 32740 ),
858
- 'l' : (- 2147483647 , 2147483620 ),
859
- 'f' : (np .float32 (struct .unpack ('<f' , float32_min )[0 ]),
860
- np .float32 (struct .unpack ('<f' , float32_max )[0 ])),
861
- 'd' : (np .float64 (struct .unpack ('<d' , float64_min )[0 ]),
862
- np .float64 (struct .unpack ('<d' , float64_max )[0 ]))
863
- }
864
-
865
- self .OLD_TYPE_MAPPING = \
866
- {
867
- 'i' : 252 ,
868
- 'f' : 254 ,
869
- 'b' : 251
870
- }
854
+ self .VALID_RANGE = {
855
+ 'b' : (- 127 , 100 ),
856
+ 'h' : (- 32767 , 32740 ),
857
+ 'l' : (- 2147483647 , 2147483620 ),
858
+ 'f' : (np .float32 (struct .unpack ('<f' , float32_min )[0 ]),
859
+ np .float32 (struct .unpack ('<f' , float32_max )[0 ])),
860
+ 'd' : (np .float64 (struct .unpack ('<d' , float64_min )[0 ]),
861
+ np .float64 (struct .unpack ('<d' , float64_max )[0 ]))
862
+ }
863
+
864
+ self .OLD_TYPE_MAPPING = {
865
+ 98 : 251 , # byte
866
+ 105 : 252 , # int
867
+ 108 : 253 , # long
868
+ 102 : 254 # float
869
+ # the old-format type code for double is not known, so it has no entry here
870
+ }
871
+
871
872
# These missing values are the generic '.' in Stata, and are used
872
873
# to replace nans
873
874
self .MISSING_VALUES = {
@@ -878,15 +879,14 @@ def __init__(self, encoding):
878
879
'd' : np .float64 (
879
880
struct .unpack ('<d' , b'\x00 \x00 \x00 \x00 \x00 \x00 \xe0 \x7f ' )[0 ])
880
881
}
881
- self .NUMPY_TYPE_MAP = \
882
- {
883
- 'b' : 'i1' ,
884
- 'h' : 'i2' ,
885
- 'l' : 'i4' ,
886
- 'f' : 'f4' ,
887
- 'd' : 'f8' ,
888
- 'Q' : 'u8'
889
- }
882
+ self .NUMPY_TYPE_MAP = {
883
+ 'b' : 'i1' ,
884
+ 'h' : 'i2' ,
885
+ 'l' : 'i4' ,
886
+ 'f' : 'f4' ,
887
+ 'd' : 'f8' ,
888
+ 'Q' : 'u8'
889
+ }
890
890
891
891
# Reserved words cannot be used as variable names
892
892
self .RESERVED_WORDS = ('aggregate' , 'array' , 'boolean' , 'break' ,
@@ -900,12 +900,6 @@ def __init__(self, encoding):
900
900
'protected' , 'quad' , 'rowvector' , 'short' ,
901
901
'typedef' , 'typename' , 'virtual' )
902
902
903
- def _decode_bytes (self , str , errors = None ):
904
- if compat .PY3 or self ._encoding is not None :
905
- return str .decode (self ._encoding , errors )
906
- else :
907
- return str
908
-
909
903
910
904
class StataReader (StataParser , BaseIterator ):
911
905
__doc__ = _stata_reader_doc
@@ -1201,11 +1195,14 @@ def _read_old_header(self, first_char):
1201
1195
typlist = [ord (self .path_or_buf .read (1 ))
1202
1196
for i in range (self .nvar )]
1203
1197
else :
1204
- typlist = [
1205
- self .OLD_TYPE_MAPPING [
1206
- self ._decode_bytes (self .path_or_buf .read (1 ))
1207
- ] for i in range (self .nvar )
1208
- ]
1198
+ buf = self .path_or_buf .read (self .nvar )
1199
+ typlistb = np .frombuffer (buf , dtype = np .uint8 )
1200
+ typlist = []
1201
+ for tp in typlistb :
1202
+ if tp in self .OLD_TYPE_MAPPING :
1203
+ typlist .append (self .OLD_TYPE_MAPPING [tp ])
1204
+ else :
1205
+ typlist .append (tp - 127 ) # string
1209
1206
1210
1207
try :
1211
1208
self .typlist = [self .TYPE_MAP [typ ] for typ in typlist ]
@@ -1526,7 +1523,7 @@ def read(self, nrows=None, convert_dates=None,
1526
1523
data [col ],
1527
1524
self .fmtlist [i ])
1528
1525
1529
- if convert_categoricals and self .value_label_dict :
1526
+ if convert_categoricals and self .format_version > 108 :
1530
1527
data = self ._do_convert_categoricals (data ,
1531
1528
self .value_label_dict ,
1532
1529
self .lbllist ,
0 commit comments