@@ -1139,13 +1139,17 @@ def _read_new_header(self, first_char):
1139
1139
# The first part of the header is common to 117 and 118.
1140
1140
self .path_or_buf .read (27 ) # stata_dta><header><release>
1141
1141
self .format_version = int (self .path_or_buf .read (3 ))
1142
- if self .format_version not in [117 , 118 ]:
1142
+ if self .format_version not in [117 , 118 , 119 ]:
1143
1143
raise ValueError (_version_error )
1144
1144
self ._set_encoding ()
1145
1145
self .path_or_buf .read (21 ) # </release><byteorder>
1146
1146
self .byteorder = self .path_or_buf .read (3 ) == b"MSF" and ">" or "<"
1147
1147
self .path_or_buf .read (15 ) # </byteorder><K>
1148
- self .nvar = struct .unpack (self .byteorder + "H" , self .path_or_buf .read (2 ))[0 ]
1148
+ nvar_type = "H" if self .format_version <= 118 else "I"
1149
+ nvar_size = 2 if self .format_version <= 118 else 4
1150
+ self .nvar = struct .unpack (
1151
+ self .byteorder + nvar_type , self .path_or_buf .read (nvar_size )
1152
+ )[0 ]
1149
1153
self .path_or_buf .read (7 ) # </K><N>
1150
1154
1151
1155
self .nobs = self ._get_nobs ()
@@ -1207,7 +1211,7 @@ def _read_new_header(self, first_char):
1207
1211
self .path_or_buf .seek (self ._seek_variable_labels )
1208
1212
self ._variable_labels = self ._get_variable_labels ()
1209
1213
1210
- # Get data type information, works for versions 117-118 .
1214
+ # Get data type information, works for versions 117-119 .
1211
1215
def _get_dtypes (self , seek_vartypes ):
1212
1216
1213
1217
self .path_or_buf .seek (seek_vartypes )
@@ -1241,14 +1245,14 @@ def f(typ):
1241
1245
def _get_varlist (self ):
1242
1246
if self .format_version == 117 :
1243
1247
b = 33
1244
- elif self .format_version = = 118 :
1248
+ elif self .format_version > = 118 :
1245
1249
b = 129
1246
1250
1247
1251
return [self ._decode (self .path_or_buf .read (b )) for i in range (self .nvar )]
1248
1252
1249
1253
# Returns the format list
1250
1254
def _get_fmtlist (self ):
1251
- if self .format_version = = 118 :
1255
+ if self .format_version > = 118 :
1252
1256
b = 57
1253
1257
elif self .format_version > 113 :
1254
1258
b = 49
@@ -1270,7 +1274,7 @@ def _get_lbllist(self):
1270
1274
return [self ._decode (self .path_or_buf .read (b )) for i in range (self .nvar )]
1271
1275
1272
1276
def _get_variable_labels (self ):
1273
- if self .format_version = = 118 :
1277
+ if self .format_version > = 118 :
1274
1278
vlblist = [
1275
1279
self ._decode (self .path_or_buf .read (321 )) for i in range (self .nvar )
1276
1280
]
@@ -1285,13 +1289,13 @@ def _get_variable_labels(self):
1285
1289
return vlblist
1286
1290
1287
1291
def _get_nobs (self ):
1288
- if self .format_version = = 118 :
1292
+ if self .format_version > = 118 :
1289
1293
return struct .unpack (self .byteorder + "Q" , self .path_or_buf .read (8 ))[0 ]
1290
1294
else :
1291
1295
return struct .unpack (self .byteorder + "I" , self .path_or_buf .read (4 ))[0 ]
1292
1296
1293
1297
def _get_data_label (self ):
1294
- if self .format_version = = 118 :
1298
+ if self .format_version > = 118 :
1295
1299
strlen = struct .unpack (self .byteorder + "H" , self .path_or_buf .read (2 ))[0 ]
1296
1300
return self ._decode (self .path_or_buf .read (strlen ))
1297
1301
elif self .format_version == 117 :
@@ -1303,7 +1307,7 @@ def _get_data_label(self):
1303
1307
return self ._decode (self .path_or_buf .read (32 ))
1304
1308
1305
1309
def _get_time_stamp (self ):
1306
- if self .format_version = = 118 :
1310
+ if self .format_version > = 118 :
1307
1311
strlen = struct .unpack ("b" , self .path_or_buf .read (1 ))[0 ]
1308
1312
return self .path_or_buf .read (strlen ).decode ("utf-8" )
1309
1313
elif self .format_version == 117 :
@@ -1321,7 +1325,7 @@ def _get_seek_variable_labels(self):
1321
1325
# a work around that uses the previous label, 33 bytes for each
1322
1326
# variable, 20 for the closing tag and 17 for the opening tag
1323
1327
return self ._seek_value_label_names + (33 * self .nvar ) + 20 + 17
1324
- elif self .format_version = = 118 :
1328
+ elif self .format_version > = 118 :
1325
1329
return struct .unpack (self .byteorder + "q" , self .path_or_buf .read (8 ))[0 ] + 17
1326
1330
else :
1327
1331
raise ValueError ()
@@ -1519,10 +1523,12 @@ def _read_strls(self):
1519
1523
else :
1520
1524
buf = self .path_or_buf .read (12 )
1521
1525
# Only tested on little endian file on little endian machine.
1526
+ v_size = 2 if self .format_version == 118 else 3
1522
1527
if self .byteorder == "<" :
1523
- buf = buf [0 :2 ] + buf [4 :10 ]
1528
+ buf = buf [0 :v_size ] + buf [4 : 12 - v_size ]
1524
1529
else :
1525
- buf = buf [0 :2 ] + buf [6 :]
1530
+ # This path may not be correct, impossible to test
1531
+ buf = buf [0 :v_size ] + buf [4 + v_size :]
1526
1532
v_o = struct .unpack ("Q" , buf )[0 ]
1527
1533
typ = struct .unpack ("B" , self .path_or_buf .read (1 ))[0 ]
1528
1534
length = struct .unpack (self .byteorder + "I" , self .path_or_buf .read (4 ))[0 ]
0 commit comments